From 694e90a3214f09c3474182537294da5a495ee0fc Mon Sep 17 00:00:00 2001 From: catavallejos Date: Thu, 5 Oct 2017 21:37:46 +0100 Subject: [PATCH] Revert "Merge branch 'Devel' into master" This reverts commit af2672a978c5f64f1ba5e5cb38b792a93382a64d, reversing changes made to 1d7711d3facc79aaff3b68b4974fbb3bc71b3c4a. --- NEWS | 19 -- R/BASiCS_MCMC.R | 73 +++-- R/BASiCS_TestDE.R | 1 - R/RcppExports.R | 4 +- R/makeExampleBASiCS_Data.R | 29 +- R/newBASiCS_Data.R | 59 ++-- man/newBASiCS_Data.Rd | 2 +- src/BASiCS_CPPcode.cpp | 557 ++++++++++++++++++++-------------- src/RcppExports.cpp | 27 +- vignettes/BASiCS_nospikes.Rmd | 151 --------- 10 files changed, 433 insertions(+), 489 deletions(-) delete mode 100644 vignettes/BASiCS_nospikes.Rmd diff --git a/NEWS b/NEWS index 78d7fcb4..a8dfbe4c 100644 --- a/NEWS +++ b/NEWS @@ -595,25 +595,6 @@ version 0.99.5: (2017-09-26) version 0.99.6: (2017-10-05) - After @nturaga review - `inst` folder has been deleted - Removal of `...` arg in `segments` - `plot` method for `BASiCS_Summary` - -No-spikes implementation -- `makeExampleBASiCS_Data` + `newBASiCS_Data` adapted to no-spikes case -- Temporary vignette file for no-spikes case has been created -- `HiddenBASiCS_MCMCcppNoSpikes` updated to recycle vertical integration -update functions -- Fixed bug to allow use of stochastic reference -- Fixed bug in argument order for no-spikes version -- Spikes version code clean-up -Summary of constrain types -- ConstrainType: full or subset of genes - * For the second case, different filter thresholds have been proposed -- StochasticRef or no -- Idea: - * Try StochasticRef with no filter - * Try StochasticRef with filter > 0 total counts - -TO-DO -- Implement Giacomo's suggestion? - DEVELOPMENT NOTES (potential future changes): * Delete BASiCS_VarianceDecomp? diff --git a/R/BASiCS_MCMC.R b/R/BASiCS_MCMC.R index 8b8d4e2e..e99815dc 100644 --- a/R/BASiCS_MCMC.R +++ b/R/BASiCS_MCMC.R @@ -343,28 +343,59 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) { # 1: Full constrain; 2: Non-zero genes only ConstrainType <- ifelse("ConstrainType" %in% names(args), args$ConstrainType, 2) - StochasticRef <- ifelse("StochasticRef" %in% names(args), - args$StochasticRef, TRUE) + ConstrainLimit <- ifelse("ConstrainLimit" %in% names(args), + args$ConstrainLimit, 1) + ConstrainAlpha <- ifelse("ConstrainAlpha" %in% names(args), + args$ConstrainAlpha, 0.05) + ConstrainProb <- ifelse("ConstrainProb" %in% names(args), + args$ConstrainProb, 0.95) + BatchDesign <- model.matrix(~as.factor(metadata(Data)$BatchInfo) - 1) + BatchSizes <- table(metadata(Data)$BatchInfo) + BatchIds <- as.numeric(names(BatchSizes)) + BatchOffSet <- rep(1, times = nBatch) + for (k in 2:nBatch) + { + aux1 <- matrixStats::colSums2(assay(Data)[, + metadata(Data)$BatchInfo == BatchIds[k]]) + aux2 <- colSums(assay(Data)[, metadata(Data)$BatchInfo == BatchIds[1]]) + BatchOffSet[k] <- median(aux1) / median(aux2) + } + # Auxiliary vector contaning a gene index Index <- (1:q.bio) - 1 # In the following '+1' is used as c++ vector indexes vectors setting # '0' as its first element Constrain for gene-specific expression rates if (ConstrainType == 1) { - # Full constrain + # Full constrain Note we use 'ConstrainLimit + 1' as 1 + # pseudo-count was added + # when computing 'mu0' (to avoid numerical issues) ConstrainGene <- (1:q.bio) - 1 NotConstrainGene <- 0 Constrain <- mean(log(mu0[ConstrainGene + 1])) } if (ConstrainType == 2) { - # Trimmed constrain based on total counts > 0 - ConstrainGene <- which(sum.bycell.bio > 0) - 1 - NotConstrainGene <- which(sum.bycell.bio == 0) - 1 + # Trimmed constrain based on mean Note we use 'ConstrainLimit + 1' + # as 1 pseudo-count + # was added when computing 'mu0' (to avoid numerical issues) + ConstrainGene <- which(mu0 >= ConstrainLimit + 1) - 1 + NotConstrainGene <- which(mu0 < ConstrainLimit + 1) - 1 Constrain <- mean(log(mu0[ConstrainGene + 1])) } - + if (ConstrainType == 3) + { + # Trimmed constrain based on detection + Detection <- rowMeans(assay(Data) > 0) + ConstrainGene <- which(Detection >= ConstrainLimit) - 1 + NotConstrainGene <- which(Detection < ConstrainLimit) - 1 + Constrain <- mean(log(mu0[ConstrainGene + 1])) + } + + StochasticRef <- ifelse("StochasticRef" %in% names(args), + args$StochasticRef, FALSE) + if (StochasticRef == TRUE) { aux.ref <- cbind(ConstrainGene, @@ -387,40 +418,40 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) { as.matrix(assay(Data)), BatchDesign, mu0, delta0, - phi0, nu0, - rep(theta0, nBatch), + phi0, nu0, theta0, PriorParam$s2.mu, PriorParam$a.delta, PriorParam$b.delta, - PriorParam$s2.delta, - PriorDeltaNum, - PriorParam$a.s, - PriorParam$b.s, + PriorParam$a.phi, + PriorParam$b.phi, PriorParam$a.theta, PriorParam$b.theta, AR, ls.mu0, ls.delta0, - ls.nu0, - rep(ls.theta0, nBatch), + ls.nu0, ls.theta0, sum.bycell.all, sum.bygene.all, StoreAdaptNumber, StopAdapt, as.numeric(PrintProgress), - Constrain, + PriorParam$s2.delta, + PriorDeltaNum, + metadata(Data)$BatchInfo, + BatchIds, + as.vector(BatchSizes), + BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, - ConstrainType, - as.numeric(StochasticRef))) + ConstrainType)) } Chain$mu <- Chain$mu[, 1:q.bio] colnames(Chain$mu) <- rownames(assay(Data))[!isSpike(Data)] colnames(Chain$delta) <- rownames(assay(Data))[!isSpike(Data)] CellLabels <- paste0("Cell", 1:n, "_Batch", metadata(Data)$BatchInfo) - colnames(Chain$s) <- CellLabels - if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$phi) <- CellLabels } + colnames(Chain$phi) <- CellLabels + if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$s) <- CellLabels } colnames(Chain$nu) <- CellLabels colnames(Chain$theta) <- paste0("Batch", unique(metadata(Data)$BatchInfo)) @@ -437,7 +468,7 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) { if (length(metadata(Data)$SpikeInput) == 1) { - Chain$phi <- matrix(1, ncol = ncol(Chain$s), nrow = nrow(Chain$s)) + Chain$s <- matrix(1, ncol = ncol(Chain$phi), nrow = nrow(Chain$phi)) } ChainClass <- newBASiCS_Chain(mu = Chain$mu, delta = Chain$delta, diff --git a/R/BASiCS_TestDE.R b/R/BASiCS_TestDE.R index bffb7e9e..b7f7bf39 100644 --- a/R/BASiCS_TestDE.R +++ b/R/BASiCS_TestDE.R @@ -157,7 +157,6 @@ BASiCS_TestDE <- function(Chain1, GenesSelect = NULL, ...) { # Checking validity of input arguments - # Test if (!is(Chain1, "BASiCS_Chain")) stop("'Chain1' is not a 'BASiCS_Chain' class object.") if (!is(Chain2, "BASiCS_Chain")) diff --git a/R/RcppExports.R b/R/RcppExports.R index cb824ffd..86cf8f98 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,7 +9,7 @@ HiddenBASiCS_DenoisedRates <- function(CountsBio, Mu, TransInvDelta, PhiNu, N, q .Call('_BASiCS_HiddenBASiCS_DenoisedRates', PACKAGE = 'BASiCS', CountsBio, Mu, TransInvDelta, PhiNu, N, q0, n) } -HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef) { - .Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef) +HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType) { + .Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType) } diff --git a/R/makeExampleBASiCS_Data.R b/R/makeExampleBASiCS_Data.R index c341d6aa..3ce84a65 100644 --- a/R/makeExampleBASiCS_Data.R +++ b/R/makeExampleBASiCS_Data.R @@ -34,17 +34,17 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE, WithSpikes = TRUE) { Mu <- c( 8.36, 10.65, 4.88, 6.29, 21.72, 12.93, 30.19, 83.92, 3.89, 6.34, - 57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20, + 57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20, 6.75, 27.74, 8.88, 21.21, 19.89, 7.14, 11.09, 7.19, 20.64, 73.9, 9.05, 6.13, 16.40, 6.08, 17.89, 6.98, 10.25, 14.05, 8.14, 5.67, 6.95, 11.16, 11.83, 7.56,159.05, 16.41, 4.58, 15.46, 10.96, 25.05, - 18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28, + 18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28, 3.91, 13.10, 8.23, 3.64, 80.77, 20.36, 3.47, 20.12, 13.29, 7.92, - 25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36, - 10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30, + 25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36, + 10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30, 7.47, 8.44, 4.24, 16.15, 23.39,120.22, 8.92, 97.15, 9.75, 10.07, - 1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17, - 4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79) + 1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17, + 4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79) Delta <- c(1.29, 0.88, 1.51, 1.49, 0.54, 0.40, 0.85, 0.27, 0.53, 1.31, 0.26, 0.81, 0.72, 0.70, 0.96, 0.58, 1.15, 0.82, 0.25, 5.32, 1.13, 0.31, 0.66, 0.27, 0.76, 1.39, 1.18, 1.57, 0.55, 0.17, @@ -59,14 +59,14 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE, 1.09, 1.16, 1.19, 1.14, 0.87, 1.10, 0.48, 1.06, 0.94, 0.97) S <- c(0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37, 0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37) - + Mu <- c(Mu[1:50], Mu[101:120]) Delta <- Delta[1:50] - + q <- length(Mu) q.bio <- length(Delta) n <- length(Phi) - + if (!WithBatch) { Theta <- 0.5 } else { @@ -75,7 +75,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE, Theta2 <- 0.75 Theta <- ifelse(seq_len(n) <= 10, Theta1, Theta2) } - + if (WithSpikes) { # Matrix where simulated counts will be stored @@ -115,7 +115,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE, rownames(Counts.sim) <- paste0("Gene", seq_len(q)) SpikeInfo <- data.frame(paste0("Gene", seq(q.bio+1, q)), Mu[seq(q.bio+1, q)]) - + if (!WithBatch) { Data <- newBASiCS_Data(Counts = Counts.sim, @@ -160,11 +160,12 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE, Counts.sim[i, ] <- rpois(n, lambda = Nu * Rho[i, ] * Mu[i]) } rownames(Counts.sim) <- paste0("Gene", seq_len(q.bio)) - + Data <- newBASiCS_Data(Counts = Counts.sim, Tech = rep(FALSE, q.bio), + SpikeInfo = 1, BatchInfo = c(rep(1, 10), rep(2, 10))) - + } return(Data) -} \ No newline at end of file +} diff --git a/R/newBASiCS_Data.R b/R/newBASiCS_Data.R index f5a4d16a..1318805c 100644 --- a/R/newBASiCS_Data.R +++ b/R/newBASiCS_Data.R @@ -48,65 +48,54 @@ #' @references #' Vallejos, Marioni and Richardson (2015). PLoS Computational Biology. #' -newBASiCS_Data <- function(Counts, - Tech, - SpikeInfo = NULL, - BatchInfo = NULL) +newBASiCS_Data <- function(Counts, Tech, SpikeInfo, BatchInfo = NULL) { # Validity checks for SpikeInfo - if(!is.null(SpikeInfo)) - { - if (!is.data.frame(SpikeInfo)) + if (!is.data.frame(SpikeInfo)) stop("'SpikeInfo' must be a 'data.frame'") - if (data.table::is.data.table(SpikeInfo)) - stop("'SpikeInfo' must be a 'data.frame'") - } - else - { - message("Spikes information is not provided.\n", - "Only the horizontal integration method can be used.") - } - + if (data.table::is.data.table(SpikeInfo)) + stop("'SpikeInfo' must be a 'data.frame'") + if (is.null(BatchInfo)) { BatchInfo <- rep(1, times = ncol(Counts)) } - + # Re-ordering genes Counts <- as.matrix(rbind(Counts[!Tech, ], Counts[Tech, ])) Tech <- c(Tech[!Tech], Tech[Tech]) GeneName <- rownames(Counts) - + if (!is.null(SpikeInfo)) { # Extracting spike-in input molecules in the correct order if (sum(!(GeneName[Tech] %in% SpikeInfo[, 1])) > 0) - stop("'SpikeInfo' is missing information for some of the spikes") + stop("'SpikeInfo' is missing information for some of the spikes") if (sum(!(SpikeInfo[, 1] %in% GeneName[Tech])) > 0) - stop("'SpikeInfo' includes spikes that are not in 'Counts'") + stop("'SpikeInfo' includes spikes that are not in 'Counts'") matching <- match(GeneName[Tech], SpikeInfo[, 1]) SpikeInput <- SpikeInfo[matching, 2] } else { SpikeInput <- 1 } - + # Checks to assess if the data contains the required information errors <- HiddenChecksBASiCS_Data(Counts, Tech, SpikeInput, GeneName, BatchInfo) if (length(errors) > 0) stop(errors) - + # Create a SingleCellExperiment data object Data <- SingleCellExperiment::SingleCellExperiment( - assays = list(Counts = as.matrix(Counts)), - metadata = list(SpikeInput = SpikeInput, - BatchInfo = BatchInfo)) + assays = list(Counts = as.matrix(Counts)), + metadata = list(SpikeInput = SpikeInput, + BatchInfo = BatchInfo)) isSpike(Data, "ERCC") <- Tech - + message("\n", "NOTICE: BASiCS requires a pre-filtered dataset \n", - " - You must remove poor quality cells before hand \n", - " - We recommend to pre-filter lowly expressed transcripts. \n", - " Inclusion criteria may vary for each data. \n", - " For example, remove transcripts: \n", - " - with low total counts across of all of the cells \n", - " - that are only expressed in a few cells \n", - " (genes expressed in only 1 cell are not accepted) \n", - "\n BASiCS_Filter can be used for this purpose. \n") + " - You must remove poor quality cells before hand \n", + " - We recommend to pre-filter lowly expressed transcripts. \n", + " Inclusion criteria may vary for each data. \n", + " For example, remove transcripts: \n", + " - with low total counts across of all of the cells \n", + " - that are only expressed in a few cells \n", + " (genes expressed in only 1 cell are not accepted) \n", + "\n BASiCS_Filter can be used for this purpose. \n") return(Data) -} \ No newline at end of file +} diff --git a/man/newBASiCS_Data.Rd b/man/newBASiCS_Data.Rd index 86b0e619..220d06e8 100644 --- a/man/newBASiCS_Data.Rd +++ b/man/newBASiCS_Data.Rd @@ -5,7 +5,7 @@ \title{Creates a SingleCellExperiment object from a matrix of expression counts and experimental information about spike-in genes} \usage{ -newBASiCS_Data(Counts, Tech, SpikeInfo = NULL, BatchInfo = NULL) +newBASiCS_Data(Counts, Tech, SpikeInfo, BatchInfo = NULL) } \arguments{ \item{Counts}{Matrix of dimensions \code{q} times \code{n} whose elements diff --git a/src/BASiCS_CPPcode.cpp b/src/BASiCS_CPPcode.cpp index c6e968cb..b057b6f8 100644 --- a/src/BASiCS_CPPcode.cpp +++ b/src/BASiCS_CPPcode.cpp @@ -1185,21 +1185,20 @@ arma::mat muUpdateNoSpikes( double const& s2_mu, int const& q0, int const& n, + arma::vec & mu, + arma::vec & ind, int const& RefGene, arma::uvec const& ConstrainGene, arma::uvec const& NotConstrainGene, - int const& ConstrainType, - arma::vec & mu1, - arma::vec & u, - arma::vec & ind) + int const& ConstrainType) { using arma::span; // PROPOSAL STEP - mu1 = exp(arma::randn(q0) % sqrt(prop_var) + log(mu0)); - u = arma::randu(q0); + arma::vec y = exp(arma::randn(q0) % sqrt(prop_var) + log(mu0)); + arma::vec u = arma::randu(q0); // INITIALIZE MU - arma::vec mu = mu0 + 1 - 1; + mu = mu0 + 1 - 1; double aux; double iAux; double sumAux = sum(log(mu0.elem(ConstrainGene))) - log(mu0(RefGene)); @@ -1208,7 +1207,7 @@ arma::mat muUpdateNoSpikes( // Step 1: Computing the likelihood contribution of the acceptance rate // Calculated in the same way for all genes, // but the reference one (no need to be sequential) - arma::vec log_aux = (log(mu1) - log(mu0)) % sum_bycell_all; + arma::vec log_aux = (log(y) - log(mu0)) % sum_bycell_all; for (int i=0; i < q0; i++) { if(i != RefGene) @@ -1216,7 +1215,7 @@ arma::mat muUpdateNoSpikes( for (int j=0; j < n; j++) { log_aux(i) -= ( Counts(i,j) + (1/delta(i)) ) * - log( ( nu(j)*mu1(i)+(1/delta(i)) ) / ( nu(j)*mu(i)+(1/delta(i)) )); + log( ( nu(j)*y(i)+(1/delta(i)) ) / ( nu(j)*mu(i)+(1/delta(i)) )); } } } @@ -1230,12 +1229,12 @@ arma::mat muUpdateNoSpikes( if(iAux != RefGene) { aux = 0.5 * (ConstrainGene.size() * Constrain - (sumAux - log(mu(iAux)))); - log_aux(iAux) -= (0.5 * 2 /s2_mu) * (pow(log(mu1(iAux)) - aux,2)); + log_aux(iAux) -= (0.5 * 2 /s2_mu) * (pow(log(y(iAux)) - aux,2)); log_aux(iAux) += (0.5 * 2 /s2_mu) * (pow(log(mu0(iAux)) - aux,2)); // ACCEPT REJECT - if((log(u(iAux)) < log_aux(iAux)) & (mu1(iAux) > 1e-3)) + if((log(u(iAux)) < log_aux(iAux)) & (y(iAux) > 1e-3)) { - ind(iAux) = 1; mu(iAux) = mu1(iAux); + ind(iAux) = 1; mu(iAux) = y(iAux); sumAux += log(mu(iAux)) - log(mu0(iAux)); } else{ind(iAux) = 0; mu(iAux) = mu0(iAux); } @@ -1252,11 +1251,11 @@ arma::mat muUpdateNoSpikes( for (int i=0; i < NotConstrainGene.size(); i++) { iAux = NotConstrainGene(i); - log_aux(iAux) -= (0.5/s2_mu) * (pow(log(mu1(iAux)),2) - pow(log(mu0(iAux)),2)); + log_aux(iAux) -= (0.5/s2_mu) * (pow(log(y(iAux)),2) - pow(log(mu0(iAux)),2)); // ACCEPT REJECT - if((log(u(iAux)) < log_aux(iAux)) & (mu1(iAux) > 1e-3)) + if((log(u(iAux)) < log_aux(iAux)) & (y(iAux) > 1e-3)) { - ind(iAux) = 1; mu(iAux) = mu1(iAux); + ind(iAux) = 1; mu(iAux) = y(iAux); } else{ind(iAux) = 0; mu(iAux) = mu0(iAux);} } @@ -1265,6 +1264,152 @@ arma::mat muUpdateNoSpikes( return join_rows(mu, ind); } +/* Metropolis-Hastings updates of delta + * Updates are implemented simulateaneously for all biological genes + */ +arma::mat deltaUpdateNoSpikes( + arma::vec const& delta0, + arma::vec const& prop_var, + arma::mat const& Counts, + arma::vec const& mu, + arma::vec const& nu, + double const& a_delta, + double const& b_delta, + int const& q0, + int const& n, + double const& s2delta, + double const& prior_delta) +{ + using arma::span; + + // CREATING VARIABLES WHERE TO STORE DRAWS + arma::vec delta = arma::zeros(q0); + arma::vec ind = arma::zeros(q0); + + // PROPOSAL STEP + arma::vec y = exp(arma::randn(q0) % sqrt(prop_var) + log(delta0)); + arma::vec u = arma::randu(q0); + + // ACCEPT/REJECT STEP + arma::vec log_aux = - n * (lgamma_cpp(1/y)-lgamma_cpp(1/delta0)); + // +1 should appear because we update log(delta) not delta. + // However, it cancels out with the prior. + log_aux -= n * ( (log(y)/y) - (log(delta0)/delta0) ); + + // Loop to replace matrix operations, through genes and cells + for (int i=0; i < q0; i++) + { + for (int j=0; j < n; j++) + { + log_aux(i) += R::lgammafn(Counts(i,j) + (1/y(i))) - R::lgammafn(Counts(i,j) + (1/delta0(i))); + log_aux(i) -= ( Counts(i,j) + (1/y(i)) ) * log( nu(j)*mu(i)+(1/y(i)) ); + log_aux(i) += ( Counts(i,j) + (1/delta0(i)) ) * log( nu(j)*mu(i)+(1/delta0(i)) ); + } + } + + // Component related to the prior + if(prior_delta == 1) {log_aux += (log(y) - log(delta0)) * a_delta - b_delta * (y - delta0);} + else {log_aux -= (0.5/s2delta) * (pow(log(y),2) - pow(log(delta0),2));} + + // CREATING OUTPUT VARIABLE & DEBUG + for (int i=0; i < q0; i++) + { + if((arma::is_finite(log_aux(i)))) + { + if((y(i) > 1e-3) & (log(u(i)) < log_aux(i))) + { + // DEBUG: Reject very small values to avoid numerical issues + if(arma::is_finite(y(i))) {ind(i) = 1; delta(i) = y(i);} + else{ind(i) = 0; delta(i) = delta0(i);} + } + else{ind(i) = 0; delta(i) = delta0(i);} + } + // DEBUG: Reject values such that acceptance rate cannot be computed (due no numerical innacuracies) + // DEBUG: Reject values such that the proposed value is not finite (due no numerical innacuracies) + else + { + ind(i) = 0; delta(i) = delta0(i); + Rcpp::Rcout << "delta0(i): " << delta0(i) << std::endl; + Rcpp::Rcout << "y(i): " << y(i) << std::endl; + Rcpp::Rcout << "mu(i): " << mu(i) << std::endl; + Rcpp::Rcout << "SomeThing went wrong when updating delta " << i+1 << std::endl; + Rcpp::warning("If this error repeats often, please consider additional filter of the input dataset or use a smaller value for s2mu."); + } + } + + // OUTPUT + return join_rows(delta, ind); +} + +/* Auxiliary function required for some of the Metropolis-Hastings updates of nu */ +arma::mat UpdateAux_nuTrick( + arma::vec const& nu, /* Auxiliary variable (function of the current and proposed value of nu) */ +arma::vec const& mu, /* Current value of $\mu=(\mu_1,...,\mu_q)'$ */ +arma::vec const& delta, /* Current value of $\delta=(\delta_1,...,\delta_{q_0})'$ */ +arma::vec const& phi) /* Current value of $\phi=(\phi_1,...,\phi_n)$' */ +{ + arma::mat x = ((phi % nu) * mu.t()).t(); + x.each_col() += 1/delta; + return x; +} + +/* Metropolis-Hastings updates of nu + * Updates are implemented simulateaneously for all cells, significantly reducing the computational burden. + */ +arma::mat nuUpdateNoSpikes( + arma::vec const& nu0, /* Current value of $\nu=(\nu_1,...,\nu_n)'$ */ + arma::vec const& prop_var, /* Current value of the proposal variances for $\nu=(\nu_1,...,\nu_n)'$ */ + arma::mat const& Counts, /* $q \times n$ matrix of expression counts (technical genes at the bottom) */ + arma::mat const& BatchDesign, + arma::vec const& mu, /* Current value of $\mu=(\mu_1,...,\mu_q)'$ */ + arma::vec const& delta, /* Current value of $\delta=(\delta_1,...,\delta_{q_0})'$ */ + arma::vec const& phi, /* Current value of $\phi=(\phi_1,...,\phi_n)$' */ + arma::vec const& theta, /* Current value of $\theta$' */ + arma::vec const& sum_bygene_all, /* Sum of expression counts by gene (all genes) */ + int const& q0, + int const& n, + arma::vec const& BatchInfo, + arma::vec const& BatchIds, + int const& nBatch, + arma::vec const& BatchSizes, + arma::vec const& BatchOffSet) +{ + using arma::span; + + // CREATING VARIABLES WHERE TO STORE DRAWS + arma::vec lognu = log(nu0); + arma::vec thetaBatch = BatchDesign * theta; + + // PROPOSAL STEP + arma::vec y = arma::randn(n) % sqrt(prop_var) + lognu; + arma::vec u = arma::randu(n); + + // ACCEPT/REJECT STEP + arma::vec log_aux = (y - lognu) % (sum_bygene_all + 1/thetaBatch); + log_aux -= nu0 % (exp(y-lognu)-1) % (1/(thetaBatch % phi)); + arma::mat m = Counts.rows(0, q0 - 1); + m.each_col() += 1 / delta; + arma::mat num = UpdateAux_nuTrick(exp(y), mu, delta, arma::ones(n)); + num /= UpdateAux_nuTrick(nu0, mu, delta, arma::ones(n)); + m %= log(num); + log_aux -= sum(m, 0).t(); + arma::umat ind = log(u) < log_aux; + // DEBUG: Reject values such that acceptance rate cannot be computed (due no numerical innacuracies) + // DEBUG: Print warning message + ind.elem(find_nonfinite(log_aux)).zeros(); + if(size(find_nonfinite(log_aux),0)>0) + { + Rcpp::Rcout << "SomeThing went wrong when updating nu" << size(find_nonfinite(log_aux),0) << std::endl; + Rcpp::stop("Please consider additional filter of the input dataset."); + } + + // CREATING OUTPUT VARIABLE + arma::vec nu = ind % exp(y) + (1 - ind) % nu0; + + // OUTPUT + return join_rows(nu, arma::conv_to::from(ind)); +} + // [[Rcpp::export]] arma::mat HiddenBASiCS_DenoisedRates( NumericMatrix CountsBio, @@ -1305,36 +1450,6 @@ arma::mat HiddenBASiCS_DenoisedRates( /* MCMC sampler - * N: Total number of MCMC draws - * Thin: Thinning period for MCMC chain - * Burn: Burning period for MCMC chain - * Counts: Matrix of expression counts - * BatchDesign: Design matrix representing batch information - * mu0: Starting value for mu - * delta0: Starting value for delta - * phi0: Starting value for phi - * nu0: Starting value for nu - * theta0: Starting value for theta - * s2mu: prior variance for mu - * adelta: prior shape for delta (when using a gamma prior) - * bdelta: prior rate for delta (when using a gamma prior) - * s2delta: prior variance for delta (when using a log-normal prior) - * prior_delta: whether gamma or log-normal prior is used for delta - * aphi: Shape hyper-parameter for phi - * bphi: Rate hyper-parameter for phi - * atheta: prior shape for theta - * btheta: prior rate for theta - * ar: Optimal acceptance rate for adaptive Metropolis-Hastings updates - * LSmu0: Starting value of adaptive proposal variance of mu (log-scale) - * LSdelta0: Starting value of adaptive proposal variance of delta (log-scale) - * LSnu0: Starting value of adaptive proposal variance of nu (log-scale) - * LStheta0: Starting value of adaptive proposal variance of theta (log-scale) - * sumByCellAll: Sum of expression counts by cell (all genes) - * sumByGeneAll: Sum of expression counts by gene (all genes) - * StoreAdapt: whether to store adaptive variances - * EndAdapt: when to stop the adaptation - * PrintProgress: whether to print progress report - * ADD ADDITIONAL ARGS HERE! */ // [[Rcpp::export]] Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( @@ -1345,67 +1460,65 @@ Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( NumericMatrix BatchDesign, // Design matrix representing batch information (number of columns must be equal to number of batches) NumericVector mu0, // Starting value of $\mu=(\mu_1,...,\mu_q_0)'$ (true mRNA content for technical genes) NumericVector delta0, // Starting value of $\delta=(\delta_1,...,\delta_{q_0})'$ - NumericVector s0, // Starting value of $\phi=(\phi_1,...,\phi_n)$'$ + NumericVector phi0, // Starting value of $\phi=(\phi_1,...,\phi_n)$'$ NumericVector nu0, // Starting value of $\nu=(\nu_1,...,\nu_n)$'$ - NumericVector theta0, // Starting value of $\theta$ + double theta0, // Starting value of $\theta$ double s2mu, double adelta, // Shape hyper-parameter of the Gamma($a_{\delta}$,$b_{\delta}$) prior assigned to each $\delta_i$ double bdelta, // Rate hyper-parameter of the Gamma($a_{\delta}$,$b_{\delta}$) prior assigned to each $\delta_i$ - double s2delta, - double prior_delta, - double as, - double bs, + double aphi, + double bphi, double atheta, // Shape hyper-parameter of the Gamma($a_{\theta}$,$b_{\theta}$) prior assigned to $\theta$ double btheta, // Rate hyper-parameter of the Gamma($a_{\theta}$,$b_{\theta}$) prior assigned to $\theta$ double ar, // Optimal acceptance rate for adaptive Metropolis-Hastings updates NumericVector LSmu0, // Starting value of adaptive proposal variance of $\mu=(\mu_1,...,\mu_q_0)'$ (log-scale) NumericVector LSdelta0, // Starting value of adaptive proposal variance of $\delta=(\delta_1,...,\delta_{q_0})'$ (log-scale) NumericVector LSnu0, // Starting value of adaptive proposal variance of $\nu=(\nu_1,...,\nu_n)'$ (log-scale) - NumericVector LStheta0, // Starting value of adaptive proposal variance of $\theta$ (log-scale) + double LStheta0, // Starting value of adaptive proposal variance of $\theta$ (log-scale) NumericVector sumByCellAll, // Sum of expression counts by cell (all genes) NumericVector sumByGeneAll, // Sum of expression counts by gene (all genes) int StoreAdapt, int EndAdapt, int PrintProgress, + double s2delta, + double prior_delta, + NumericVector BatchInfo, + NumericVector BatchIds, + NumericVector BatchSizes, + NumericVector BatchOffSet, double Constrain, NumericVector Index, int RefGene, NumericVector RefGenes, NumericVector ConstrainGene, NumericVector NotConstrainGene, - int ConstrainType, - int StochasticRef) + int ConstrainType) { - - using arma::ones; - using arma::zeros; - using Rcpp::Rcout; - // NUMBER OF CELLS, GENES AND STORED DRAWS - int n = Counts.ncol(); + int n = Counts.ncol(); int q0 = Counts.nrow(); int Naux = N/Thin - Burn/Thin; int nBatch = BatchDesign.ncol(); - int q0 = delta0.size(); - int Naux = N/Thin - Burn/Thin; // TRANSFORMATION TO ARMA ELEMENTS arma::vec sumByCellAll_arma = as_arma(sumByCellAll); arma::vec sumByGeneAll_arma = as_arma(sumByGeneAll); - arma::mat Counts_arma = as_arma(Counts); - arma::mat BatchDesign_arma = as_arma(BatchDesign); + arma::mat Counts_arma = as_arma(Counts); arma::mat BatchDesign_arma = as_arma(BatchDesign); + arma::vec BatchInfo_arma = as_arma(BatchInfo); + arma::vec BatchIds_arma = as_arma(BatchIds); + arma::vec BatchSizes_arma = as_arma(BatchSizes); + arma::vec BatchOffSet_arma = as_arma(BatchOffSet); + arma::vec mu0_arma = as_arma(mu0); + arma::vec phi0_arma = as_arma(phi0); arma::vec Index_arma = as_arma(Index); arma::uvec ConstrainGene_arma = Rcpp::as(ConstrainGene); arma::uvec NotConstrainGene_arma = Rcpp::as(NotConstrainGene); arma::vec RefGenes_arma = as_arma(RefGenes); - // OTHER GLOBAL QUANTITIES - arma::vec BatchSizes = sum(BatchDesign_arma,0).t(); - // OBJECTS WHERE DRAWS WILL BE STORED - arma::mat mu = zeros(q0, Naux); - arma::mat delta = zeros(q0, Naux); - arma::mat s = zeros(n, Naux); - arma::mat nu = zeros(n, Naux); - arma::mat theta = zeros(nBatch, Naux); + arma::mat mu = arma::zeros(Naux, q0); + arma::mat delta = arma::zeros(Naux, q0); + arma::mat phi = arma::ones(Naux, n); + arma::mat nu = arma::zeros(Naux, n); + arma::mat theta = arma::zeros(Naux, nBatch); arma::mat LSmu; arma::mat LSdelta; arma::mat LSnu; @@ -1414,117 +1527,100 @@ Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( // LOG-PROPOSAL VARIANCES if(StoreAdapt == 1) { - LSmu = zeros(q0, Naux); - LSdelta = zeros(q0, Naux); - LSnu = zeros(n, Naux); - LStheta = zeros(nBatch, Naux); + LSmu = arma::zeros(Naux, q0); + LSdelta = arma::zeros(Naux, q0); + LSnu = arma::zeros(Naux, n); + LStheta = arma::zeros(Naux, nBatch); } // ACCEPTANCE RATES FOR ADAPTIVE METROPOLIS-HASTINGS UPDATES - arma::vec muAccept = zeros(q0); arma::vec PmuAux = zeros(q0); - arma::vec deltaAccept = zeros(q0); arma::vec PdeltaAux = zeros(q0); - arma::vec nuAccept = zeros(n); arma::vec PnuAux = zeros(n); - arma::vec thetaAccept = zeros(nBatch); arma::vec PthetaAux = zeros(nBatch); - + arma::vec muAccept = arma::zeros(q0); arma::vec PmuAux = arma::zeros(q0); + arma::vec deltaAccept = arma::zeros(q0); arma::vec PdeltaAux = arma::zeros(q0); + arma::vec nuAccept = arma::zeros(n); arma::vec PnuAux = arma::zeros(n); + arma::vec thetaAccept = arma::zeros(nBatch); arma::vec PthetaAux = arma::zeros(nBatch); // INITIALIZATION OF VALUES FOR MCMC RUN - arma::mat muAux = zeros(q0,2); muAux.col(0) = as_arma(mu0); - arma::mat deltaAux = zeros(q0,2); deltaAux.col(0) = as_arma(delta0); - arma::vec sAux = as_arma(s0); - arma::mat nuAux = zeros(n,2); nuAux.col(0) = as_arma(nu0); - arma::mat thetaAux = zeros(nBatch, 2); thetaAux.col(0) = as_arma(theta0); - arma::vec thetaBatch = BatchDesign_arma * as_arma(theta0); - - // INITIALIZATION OF ADAPTIVE VARIANCES - arma::vec LSmuAux = as_arma(LSmu0); - arma::vec LSdeltaAux = as_arma(LSdelta0); - arma::vec LSnuAux = as_arma(LSnu0); - arma::vec LSthetaAux = as_arma(LStheta0); - - + arma::mat muAux = arma::zeros(q0,2); muAux.col(0)=as_arma(mu0); arma::vec LSmuAux = as_arma(LSmu0); + arma::mat deltaAux = arma::zeros(q0,2); deltaAux.col(0)=as_arma(delta0); arma::vec LSdeltaAux = as_arma(LSdelta0); + arma::vec phiAux = phi0_arma; + arma::mat nuAux = arma::zeros(n,2); nuAux.col(0)=as_arma(nu0); arma::vec LSnuAux = as_arma(LSnu0); + arma::mat thetaAux = arma::zeros(nBatch, 2); thetaAux.col(0) = theta0 * arma::ones(nBatch); + arma::vec LSthetaAux = LStheta0 * arma::ones(nBatch); // OTHER AUXILIARY QUANTITIES FOR ADAPTIVE METROPOLIS UPDATES - arma::vec PmuAux0 = zeros(q0); arma::vec PdeltaAux0 = zeros(q0); - arma::vec PnuAux0 = zeros(n); arma::vec PthetaAux0 = zeros(nBatch); + arma::vec PmuAux0 = arma::zeros(q0); arma::vec PdeltaAux0 = arma::zeros(q0); + arma::vec PnuAux0 = arma::zeros(n); arma::vec PthetaAux0 = arma::ones(nBatch); - // BATCH INITIALIZATION FOR ADAPTIVE METROPOLIS UPDATES - // (RE-INITIALIZE EVERY 50 ITERATIONS) - int Ibatch = 0; - - // INITIALIZATION OF PARAMETERS TO RETURN IN UPDATE FUNCTIONS - // To avoid repeated initialisation - arma::vec y_q0 = ones(q0); arma::vec y_n = ones(n); - arma::vec ind_q0 = zeros(q0); arma::vec ind_n = zeros(n); - arma::vec u_q0 = zeros(q0); arma::vec u_n = zeros(n); - arma::vec ones_n = ones(n); + // BATCH INITIALIZATION FOR ADAPTIVE METROPOLIS UPDATES (RE-INITIALIZE EVERY 50 ITERATIONS) + int Ibatch = 0; int i; // INITIALIZATION OF PARAMETERS TO RETURN IN UPDATE FUNCTIONS - arma::vec RefFreq = zeros(q0); + arma::vec muUpdateAux = arma::ones(q0); + arma::vec indQ = arma::zeros(q0); + double LSmuAuxExtra = 0; + arma::vec RefFreq = arma::zeros(q0); int RefAux; - Rcout << "-------------------------------------------------------------" << std::endl; - Rcout << "MCMC sampler has been started: " << N << " iterations to go." << std::endl; - Rcout << "-------------------------------------------------------------" << std::endl; + arma::vec y_n = arma::ones(n); + + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "MCMC sampler has been started: " << N << " iterations to go." << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; // START OF MCMC LOOP - for (int i=0; i=Burn) {thetaAccept += thetaAux.col(1);} - thetaBatch = BatchDesign_arma * thetaAux.col(0); - - +// Rcpp::Rcout << "theta" << thetaAux.col(0).t() << std::endl; + // UPDATE OF MU: 1st COLUMN IS THE UPDATE, 2nd COLUMN IS THE ACCEPTANCE INDICATOR - if(StochasticRef == 1) + // Additional steps required for ConstrainType = 3 (stochastic reference) + if((ConstrainType == 3) | (ConstrainType == 5)) { - RefAux = as_scalar(arma::randi(1,arma::distr_param(0, RefGenes_arma.size()-1))); + RefAux = as_scalar(arma::randi( 1, arma::distr_param(0, RefGenes_arma.size()-1) )); RefGene = RefGenes(RefAux); if(i >= Burn) {RefFreq(RefGene) += 1;} } - muAux = muUpdateNoSpikes(muAux.col(0), exp(LSmuAux), Constrain, Counts_arma, - deltaAux.col(0), nuAux.col(0), sumByCellAll_arma, - s2mu, q0, n, RefGene, - ConstrainGene_arma, NotConstrainGene_arma, - ConstrainType, y_q0, u_q0, ind_q0); + muAux = muUpdateNoSpikes(muAux.col(0), exp(LSmuAux), Constrain, Counts_arma, deltaAux.col(0), + nuAux.col(0), sumByCellAll_arma, s2mu, q0, n, + muUpdateAux, indQ, RefGene, ConstrainGene_arma, NotConstrainGene_arma, + ConstrainType); PmuAux += muAux.col(1); if(i>=Burn) {muAccept += muAux.col(1);} - +// Rcpp::Rcout << "mu" << muAux.col(0).t() << std::endl; - // UPDATE OF DELTA: - // 1st COLUMN IS THE UPDATE, - // 2nd COLUMN IS THE ACCEPTANCE INDICATOR - deltaAux = deltaUpdate(deltaAux.col(0), exp(LSdeltaAux), Counts_arma, - muAux.col(0), nuAux.col(0), - adelta, bdelta, s2delta, prior_delta, - q0, n, y_q0, u_q0, ind_q0); + // UPDATE OF DELTA: 1st COLUMN IS THE UPDATE, 2nd COLUMN IS THE ACCEPTANCE INDICATOR + deltaAux = deltaUpdateNoSpikes(deltaAux.col(0), exp(LSdeltaAux), Counts_arma, + muAux.col(0), nuAux.col(0), adelta, bdelta, q0, n, s2delta, prior_delta); PdeltaAux += deltaAux.col(1); if(i>=Burn) {deltaAccept += deltaAux.col(1);} - - // UPDATE OF NU: - // 1st COLUMN IS THE UPDATE, - // 2nd COLUMN IS THE ACCEPTANCE INDICATOR - nuAux = nuUpdateBatch(nuAux.col(0), exp(LSnuAux), Counts_arma, 0, - BatchDesign_arma, - muAux.col(0), 1/deltaAux.col(0), - ones_n, sAux, thetaBatch, sumByGeneAll_arma, q0, n, - y_n, u_n, ind_n); +// Rcpp::Rcout << "delta" << deltaAux.col(0).t() << std::endl; + + // UPDATE OF NU: 1st COLUMN IS THE UPDATE, 2nd COLUMN IS THE ACCEPTANCE INDICATOR + nuAux = nuUpdateNoSpikes(nuAux.col(0), exp(LSnuAux), Counts_arma, + BatchDesign_arma, + muAux.col(0), deltaAux.col(0), + phiAux, thetaAux.col(0), sumByGeneAll_arma, q0, n, + BatchInfo_arma, BatchIds_arma, nBatch, + BatchSizes_arma, BatchOffSet_arma); PnuAux += nuAux.col(1); if(i>=Burn) {nuAccept += nuAux.col(1);} +// Rcpp::Rcout << "nu" << nuAux.col(0).t() << std::endl; // STOP ADAPTING THE PROPOSAL VARIANCES AFTER EndAdapt ITERATIONS if(i < EndAdapt) @@ -1532,18 +1628,14 @@ Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( // UPDATE OF PROPOSAL VARIANCES (ONLY EVERY 50 ITERATIONS) if(Ibatch==50) { - PmuAux = PmuAux / (50-RefFreq); - PmuAux = -1 + 2*arma::conv_to::from(PmuAux>ar); + PmuAux=PmuAux/(50-RefFreq); PmuAux = -1+2*arma::conv_to::from(PmuAux>ar);// LSmuAux.elem(find(Index_arma != RefGene)) = LSmuAux.elem(find(Index_arma != RefGene)) + PmuAux.elem(find(Index_arma != RefGene))*0.1; - PdeltaAux = PdeltaAux / 50; - PdeltaAux = -1 + 2*arma::conv_to::from(PdeltaAux>ar); - LSdeltaAux = LSdeltaAux + PdeltaAux*0.1; - PnuAux = PnuAux / 50; - PnuAux = -1 + 2*arma::conv_to::from(PnuAux>ar); - LSnuAux = LSnuAux + PnuAux*0.1; - PthetaAux = PthetaAux / 50; - PthetaAux = -1 + 2*arma::conv_to::from(PthetaAux>ar); - LSthetaAux = LSthetaAux + PthetaAux*0.1; + PdeltaAux=PdeltaAux/50; PdeltaAux = -1+2*arma::conv_to::from(PdeltaAux>ar); + LSdeltaAux=LSdeltaAux+PdeltaAux*0.1; + PnuAux=PnuAux/50; PnuAux = -1+2*arma::conv_to::from(PnuAux>ar); + LSnuAux=LSnuAux+PnuAux*0.1; + PthetaAux=PthetaAux/50; PthetaAux = -1+2*arma::conv_to::from(PthetaAux>ar); + LSthetaAux= LSthetaAux + PthetaAux*0.1; Ibatch = 0; PmuAux = PmuAux0; PdeltaAux = PdeltaAux0; @@ -1551,93 +1643,92 @@ Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( } } - // STORAGE OF DRAWS if(i%Thin==0 & i>=Burn) - { - mu.col(i/Thin - Burn/Thin) = muAux.col(0); - delta.col(i/Thin - Burn/Thin) = deltaAux.col(0); - s.col(i/Thin - Burn/Thin) = sAux; - nu.col(i/Thin - Burn/Thin) = nuAux.col(0); - theta.col(i/Thin - Burn/Thin) = thetaAux.col(0); + { + mu.row(i/Thin - Burn/Thin) = muAux.col(0).t(); + delta.row(i/Thin - Burn/Thin) = deltaAux.col(0).t(); + phi.row(i/Thin - Burn/Thin) = phiAux.t(); + nu.row(i/Thin - Burn/Thin) = nuAux.col(0).t(); + theta.row(i/Thin - Burn/Thin) = thetaAux.col(0).t(); if(StoreAdapt == 1) { - LSmu.col(i/Thin - Burn/Thin) = LSmuAux; - LSdelta.col(i/Thin - Burn/Thin) = LSdeltaAux; - LSnu.col(i/Thin - Burn/Thin) = LSnuAux; - LStheta.col(i/Thin - Burn/Thin) = LSthetaAux; + LSmu.row(i/Thin - Burn/Thin) = LSmuAux.t(); + LSdelta.row(i/Thin - Burn/Thin) = LSdeltaAux.t(); + LSnu.row(i/Thin - Burn/Thin) = LSnuAux.t(); + LStheta.row(i/Thin - Burn/Thin) = LSthetaAux.t(); } } // PRINT IN CONSOLE SAMPLED VALUES FOR FEW SELECTED PARAMETERS if(i%(2*Thin) == 0 & PrintProgress == 1) { - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "MCMC iteration " << i << " out of " << N << " has been completed." << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "Current draws of some selected parameters are displayed below." << std::endl; - Rcout << "mu (gene 1): " << muAux(0,0) << std::endl; - Rcout << "delta (gene 1): " << deltaAux(0,0) << std::endl; - Rcout << "s (cell 1): " << sAux(0) << std::endl; - Rcout << "nu (cell 1): " << nuAux(0,0) << std::endl; - Rcout << "theta (batch 1): " << thetaAux(0,0) << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "Current proposal variances for Metropolis Hastings updates (log-scale)." << std::endl; - Rcout << "LSmu (gene 1): " << LSmuAux(0) << std::endl; - Rcout << "LSdelta (gene 1): " << LSdeltaAux(0) << std::endl; - Rcout << "LSnu (cell 1): " << LSnuAux(0) << std::endl; - Rcout << "LStheta (batch 1): " << LSthetaAux(0) << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "MCMC iteration " << i << " out of " << N << " has been completed." << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "Current draws of some selected parameters are displayed below." << std::endl; + Rcpp::Rcout << "mu (gene 1): " << muAux(0,0) << std::endl; + Rcpp::Rcout << "delta (gene 1): " << deltaAux(0,0) << std::endl; + Rcpp::Rcout << "phi (cell 1): " << phiAux(0) << std::endl; + Rcpp::Rcout << "nu (cell 1): " << nuAux(0,0) << std::endl; + Rcpp::Rcout << "theta (batch 1): " << thetaAux(0,0) << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "Current proposal variances for Metropolis Hastings updates (log-scale)." << std::endl; + Rcpp::Rcout << "LSmu (gene 1): " << LSmuAuxExtra + LSmuAux(0) << std::endl; + Rcpp::Rcout << "LSdelta (gene 1): " << LSdeltaAux(0) << std::endl; + Rcpp::Rcout << "LSnu (cell 1): " << LSnuAux(0) << std::endl; + Rcpp::Rcout << "LStheta (batch 1): " << LSthetaAux(0) << std::endl; } } // ACCEPTANCE RATE CONSOLE OUTPUT - Rcout << " " << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "All " << N << " MCMC iterations have been completed." << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << " " << std::endl; + Rcpp::Rcout << " " << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "All " << N << " MCMC iterations have been completed." << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << " " << std::endl; // ACCEPTANCE RATE CONSOLE OUTPUT - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << "Please see below a summary of the overall acceptance rates." << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << " " << std::endl; - - Rcout << "Minimum acceptance rate among mu[i]'s: " << min(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; - Rcout << "Average acceptance rate among mu[i]'s: " << mean(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; - Rcout << "Maximum acceptance rate among mu[i]'s: " << max(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; - - Rcout << " " << std::endl; - Rcout << "Minimum acceptance rate among delta[i]'s: " << min(deltaAccept/(N-Burn)) << std::endl; - Rcout << "Average acceptance rate among delta[i]'s: " << mean(deltaAccept/(N-Burn)) << std::endl; - Rcout << "Average acceptance rate among delta[i]'s: " << max(deltaAccept/(N-Burn)) << std::endl; - Rcout << " " << std::endl; - Rcout << "Minimum acceptance rate among nu[jk]'s: " << min(nuAccept/(N-Burn)) << std::endl; - Rcout << "Average acceptance rate among nu[jk]'s: " << mean(nuAccept/(N-Burn)) << std::endl; - Rcout << "Maximum acceptance rate among nu[jk]'s: " << max(nuAccept/(N-Burn)) << std::endl; - Rcout << " " << std::endl; - Rcout << "Minimum acceptance rate among theta[k]'s: " << min(thetaAccept/(N-Burn)) << std::endl; - Rcout << "Average acceptance rate among theta[k]'s: " << mean(thetaAccept/(N-Burn)) << std::endl; - Rcout << "Maximum acceptance rate among theta[k]'s: " << max(thetaAccept/(N-Burn)) << std::endl; - Rcout << "--------------------------------------------------------------------" << std::endl; - Rcout << " " << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << "Please see below a summary of the overall acceptance rates." << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << " " << std::endl; + + Rcpp::Rcout << "Minimum acceptance rate among mu[i]'s: " << min(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; + Rcpp::Rcout << "Average acceptance rate among mu[i]'s: " << mean(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; + Rcpp::Rcout << "Maximum acceptance rate among mu[i]'s: " << max(muAccept.elem(find(Index_arma != RefGene))/(N-Burn)) << std::endl; + + Rcpp::Rcout << " " << std::endl; + Rcpp::Rcout << "Minimum acceptance rate among delta[i]'s: " << min(deltaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Average acceptance rate among delta[i]'s: " << mean(deltaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Average acceptance rate among delta[i]'s: " << max(deltaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << " " << std::endl; + Rcpp::Rcout << "Minimum acceptance rate among nu[jk]'s: " << min(nuAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Average acceptance rate among nu[jk]'s: " << mean(nuAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Maximum acceptance rate among nu[jk]'s: " << max(nuAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << " " << std::endl; + Rcpp::Rcout << "Minimum acceptance rate among theta[k]'s: " << min(thetaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Average acceptance rate among theta[k]'s: " << mean(thetaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "Maximum acceptance rate among theta[k]'s: " << max(thetaAccept/(N-Burn)) << std::endl; + Rcpp::Rcout << "--------------------------------------------------------------------" << std::endl; + Rcpp::Rcout << " " << std::endl; if(StoreAdapt == 1) { // OUTPUT (AS A LIST) return(Rcpp::List::create( - Rcpp::Named("mu") = mu.t(), - Rcpp::Named("delta") = delta.t(), - Rcpp::Named("s") = s.t(), - Rcpp::Named("nu") = nu.t(), - Rcpp::Named("theta") = theta.t(), - Rcpp::Named("ls.mu") = LSmu.t(), - Rcpp::Named("ls.delta") = LSdelta.t(), - Rcpp::Named("ls.nu") = LSnu.t(), - Rcpp::Named("ls.theta") = LStheta.t(), + Rcpp::Named("mu") = mu, + Rcpp::Named("delta") = delta, + Rcpp::Named("phi") = phi, + Rcpp::Named("nu") = nu, + Rcpp::Named("theta") = theta, + Rcpp::Named("ls.mu") = LSmu, + Rcpp::Named("ls.delta") = LSdelta, + Rcpp::Named("ls.nu") = LSnu, + Rcpp::Named("ls.theta") = LStheta, Rcpp::Named("RefFreq") = RefFreq/(N-Burn))); } @@ -1645,11 +1736,11 @@ Rcpp::List HiddenBASiCS_MCMCcppNoSpikes( { // OUTPUT (AS A LIST) return(Rcpp::List::create( - Rcpp::Named("mu") = mu.t(), - Rcpp::Named("delta") = delta.t(), - Rcpp::Named("s") = s.t(), - Rcpp::Named("nu") = nu.t(), - Rcpp::Named("theta") = theta.t(), + Rcpp::Named("mu") = mu, + Rcpp::Named("delta") = delta, + Rcpp::Named("phi") = phi, + Rcpp::Named("nu") = nu, + Rcpp::Named("theta") = theta, Rcpp::Named("RefFreq") = RefFreq/(N-Burn))); } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 6e51767d..7507ba4c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -69,8 +69,8 @@ BEGIN_RCPP END_RCPP } // HiddenBASiCS_MCMCcppNoSpikes -Rcpp::List HiddenBASiCS_MCMCcppNoSpikes(int N, int Thin, int Burn, NumericMatrix Counts, NumericMatrix BatchDesign, NumericVector mu0, NumericVector delta0, NumericVector s0, NumericVector nu0, NumericVector theta0, double s2mu, double adelta, double bdelta, double s2delta, double prior_delta, double as, double bs, double atheta, double btheta, double ar, NumericVector LSmu0, NumericVector LSdelta0, NumericVector LSnu0, NumericVector LStheta0, NumericVector sumByCellAll, NumericVector sumByGeneAll, int StoreAdapt, int EndAdapt, int PrintProgress, double Constrain, NumericVector Index, int RefGene, NumericVector RefGenes, NumericVector ConstrainGene, NumericVector NotConstrainGene, int ConstrainType, int StochasticRef); -RcppExport SEXP _BASiCS_HiddenBASiCS_MCMCcppNoSpikes(SEXP NSEXP, SEXP ThinSEXP, SEXP BurnSEXP, SEXP CountsSEXP, SEXP BatchDesignSEXP, SEXP mu0SEXP, SEXP delta0SEXP, SEXP s0SEXP, SEXP nu0SEXP, SEXP theta0SEXP, SEXP s2muSEXP, SEXP adeltaSEXP, SEXP bdeltaSEXP, SEXP s2deltaSEXP, SEXP prior_deltaSEXP, SEXP asSEXP, SEXP bsSEXP, SEXP athetaSEXP, SEXP bthetaSEXP, SEXP arSEXP, SEXP LSmu0SEXP, SEXP LSdelta0SEXP, SEXP LSnu0SEXP, SEXP LStheta0SEXP, SEXP sumByCellAllSEXP, SEXP sumByGeneAllSEXP, SEXP StoreAdaptSEXP, SEXP EndAdaptSEXP, SEXP PrintProgressSEXP, SEXP ConstrainSEXP, SEXP IndexSEXP, SEXP RefGeneSEXP, SEXP RefGenesSEXP, SEXP ConstrainGeneSEXP, SEXP NotConstrainGeneSEXP, SEXP ConstrainTypeSEXP, SEXP StochasticRefSEXP) { +Rcpp::List HiddenBASiCS_MCMCcppNoSpikes(int N, int Thin, int Burn, NumericMatrix Counts, NumericMatrix BatchDesign, NumericVector mu0, NumericVector delta0, NumericVector phi0, NumericVector nu0, double theta0, double s2mu, double adelta, double bdelta, double aphi, double bphi, double atheta, double btheta, double ar, NumericVector LSmu0, NumericVector LSdelta0, NumericVector LSnu0, double LStheta0, NumericVector sumByCellAll, NumericVector sumByGeneAll, int StoreAdapt, int EndAdapt, int PrintProgress, double s2delta, double prior_delta, NumericVector BatchInfo, NumericVector BatchIds, NumericVector BatchSizes, NumericVector BatchOffSet, double Constrain, NumericVector Index, int RefGene, NumericVector RefGenes, NumericVector ConstrainGene, NumericVector NotConstrainGene, int ConstrainType); +RcppExport SEXP _BASiCS_HiddenBASiCS_MCMCcppNoSpikes(SEXP NSEXP, SEXP ThinSEXP, SEXP BurnSEXP, SEXP CountsSEXP, SEXP BatchDesignSEXP, SEXP mu0SEXP, SEXP delta0SEXP, SEXP phi0SEXP, SEXP nu0SEXP, SEXP theta0SEXP, SEXP s2muSEXP, SEXP adeltaSEXP, SEXP bdeltaSEXP, SEXP aphiSEXP, SEXP bphiSEXP, SEXP athetaSEXP, SEXP bthetaSEXP, SEXP arSEXP, SEXP LSmu0SEXP, SEXP LSdelta0SEXP, SEXP LSnu0SEXP, SEXP LStheta0SEXP, SEXP sumByCellAllSEXP, SEXP sumByGeneAllSEXP, SEXP StoreAdaptSEXP, SEXP EndAdaptSEXP, SEXP PrintProgressSEXP, SEXP s2deltaSEXP, SEXP prior_deltaSEXP, SEXP BatchInfoSEXP, SEXP BatchIdsSEXP, SEXP BatchSizesSEXP, SEXP BatchOffSetSEXP, SEXP ConstrainSEXP, SEXP IndexSEXP, SEXP RefGeneSEXP, SEXP RefGenesSEXP, SEXP ConstrainGeneSEXP, SEXP NotConstrainGeneSEXP, SEXP ConstrainTypeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -81,28 +81,32 @@ BEGIN_RCPP Rcpp::traits::input_parameter< NumericMatrix >::type BatchDesign(BatchDesignSEXP); Rcpp::traits::input_parameter< NumericVector >::type mu0(mu0SEXP); Rcpp::traits::input_parameter< NumericVector >::type delta0(delta0SEXP); - Rcpp::traits::input_parameter< NumericVector >::type s0(s0SEXP); + Rcpp::traits::input_parameter< NumericVector >::type phi0(phi0SEXP); Rcpp::traits::input_parameter< NumericVector >::type nu0(nu0SEXP); - Rcpp::traits::input_parameter< NumericVector >::type theta0(theta0SEXP); + Rcpp::traits::input_parameter< double >::type theta0(theta0SEXP); Rcpp::traits::input_parameter< double >::type s2mu(s2muSEXP); Rcpp::traits::input_parameter< double >::type adelta(adeltaSEXP); Rcpp::traits::input_parameter< double >::type bdelta(bdeltaSEXP); - Rcpp::traits::input_parameter< double >::type s2delta(s2deltaSEXP); - Rcpp::traits::input_parameter< double >::type prior_delta(prior_deltaSEXP); - Rcpp::traits::input_parameter< double >::type as(asSEXP); - Rcpp::traits::input_parameter< double >::type bs(bsSEXP); + Rcpp::traits::input_parameter< double >::type aphi(aphiSEXP); + Rcpp::traits::input_parameter< double >::type bphi(bphiSEXP); Rcpp::traits::input_parameter< double >::type atheta(athetaSEXP); Rcpp::traits::input_parameter< double >::type btheta(bthetaSEXP); Rcpp::traits::input_parameter< double >::type ar(arSEXP); Rcpp::traits::input_parameter< NumericVector >::type LSmu0(LSmu0SEXP); Rcpp::traits::input_parameter< NumericVector >::type LSdelta0(LSdelta0SEXP); Rcpp::traits::input_parameter< NumericVector >::type LSnu0(LSnu0SEXP); - Rcpp::traits::input_parameter< NumericVector >::type LStheta0(LStheta0SEXP); + Rcpp::traits::input_parameter< double >::type LStheta0(LStheta0SEXP); Rcpp::traits::input_parameter< NumericVector >::type sumByCellAll(sumByCellAllSEXP); Rcpp::traits::input_parameter< NumericVector >::type sumByGeneAll(sumByGeneAllSEXP); Rcpp::traits::input_parameter< int >::type StoreAdapt(StoreAdaptSEXP); Rcpp::traits::input_parameter< int >::type EndAdapt(EndAdaptSEXP); Rcpp::traits::input_parameter< int >::type PrintProgress(PrintProgressSEXP); + Rcpp::traits::input_parameter< double >::type s2delta(s2deltaSEXP); + Rcpp::traits::input_parameter< double >::type prior_delta(prior_deltaSEXP); + Rcpp::traits::input_parameter< NumericVector >::type BatchInfo(BatchInfoSEXP); + Rcpp::traits::input_parameter< NumericVector >::type BatchIds(BatchIdsSEXP); + Rcpp::traits::input_parameter< NumericVector >::type BatchSizes(BatchSizesSEXP); + Rcpp::traits::input_parameter< NumericVector >::type BatchOffSet(BatchOffSetSEXP); Rcpp::traits::input_parameter< double >::type Constrain(ConstrainSEXP); Rcpp::traits::input_parameter< NumericVector >::type Index(IndexSEXP); Rcpp::traits::input_parameter< int >::type RefGene(RefGeneSEXP); @@ -110,8 +114,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< NumericVector >::type ConstrainGene(ConstrainGeneSEXP); Rcpp::traits::input_parameter< NumericVector >::type NotConstrainGene(NotConstrainGeneSEXP); Rcpp::traits::input_parameter< int >::type ConstrainType(ConstrainTypeSEXP); - Rcpp::traits::input_parameter< int >::type StochasticRef(StochasticRefSEXP); - rcpp_result_gen = Rcpp::wrap(HiddenBASiCS_MCMCcppNoSpikes(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef)); + rcpp_result_gen = Rcpp::wrap(HiddenBASiCS_MCMCcppNoSpikes(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType)); return rcpp_result_gen; END_RCPP } @@ -119,7 +122,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_BASiCS_HiddenBASiCS_MCMCcpp", (DL_FUNC) &_BASiCS_HiddenBASiCS_MCMCcpp, 35}, {"_BASiCS_HiddenBASiCS_DenoisedRates", (DL_FUNC) &_BASiCS_HiddenBASiCS_DenoisedRates, 7}, - {"_BASiCS_HiddenBASiCS_MCMCcppNoSpikes", (DL_FUNC) &_BASiCS_HiddenBASiCS_MCMCcppNoSpikes, 37}, + {"_BASiCS_HiddenBASiCS_MCMCcppNoSpikes", (DL_FUNC) &_BASiCS_HiddenBASiCS_MCMCcppNoSpikes, 40}, {NULL, NULL, 0} }; diff --git a/vignettes/BASiCS_nospikes.Rmd b/vignettes/BASiCS_nospikes.Rmd deleted file mode 100644 index c805d629..00000000 --- a/vignettes/BASiCS_nospikes.Rmd +++ /dev/null @@ -1,151 +0,0 @@ - - - ```{r, echo=FALSE, results="hide", message=TRUE} -require(knitr) -opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) -``` - -```{r style, echo=FALSE, results='asis'} -BiocStyle::markdown() -``` - -```{r library, echo=FALSE} -library(BASiCS) -``` - -# BASiCS: vertical versus horizontal integration. - -Package: `r Biocpkg("BASiCS")`
-Authors: Catalina Vallejos (cnvallej@uc.cl) and Nils Eling (eling@ebi.ac.uk)
-Compilation date: `r Sys.Date()` - -*** - -# Introduction - -This vignette is to illustrate the no-spikes case of BASiCS. - -# Quick start - -### The input dataset - -The input dataset for BASiCS must be stored as an `SingleCellExperiment` -object (see `r Biocpkg("SingleCellExperiment")` package). - -The `newBASiCS_Data` function can be used to create the input data object based -on the following information: - -* `Counts`: a matrix of raw expression counts with dimensions $q$ times $n$. -Within this matrix, $q_0$ rows must correspond to biological genes and $q-q_0$ -rows must correspond to technical spike-in genes. Gene names must be stored as `rownames(Counts)`. - -* `Tech`: a vector of `TRUE`/`FALSE` elements with length $q$. -If `Tech[i] = FALSE` the gene `i` is biological; otherwise the gene is spike-in. -This vector must be specified in the same order of genes as in the -`Counts` matrix. - -* `SpikeInfo`: a `data.frame` with $q-q_0$ rows. First column must contain the -names associated to the spike-in genes (as in `rownames(Counts)`). Second column -must contain the input number of molecules for the spike-in genes -(amount per cell). - -* `BatchInfo` (optional): vector of length $n$ to indicate batch structure -(whenever cells have been processed using multiple batches). - -For example, the following code simulates a dataset with 50 genes -(40 biological and 10 spike-in) and 40 cells. - -```{r ExampleDataTest} -set.seed(1) -Counts <- matrix(rpois(50*40, 2), ncol = 40) -rownames(Counts) <- c(paste0("Gene", 1:40), paste0("Spike", 1:10)) -Tech <- c(rep(FALSE,40),rep(TRUE,10)) -set.seed(2) -SpikeInput <- rgamma(10,1,1) -SpikeInfo <- data.frame("SpikeID" = paste0("Spike", 1:10), - "SpikeInput" = SpikeInput) - -# With spikes; with batches -DataExample <- newBASiCS_Data(Counts, Tech, SpikeInfo, - BatchInfo = rep(c(1,2), each = 20)) - -# Without spikes; with batches -DataExample <- newBASiCS_Data(Counts, Tech, - BatchInfo = rep(c(1,2), each = 20)) -``` - - -*** - - -### Running the MCMC sampler - - Parameter estimation is performed using the `BASiCS_MCMC` function. -Essential parameters for running this algorithm are: - -* `N`: total number of iterations -* `Thin`: length of the thining period (i.e. only every `Thin` iterations will -be stored in the output of the `BASiCS_MCMC`) -* `Burn`: length of burn-in period (i.e. the initial `Burn` iterations that will -be discarded from the output of the `BASiCS_MCMC`) - -If the optional parameter `PrintProgress` is set to `TRUE`, the R -console will display the progress of the MCMC algorithm. -For other optional parameters refer to `help(BASiCS_MCMC)`. - -Here, we illustrate the usage of `BASiCS_MCMC` using a built-in -synthetic datasets. - -- Vertical integration (with spikes) - -```{r quick-start-MCMC-vertical} -Data1 <- makeExampleBASiCS_Data(WithSpikes = TRUE, WithBatch = TRUE) -Chain1 <- BASiCS_MCMC(Data = Data1, N = 1000, Thin = 10, Burn = 500, - PrintProgress = FALSE) -Summary1 <- Summary(Chain1) -``` - - -- Horizontal integration (no spikes case) - -```{r quick-start-MCMC-horizontal} -Data2 <- makeExampleBASiCS_Data(WithSpikes = FALSE, WithBatch = TRUE) -Chain2 <- BASiCS_MCMC(Data = Data2, N = 10000, Thin = 10, Burn = 5000, - PrintProgress = FALSE, StoreDir = tempdir(), - StochasticRef = FALSE) -plot(Chain2, Param = "phi", Cell = 1) -plot(Chain2, Param = "theta", Batch = 1) -plot(Chain2, Param = "mu", Gene = 1) -plot(Chain2, Param = "delta", Gene = 1) -Summary2 <- Summary(Chain2) -``` - -```{r} -Chain3 <- BASiCS_MCMC(Data = Data2, N = 10000, Thin = 10, Burn = 5000, - PrintProgress = FALSE, StoreDir = tempdir(), - StochasticRef = TRUE) -plot(Chain3, Param = "phi", Cell = 1) -plot(Chain3, Param = "theta", Batch = 1) -plot(Chain3, Param = "mu", Gene = 1) -plot(Chain3, Param = "delta", Gene = 1) -Summary3 <- Summary(Chain3) -``` - -# Case studies - -The following datasets can be used to test the performance of this approach: - - - - - - -# Session information - -```{r SessionInfo} -sessionInfo() -```