From cbb6c062aa5caea78977a251ed9bfabd556aff35 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 27 May 2023 15:21:42 -0400 Subject: [PATCH] Fix batch_var for vst2 --- DESCRIPTION | 4 ++-- R/fit.R | 60 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f3b3324..fa87b7f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: sctransform Type: Package Title: Variance Stabilizing Transformations for Single Cell UMI Data -Version: 0.3.5.9005 -Date: 2023-05-22 +Version: 0.3.5.9006 +Date: 2023-05-27 Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Saket", family = "Choudhary", email = "schoudhary@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5202-7633")), diff --git a/R/fit.R b/R/fit.R index ede2355..eb48af3 100644 --- a/R/fit.R +++ b/R/fit.R @@ -106,6 +106,10 @@ fit_overdisp_mle <- function(umi, mu, intercept, slope){ # Use log_umi as offset using glmGamPoi fit_glmGamPoi_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) { + log10_umi <- data$log_umi + stopifnot(!is.null(log10_umi)) + log_umi <- log(10^log10_umi) + # only intercept varies includes.batch_var <- FALSE if (grepl(pattern = "\\(log_umi\\) :", x = model_str)) { @@ -113,21 +117,17 @@ fit_glmGamPoi_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) { } new_formula <- gsub("y", "", model_str) + includes.log_umi <- grepl(pattern = "~ log_umi", x = new_formula) if (!includes.batch_var) { # if therse is no batch variable - remove log_umi and fix it # remove log_umi from model formula if it is with batch variables - new_formula <- gsub("\\+ log_umi", "", new_formula) + new_formula <- gsub(pattern = "\\+ log_umi", replacement = "", x = new_formula) # replace log_umi with 1 if it is the only formula - new_formula <- gsub("log_umi", "1", new_formula) - # log_umi <- 0 + new_formula <- gsub(pattern = "log_umi", replacement = "1", x = new_formula) + } else { + log_umi <- 0 } - - log10_umi <- data$log_umi - stopifnot(!is.null(log10_umi)) - log_umi <- log(10^log10_umi) - - #browser() fit <- glmGamPoi::glm_gp(data = umi, design = as.formula(new_formula), col_data = data, @@ -138,30 +138,41 @@ fit_glmGamPoi_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) { fit$theta <- pmin(1 / fit$overdispersions, rowMeans(fit$Mu) / 1e-4) } if ("Intercept" %in% colnames(x = fit$Beta)) { - model_pars <- cbind(fit$theta, - fit$Beta[, "Intercept"], - rep(log(10), nrow(umi))) - dimnames(model_pars) <- list(rownames(umi), c('theta', '(Intercept)', 'log_umi')) - n_coefficients <- ncol(fit$Beta) - if (n_coefficients>1){ - model_pars <- cbind(model_pars, fit$Beta[, 2:n_coefficients]) - colnames(x = model_pars)[4:ncol(x = model_pars)] <- colnames(x = fit$Beta)[2:n_coefficients] + if (includes.log_umi){ + model_pars <- cbind(fit$theta, + fit$Beta[, "Intercept"], + rep(log(10), nrow(umi))) + dimnames(model_pars) <- list(rownames(umi), c('theta', '(Intercept)', 'log_umi')) + + n_coefficients <- ncol(fit$Beta) + if (n_coefficients>1){ + model_pars <- cbind(model_pars, fit$Beta[, 2:n_coefficients]) + colnames(x = model_pars)[4:ncol(x = model_pars)] <- colnames(x = fit$Beta)[2:n_coefficients] + } + } else { + model_pars <- cbind(fit$theta, + fit$Beta) + dimnames(model_pars) <- list(rownames(umi), c('theta', colnames(x = fit$Beta))) } } else { if (!includes.batch_var){ - model_pars <- cbind(fit$theta, - rep(log(10), nrow(umi)), - fit$Beta) - dimnames(model_pars) <- list(rownames(umi), c('theta', 'log_umi', colnames(x = fit$Beta))) + if (includes.log_umi){ + model_pars <- cbind(fit$theta, + rep(log(10), nrow(umi)), + fit$Beta) + dimnames(model_pars) <- list(rownames(umi), c('theta', 'log_umi', colnames(x = fit$Beta))) + } else { + model_pars <- cbind(fit$theta, + fit$Beta) + dimnames(model_pars) <- list(rownames(umi), c('theta', colnames(x = fit$Beta))) + } } else { model_pars <- cbind(fit$theta, fit$Beta) dimnames(model_pars) <- list(rownames(umi), c('theta', colnames(x = fit$Beta))) } - - } - + colnames(x = model_pars)[match(x = 'Intercept', table = colnames(x = model_pars))] <- "(Intercept)" return(model_pars) } @@ -192,5 +203,6 @@ fit_nb_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) { model_pars <- t(par_mat) model_pars <- cbind(model_pars, rep(log(10), nrow(umi))) rownames(x = model_pars) <- rownames(x = umi) + colnames(x = model_pars)[match(x = 'Intercept', table = colnames(x = model_pars))] <- "(Intercept)" return(model_pars) }