From 6c5d2842bbd7b226f73c04ca91d8010a51fbc34f Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 19 Aug 2022 12:44:57 -0400 Subject: [PATCH 1/6] update sparse coercions --- DESCRIPTION | 4 ++-- R/denoise.R | 2 +- R/generate.R | 2 +- R/vst.R | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c072634..3ab5dd1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sctransform Type: Package Title: Variance Stabilizing Transformations for Single Cell UMI Data -Version: 0.3.3 +Version: 0.3.3.9000 Date: 2022-01-10 Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")), @@ -42,4 +42,4 @@ Suggests: knitr Enhances: glmGamPoi -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 diff --git a/R/denoise.R b/R/denoise.R index 81a9b39..48c7a84 100644 --- a/R/denoise.R +++ b/R/denoise.R @@ -236,7 +236,7 @@ correct_counts <- function(x, umi, cell_attr = x$cell_attr, scale_factor = NA, v y.res <- mu + pearson_residual * sqrt(variance) y.res <- round(y.res, 0) y.res[y.res < 0] <- 0 - corrected_data[[length(corrected_data) + 1]] <- as(y.res, Class = 'dgCMatrix') + corrected_data[[length(corrected_data) + 1]] <- as(as(as(y.res, "dMatrix"), "generalMatrix"), "CsparseMatrix") if (verbosity > 1) { setTxtProgressBar(pb, i) } diff --git a/R/generate.R b/R/generate.R index 4d198e4..32aea32 100644 --- a/R/generate.R +++ b/R/generate.R @@ -40,6 +40,6 @@ generate <- function(vst_out, genes = rownames(vst_out$model_pars_fit), x <- MASS::rnegbin(n = length(gene.mu), mu = gene.mu, theta = theta[gene]) return(x) })) - x.sim <- as(x.sim, Class = 'dgCMatrix') + x.sim <- as(as(as(x.sim, "dMatrix"), "generalMatrix"), "CsparseMatrix") return(x.sim) } diff --git a/R/vst.R b/R/vst.R index ed12fd7..b17676c 100644 --- a/R/vst.R +++ b/R/vst.R @@ -432,7 +432,7 @@ vst <- function(umi, } else { rv$umi_corrected <- sctransform::correct(rv, do_round = 
TRUE, do_pos = TRUE, scale_factor = scale_factor, verbosity = verbosity) - rv$umi_corrected <- as(object = rv$umi_corrected, Class = 'dgCMatrix') + rv$umi_corrected <- as(as(as(rv$umi_corrected, "dMatrix"), "generalMatrix"), "CsparseMatrix") } } From 3146ea91aae8c21c9f9751aa855ecc2642ac6a5f Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 19 Aug 2022 13:31:09 -0400 Subject: [PATCH 2/6] Add utility method to convert matrices to dgCMatrix --- DESCRIPTION | 4 ++-- R/denoise.R | 2 +- R/generate.R | 2 +- R/utils.R | 9 +++++++++ R/vst.R | 2 +- README.md | 10 ++++------ 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3ab5dd1..165569a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sctransform Type: Package Title: Variance Stabilizing Transformations for Single Cell UMI Data -Version: 0.3.3.9000 +Version: 0.3.3.9001 Date: 2022-01-10 Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")), @@ -13,7 +13,7 @@ Description: A normalization method for single-cell UMI count data using a negative binomial regression model with regularized parameters. As part of the same regression framework, this package also provides functions for batch correction, and data correction. See Hafemeister and Satija (2019) - , and Choudhary and Satija (2021) + , and Choudhary and Satija (2022) for more details. 
URL: https://github.com/satijalab/sctransform BugReports: https://github.com/satijalab/sctransform/issues diff --git a/R/denoise.R b/R/denoise.R index 48c7a84..07ef3ba 100644 --- a/R/denoise.R +++ b/R/denoise.R @@ -236,7 +236,7 @@ correct_counts <- function(x, umi, cell_attr = x$cell_attr, scale_factor = NA, v y.res <- mu + pearson_residual * sqrt(variance) y.res <- round(y.res, 0) y.res[y.res < 0] <- 0 - corrected_data[[length(corrected_data) + 1]] <- as(as(as(y.res, "dMatrix"), "generalMatrix"), "CsparseMatrix") + corrected_data[[length(corrected_data) + 1]] <- make.sparse(mat = y.res) if (verbosity > 1) { setTxtProgressBar(pb, i) } diff --git a/R/generate.R b/R/generate.R index 32aea32..6a66839 100644 --- a/R/generate.R +++ b/R/generate.R @@ -40,6 +40,6 @@ generate <- function(vst_out, genes = rownames(vst_out$model_pars_fit), x <- MASS::rnegbin(n = length(gene.mu), mu = gene.mu, theta = theta[gene]) return(x) })) - x.sim <- as(as(as(x.sim, "dMatrix"), "generalMatrix"), "CsparseMatrix") + x.sim <- make.sparse(mat = x.sim) return(x.sim) } diff --git a/R/utils.R b/R/utils.R index 17fe3e7..46c419f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -524,3 +524,12 @@ get_nz_median2 <- function(umi){ return (median(umi@x)) } +#' Convert a given matrix to dgCMatrix +#' +#' @param matrix Input matrix +#' +#' @return A dgCMatrix +make.sparse <- function(mat){ + mat <- as(object = mat, Class = "Matrix") + return (as(object = as(object = as(object = mat, Class = "dMatrix"), Class = "generalMatrix"), Class = "CsparseMatrix")) +} diff --git a/R/vst.R b/R/vst.R index b17676c..628b193 100644 --- a/R/vst.R +++ b/R/vst.R @@ -432,7 +432,7 @@ vst <- function(umi, } else { rv$umi_corrected <- sctransform::correct(rv, do_round = TRUE, do_pos = TRUE, scale_factor = scale_factor, verbosity = verbosity) - rv$umi_corrected <- as(as(as(rv$umi_corrected, "dMatrix"), "generalMatrix"), "CsparseMatrix") + rv$umi_corrected <- make.sparse(mat = rv$umi_corrected) } } diff --git a/README.md b/README.md 
index 3a64893..ea488d8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # sctransform ## R package for normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression -The sctransform package was developed by Christoph Hafemeister in [Rahul Satija's lab](https://satijalab.org/) at the New York Genome Center and described in [Hafemeister and Satija, Genome Biology 2019](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1). Recent updates are described in [(Choudhary and Satija, Genome Biology, in press)](https://doi.org/10.1101/2021.07.07.451498). Core functionality of this package has been integrated into [Seurat](https://satijalab.org/seurat/), an R package designed for QC, analysis, and exploration of single cell RNA-seq data. +The sctransform package was developed by Christoph Hafemeister in [Rahul Satija's lab](https://satijalab.org/) at the New York Genome Center and described in [Hafemeister and Satija, Genome Biology 2019](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1). Recent updates are described in [(Choudhary and Satija, Genome Biology, 2022)](https://doi.org/10.1186/s13059-021-02584-9). +Core functionality of this package has been integrated into [Seurat](https://satijalab.org/seurat/), an R package designed for QC, analysis, and exploration of single cell RNA-seq data. ## Quick start @@ -43,15 +44,12 @@ Available vignettes: - [Using sctransform in Seurat](https://htmlpreview.github.io/?https://github.com/satijalab/sctransform/blob/supp_html/supplement/seurat.html) - [Examples of how to perform normalization, feature selection, integration, and differential expression with sctransform v2 regularization](https://satijalab.org/seurat/articles/sctransform_v2_vignette.html) -## Known Issues - -* `node stack overflow` error when Rfast package is loaded. The Rfast package does not play nicely with the future.apply package. 
Try to avoid loading the Rfast package. See discussions: https://github.com/RfastOfficial/Rfast/issues/5 https://github.com/satijalab/sctransform/issues/108 Please use [the issue tracker](https://github.com/satijalab/sctransform/issues) if you encounter a problem ## References -- Hafemeister, C. & Satija, R. Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression. Genome Biol 20, 296 (December 23, 2019). [https://doi.org/10.1186/s13059-019-1874-1](https://doi.org/10.1186/s13059-019-1874-1). An early version of this work was used in the paper [Developmental diversification of cortical inhibitory interneurons, Nature 555, 2018](https://github.com/ChristophH/in-lineage). +- Hafemeister, C. & Satija, R. Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression. Genome Biology 20, 296 (2019). [https://doi.org/10.1186/s13059-019-1874-1](https://doi.org/10.1186/s13059-019-1874-1). An early version of this work was used in the paper [Developmental diversification of cortical inhibitory interneurons, Nature 555, 2018](https://github.com/ChristophH/in-lineage). -- Choudhary, S. & Satija, R. Comparison and evaluation of statistical error models for scRNA-seq. bioRxiv (2021). [https://doi.org/10.1101/2021.07.07.451498](https://doi.org/10.1101/2021.07.07.451498) +- Choudhary, S. & Satija, R. Comparison and evaluation of statistical error models for scRNA-seq. Genome Biology 23.1 (2022). 
[https://doi.org/10.1186/s13059-021-02584-9](https://doi.org/10.1186/s13059-021-02584-9) From 7c0dce46c8628d05c7ad7b85c05d0ee743d0b53c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 19 Aug 2022 13:38:09 -0400 Subject: [PATCH 3/6] Update date --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 165569a..d2b5acd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: sctransform Type: Package Title: Variance Stabilizing Transformations for Single Cell UMI Data Version: 0.3.3.9001 -Date: 2022-01-10 +Date: 2022-08-19 Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Saket", family = "Choudhary", email = "schoudhary@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5202-7633")), @@ -42,4 +42,4 @@ Suggests: knitr Enhances: glmGamPoi -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 From 2364fb131dce23dfb6d89f9919e940ed3ae48462 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 19 Aug 2022 13:38:38 -0400 Subject: [PATCH 4/6] Update manual --- man/make.sparse.Rd | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 man/make.sparse.Rd diff --git a/man/make.sparse.Rd b/man/make.sparse.Rd new file mode 100644 index 0000000..279b680 --- /dev/null +++ b/man/make.sparse.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{make.sparse} +\alias{make.sparse} +\title{Convert a given matrix to dgCMatrix} +\usage{ +make.sparse(mat) +} +\arguments{ +\item{matrix}{Input matrix} +} +\value{ +A dgCMatrix +} +\description{ +Convert a given matrix to dgCMatrix +} From f6dd8e52d4474f626d9689aab58b0059fe8b2bb7 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 19 Aug 2022 15:59:03 -0400 Subject: [PATCH 5/6] Update CRAN comments; bump version --- DESCRIPTION | 2 +- NEWS.md 
| 11 ++++++++--- R/utils.R | 2 +- cran-comments.md | 26 +------------------------- man/make.sparse.Rd | 2 +- 5 files changed, 12 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2b5acd..cfcdef7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sctransform Type: Package Title: Variance Stabilizing Transformations for Single Cell UMI Data -Version: 0.3.3.9001 +Version: 0.3.4 Date: 2022-08-19 Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")), diff --git a/NEWS.md b/NEWS.md index d1ac03f..dacbc37 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,19 @@ # News All notable changes will be documented in this file. -## [0.3.3] - UNRELEASED +## [0.3.4] - 2022-08-19 + +### Added +- Add `make.sparse` to handle `dgCMatrix` coercions + +## [0.3.3] - 2022-01-13 ### Added - `vst.flavor` argument to `vst()` to allow for invoking running updated regularization (sctransform v2, proposed in [Satija and Choudhary, 2021](https://doi.org/10.1101/2021.07.07.451498). See paper for details. - `scale_factor` to `correct()` to allow for a custom library size when correcting counts -## [0.3.2.9008] - 2021-07-28 +## [0.3.2] - 2021-07-28 ### Added - Add future.seed = TRUE to all `future_lapply()` calls @@ -18,7 +23,7 @@ All notable changes will be documented in this file. 
### Fixed - Fix logical comparison of vectors of length one in `diff_mean_test()` -## [0.3.2.9003] - 2020-02-11 +## [0.3.2] - 2020-02-11 ### Added - `compare` argument to the nonparametric differential expression test `diff_mean_test()` to allow for multiple comparisons and various ways to specify which groups to compare - Input checking at various places in `vst()` and `diff_mean_test()` diff --git a/R/utils.R b/R/utils.R index 46c419f..e45363e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -526,7 +526,7 @@ get_nz_median2 <- function(umi){ #' Convert a given matrix to dgCMatrix #' -#' @param matrix Input matrix +#' @param mat Input matrix #' #' @return A dgCMatrix make.sparse <- function(mat){ diff --git a/cran-comments.md b/cran-comments.md index 77efcbd..77d2eb7 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -11,29 +11,5 @@ ## R CMD check results -0 errors | 0 warnings | 2 notes +0 errors | 0 warnings | 0 notes -``` -* checking CRAN incoming feasibility ... NOTE -``` - -There is change in maintaine status: -New maintainer: - Saket Choudhary -Old maintainer(s): - Christoph Hafemeister - -``` -* checking package dependencies ... NOTE -Package which this enhances but not available for checking: ‘glmGamPoi’ -S -``` -`glmGamPoi` is an entirely optional package that is not required for core functionality, but only needed for alternative/faster implementations of the methods in this package. It is only available on Bioconductor. - -## Reverse dependencies - -Tested using `revdepcheck::revdep_check`: -We checked 4 reverse dependencies (1 from CRAN + 3 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. 
- * We saw 0 new problems - * We failed to check 0 packages diff --git a/man/make.sparse.Rd b/man/make.sparse.Rd index 279b680..058833a 100644 --- a/man/make.sparse.Rd +++ b/man/make.sparse.Rd @@ -7,7 +7,7 @@ make.sparse(mat) } \arguments{ -\item{matrix}{Input matrix} +\item{mat}{Input matrix} } \value{ A dgCMatrix From 3cbc836c25602d8317c3cdd32f19e573cf88343a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 19 Aug 2022 17:09:21 -0400 Subject: [PATCH 6/6] bitwise -> boolean operator --- NEWS.md | 3 +++ src/utils.cpp | 52 +++++++++++++++++++++++++-------------------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/NEWS.md b/NEWS.md index dacbc37..946fc8b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,9 @@ All notable changes will be documented in this file. ### Added - Add `make.sparse` to handle `dgCMatrix` coercions +### Fixed +- Convert bitwise operators to boolean operators in utils.cpp + ## [0.3.3] - 2022-01-13 ### Added diff --git a/src/utils.cpp b/src/utils.cpp index 9fa0dfd..c408fc6 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -17,7 +17,7 @@ NumericVector row_mean_dgcmatrix(S4 matrix) { IntegerVector dim = matrix.slot("Dim"); int rows = dim[0]; int cols = dim[1]; - + NumericVector ret(rows, 0.0); int x_length = x.length(); for (int k=0; k=p[col]) { @@ -65,7 +65,7 @@ NumericMatrix row_mean_grouped_dgcmatrix(S4 matrix, IntegerVector group, ++col; ++groupsize[group[col-1]-1]; } - + for (int j=0; j(dn[0]); @@ -186,7 +186,7 @@ IntegerMatrix row_nonzero_count_grouped_dgcmatrix(S4 matrix, IntegerVector group CharacterVector levs = group.attr("levels"); int groups = levs.length(); IntegerMatrix ret(rows, groups); - + int col = 0; for (int k=0; k=p[col]) { @@ -194,7 +194,7 @@ IntegerMatrix row_nonzero_count_grouped_dgcmatrix(S4 matrix, IntegerVector group } ret(i[k], group[col-1]-1)++; } - + colnames(ret) = levs; List dn = matrix.slot("Dimnames"); if (dn[0] != R_NilValue) { @@ -234,12 +234,12 @@ NumericVector 
grouped_mean_diff_per_row(NumericMatrix x, IntegerVector group, bo NumericMatrix tmp(2, nrows); IntegerVector groupsize(2); NumericVector ret(nrows, 0.0); - + if (shuffle) { group = clone(group); std::random_shuffle(group.begin(), group.end(), randWrapper); } - + for (int i = 0; i < ncols; i++) { ++groupsize(group(i)); for (int j = 0; j < nrows; j++) { @@ -252,7 +252,7 @@ NumericVector grouped_mean_diff_per_row(NumericMatrix x, IntegerVector group, bo return ret; } -// Bootstrapped mean +// Bootstrapped mean // [[Rcpp::export]] NumericVector mean_boot(NumericVector x, int N, int S) { NumericVector ret(N); @@ -270,7 +270,7 @@ NumericMatrix mean_boot_grouped(NumericVector x, IntegerVector group, int N, int int groups = max(group) + 1; // we need as many columns NumericMatrix ret(N, groups); - + for (int g = 0; g < groups; g++) { NumericVector xg = x[group == g]; ret(_, g) = mean_boot(xg, N, S); @@ -344,33 +344,33 @@ NumericVector distribution_shift(NumericMatrix x) { // with kind permission from the authors. // It has been slightly adopted for our use case here. 
// [[Rcpp::export]] -List qpois_reg(NumericMatrix X, NumericVector Y, const double tol, const int maxiters, +List qpois_reg(NumericMatrix X, NumericVector Y, const double tol, const int maxiters, const double minphi, const bool returnfit){ const unsigned int n=X.nrow(), pcols=X.ncol(), d=pcols; - + arma::colvec b_old(d, arma::fill::zeros), b_new(d), L1(d), yhat(n), y(Y.begin(), n, false), m(n), phi(n); arma::vec unique_vals; arma::mat L2, x(X.begin(), n, pcols, false), x_tr(n, pcols); double dif; - + // Identify the intercept term(s) and initialize the coefficients for(int i=0;itol;){ yhat=x*b_old; m=(exp(yhat)); @@ -387,7 +387,7 @@ List qpois_reg(NumericMatrix X, NumericVector Y, const double tol, const int max double p=sum(arma::square(phi)/m)/(n-pcols); NumericVector coefs = NumericVector(b_new.begin(), b_new.end()); coefs.names() = colnames(X); - + List l; l["coefficients"]=coefs; l["phi"]=p; @@ -395,6 +395,6 @@ List qpois_reg(NumericMatrix X, NumericVector Y, const double tol, const int max if(returnfit){ l["fitted"]=NumericVector(m.begin(), m.end()); } - + return l; }