change name to signals

shahcompbio · Jan 20, 2022 · b8641d6 · b8641d6
1 parent 3875523
commit b8641d6
Show file tree

Hide file tree

Showing 18 changed files with 72 additions and 64 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,9 +7,12 @@ inst/doc
 doc
 Meta
 .DS_Store
-schnapps.Rproj
+signals.Rproj
 vignettes/ASCN-phasing.Rmd
 test_data/
 .Rbuildignore
 profiling.R
 SA530.R
+debug.Rmd
+docs/*
+docs
diff --git a/ASCN-RNA.Rmd b/ASCN-RNA.Rmd
@@ -16,7 +16,7 @@ knitr::opts_chunk$set(
 ```
 
 ```{r setup}
-library(schnapps)
+library(signals)
 library(tidyr)
 library(dplyr)
 library(cowplot)
@@ -27,7 +27,7 @@ library(ggplot2)
 This vignette illustrates how to perform allele specific copy number inference in scRNAseq data. There are 2 methods to perform this inference, one which issues prior knowledge from allele specific scDNAseq inference and the second without prior knowledge. We'll demonstrate both approaches here.
 
 ## Data
-The data needed to perform the allele specific copy number inference are SNP calls of heterozygous positions in individual single cells. Ideally haplotype block calling should also be performed, so that each allele in each cell can be assigned to a haplotype block. To get this data you will ideally need a whole genome sequenced bulk sample of normal tissue to identify heterozygous SNPs and then you can use a tool such as cellSNP or vartrix to get per cell counts. An example dataset from ~1000 cells is included here. For this dataset we also have paired scDNAseq, where we have called ASCN using `schnapps`, we'll use this as input for the method that utilzes prior knowledge.
+The data needed to perform the allele specific copy number inference are SNP calls of heterozygous positions in individual single cells. Ideally haplotype block calling should also be performed, so that each allele in each cell can be assigned to a haplotype block. To get this data you will ideally need a whole genome sequenced bulk sample of normal tissue to identify heterozygous SNPs and then you can use a tool such as cellSNP or vartrix to get per cell counts. An example dataset from ~1000 cells is included here. For this dataset we also have paired scDNAseq, where we have called ASCN using `signals`, we'll use this as input for the method that utilzes prior knowledge.
 
 
 ```{r}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
-Package: schnapps
-Title: Single Cell Haplotype copy Number Analysis through Phased Probabilistic States
+Package: signals
+Title: Single Cell Genomes with Allele Specificity
 Version: 0.6.2
 Author@R: c(person("Marc", "Williams", email = "[email protected]",
                   role = c("aut", "cre")),
@@ -8,9 +8,9 @@ Author@R: c(person("Marc", "Williams", email = "[email protected]",
 Author: Marc J Williams [aut, cre],
   Tyler Funnell [ctb]
 Maintainer: Marc J Williams <[email protected]>
-Description: schnapps (Single Cell Haplotype copy Number Analysis through Phased Probabilistic States) 
+Description: signals (Single Cell Haplotype copy Number Analysis through Phased Probabilistic States) 
     is a tool to estimate allele and haplotype specific copy number states in single cells with low coverage (~0.01X). 
-    schnapps phases alleles based on losses and gains across all cells and then assigns allele specific states for each bin 
+    signals phases alleles based on losses and gains across all cells and then assigns allele specific states for each bin 
     in each cell using a hidden markov model.
 License: MIT + file LICENSE
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -96,4 +96,4 @@ export(widen_haplotypebins)
 import(data.table)
 importFrom(Rcpp,sourceCpp)
 importFrom(magrittr,"%>%")
-useDynLib(schnapps)
+useDynLib(signals)
diff --git a/NEWS.md b/NEWS.md
@@ -1,71 +1,71 @@
-# schnapps 0.6.2
+# signals 0.6.2
 
 * fix missing argument
 
-# schnapps 0.6.1
+# signals 0.6.1
 
 * changed colours
 * fill in missing bins
 * improved documentation about inputs
 
-# schnapps 0.6.0
+# signals 0.6.0
 
 * add option to remove noisy cells from phasing
 
-# schnapps 0.5.5
+# signals 0.5.5
 
 * update plotting and clustering
 
-# schnapps 0.5.4
+# signals 0.5.4
 
 * update docker
 
-# schnapps 0.5.3
+# signals 0.5.3
 
 * some small updates to plotting and default params
 
-# schnapps 0.5.2
+# signals 0.5.2
 
 * update to vignette and plotting
 
-# schnapps 0.5.1
+# signals 0.5.1
 
 * updated default parameters
 
-# schnapps 0.5.0
+# signals 0.5.0
 
 * update ascn inference
 
-# schnapps 0.4.3
+# signals 0.4.3
 
 * filtering utility function
 
-# schnapps 0.4.2
+# signals 0.4.2
 
 * fix bug with filtering
 
-# schnapps 0.4.1
+# signals 0.4.1
 
 * Fix bug in HMM when total copy number = 0
 * Add function to filter hscn object
 
-# schnapps 0.4.0
+# signals 0.4.0
 
 * Add option to filter haplotypes
 * Added Dockerfile and github actions to push to Dockerhub
 * Added QC metadata table to output
 
-# schnapps 0.3.0
+# signals 0.3.0
 
 * Version associated with preprint
 * some fixes to plotting
 
-# schnapps 0.2.1
+# signals 0.2.1
 
 * updates to heatmap plotting
 
-# schnapps 0.2.0
+# signals 0.2.0
 
-# schnapps 0.1.0
+# signals 0.1.0
 
 * Added a `NEWS.md` file to track changes to the package.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -2,10 +2,10 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 logspace_addcpp <- function(logx, logy) {
-    .Call('_schnapps_logspace_addcpp', PACKAGE = 'schnapps', logx, logy)
+    .Call('_signals_logspace_addcpp', PACKAGE = 'signals', logx, logy)
 }
 
 viterbi <- function(emission, transition, observations) {
-    .Call('_schnapps_viterbi', PACKAGE = 'schnapps', emission, transition, observations)
+    .Call('_signals_viterbi', PACKAGE = 'signals', emission, transition, observations)
 }
 
diff --git a/R/callHSCN.R b/R/callHSCN.R
@@ -580,7 +580,7 @@ filter_haplotypes <- function(haplotypes, fraction){
 #' @param likelihood Likelihood model for HMM, default is `binomial`, other option is `betabinomial` or use `auto` and the algorithm will choose the likelihood that best fits the data.
 #' @param minbins Minimum number of bins containing both haplotype counts and copy number data for a cell to be included
 #' @param minbinschr Minimum number of bins containing both haplotype counts and copy number data per chromosome for a cell to be included
-#' @param phased_haplotypes Use this if you want to manually define the haplotypes phasing if for example the default heuristics used by schnapps does not return a good fit.
+#' @param phased_haplotypes Use this if you want to manually define the haplotypes phasing if for example the default heuristics used by signals does not return a good fit.
 #' @param clustering_method Method to use to cluster cells for haplotype phasing, default is `copy`, other option is `breakpoints`
 #' @param maxloherror Maximum value for LOH error rate
 #' @param mincells Minimum cluster size used for phasing
@@ -1331,4 +1331,4 @@ switchbins <- function(cn, phase_df){
   }
 
   return(x)
-}
+}
diff --git a/R/callHSCNrna.R b/R/callHSCNrna.R
@@ -333,7 +333,7 @@ segments_to_bins <- function(segments, binsize = 0.5e6, ncores = 1){
     )
   }
 
-  bins <- schnapps::getBins(binsize = binsize)
+  bins <- signals::getBins(binsize = binsize)
 
   if (ncores == 1) {
     df <- data.table::rbindlist(lapply(unique(segments$cell_id),

diff --git a/R/clustering.R b/R/clustering.R
@@ -141,7 +141,7 @@ umap_clustering_breakpoints <- function(CNbins,
   minPts <- max(minPts, 2)
 
   message("Creating breakpoint matrix...")
-  segs <- schnapps::create_segments(CNbins, field = field)
+  segs <- signals::create_segments(CNbins, field = field)
   segs_matrix <- createbreakpointmatrix(segs,
     transpose = TRUE,
     internalonly = internalonly,

diff --git a/R/convert.R b/R/convert.R
@@ -31,4 +31,4 @@ assign_bins_haplotypes <- function(haplotypes, binsize = 0.5e6){
     .[, position := NULL]
 
   return(binshaps)
-}
+}
diff --git a/R/heatmap_plot.R b/R/heatmap_plot.R
@@ -1301,7 +1301,7 @@ plotHeatmapQC <- function(cn,
     CNbins <- dplyr::left_join(CNbins, p$cl$clustering, by = "cell_id")
     CNbins <- dplyr::left_join(CNbins, p$prop)
   } else {
-    CNbins$chrarm <- paste0(CNbins$chr, schnapps:::coord_to_arm(CNbins$chr, CNbins$start))
+    CNbins$chrarm <- paste0(CNbins$chr, signals:::coord_to_arm(CNbins$chr, CNbins$start))
     CNbins <- dplyr::left_join(CNbins, p$cl$clustering, by = "cell_id")
     CNbins <- dplyr::left_join(CNbins, p$prop)
   }
@@ -1310,7 +1310,7 @@ plotHeatmapQC <- function(cn,
     dplyr::mutate(state_BAF = ifelse(is.na(propA), "-1", state_BAF))
 
   plotcol <- "state_BAF"
-  colvals <- schnapps:::cn_colours_bafstate
+  colvals <- signals:::cn_colours_bafstate
   colvals[["-1"]] <- "gray90"
   legendname <- "Allelic Imbalance"
 

diff --git a/R/schnapps-package.R → R/signals-package.R b/R/schnapps-package.R → R/signals-package.R
@@ -1,5 +1,5 @@
 ## usethis namespace: start
-#' @useDynLib schnapps
+#' @useDynLib signals
 #' @importFrom Rcpp sourceCpp
 ## usethis namespace: end
 NULL
diff --git a/README.md b/README.md
@@ -1,35 +1,35 @@
 
-# schnapps
+# signals
 
 <!-- badges: start -->
-[![R build status](https://github.com/shahcompbio/schnapps/workflows/R-CMD-check/badge.svg)](https://github.com/shahcompbio/schnapps/actions)
-[![Codecov test coverage](https://codecov.io/gh/shahcompbio/schnapps/branch/master/graph/badge.svg)](https://codecov.io/gh/shahcompbio/schnapps?branch=master)
-[![Docker](https://img.shields.io/docker/cloud/build/marcjwilliams1/schnapps)](https://hub.docker.com/repository/docker/marcjwilliams1/schnapps)
+[![R build status](https://github.com/shahcompbio/signals/workflows/R-CMD-check/badge.svg)](https://github.com/shahcompbio/signals/actions)
+[![Codecov test coverage](https://codecov.io/gh/shahcompbio/signals/branch/master/graph/badge.svg)](https://codecov.io/gh/shahcompbio/signals?branch=master)
+[![Docker](https://img.shields.io/docker/cloud/build/marcjwilliams1/signals)](https://hub.docker.com/repository/docker/marcjwilliams1/signals)
 <!-- badges: end -->
 
-schnapps (Single Cell Haplotype copy Number Analysis through Phased Probabilistic States) is a tool to estimate allele and haplotype specific copy number states in single cells with low coverage (0.01-0.1X). schnapps phases alleles based on losses and gains across all cells and then assigns allele specific states for each bin in each cell using a hidden markov model.  Documentation is available [here](https://shahcompbio.github.io/schnapps/).
+signals (single cell genomes with allele specificity) is a tool to estimate allele and haplotype specific copy number states in single cells with low coverage (0.01-0.1X). signals phases alleles based on losses and gains across all cells and then assigns allele specific states for each bin in each cell using a hidden markov model.  Documentation is available [here](https://shahcompbio.github.io/signals/).
 
-You can read more about schnapps in our [preprint](https://www.biorxiv.org/content/10.1101/2021.06.04.447031v1).
+You can read more about signals in our [preprint](https://www.biorxiv.org/content/10.1101/2021.06.04.447031v1).
 
 ## Installation
 
-You can install schnapps with the following command 
+You can install signals with the following command 
 
 ``` r
-devtools::install_github("shahcompbio/schnapps")
+devtools::install_github("shahcompbio/signals")
 ```
 
 ## Input data
 
-`schnapps` was developed to work with Direct Library Preperation + (DLP+) data. A high throughput single cell whole genome sequencing workflow, described in [Laks et al.](https://www.sciencedirect.com/science/article/pii/S0092867419311766). As such it works using the output of the the pipeline developed to process this type of data, available [here](https://github.com/shahcompbio/single_cell_pipeline). Despite being developed with this type of data and pipeline in mind, it should work well with other single cell whole genome technologies. We have run it succesfully with 10X CNV data for example. The required inputs are total copy number estimates in bins across the genome and haplotype block counts per cell (SNP counts may also work). See the test datasets provided with the package for example inputs. If you have a different type of technology and would like some advice or help running schnapps please open an issue. We describe in more detail the necessary input below.
+`signals` was developed to work with Direct Library Preperation + (DLP+) data. A high throughput single cell whole genome sequencing workflow, described in [Laks et al.](https://www.sciencedirect.com/science/article/pii/S0092867419311766). As such it works using the output of the the pipeline developed to process this type of data, available [here](https://github.com/shahcompbio/single_cell_pipeline). Despite being developed with this type of data and pipeline in mind, it should work well with other single cell whole genome technologies. We have run it succesfully with 10X CNV data for example. The required inputs are total copy number estimates in bins across the genome and haplotype block counts per cell (SNP counts may also work). See the test datasets provided with the package for example inputs. If you have a different type of technology and would like some advice or help running signals please open an issue. We describe in more detail the necessary input below.
 
 ### DLP+ data
 
 You will need the HMM copy results table (`CNbins`) with the following columns: `chr`, `start`,`end`, `cell_id`, `state`, `copy`. `state` is the inferred total copy number state. `copy` values are GC-correceted, ploidy corrected normalized read counts (this is what HMMcopy uses to infer the total copy number states). You will also need the cell specific haplotype counts (`allele_data`) as outputted by the `inferhaps` and `couthaps` sub commands in the pipeline. This includes the following columns: `chr`, `start`,`end`, `cell_id`, `hap_label`, `allele_id`, `readcount`. `allele_id` identifies the 2 alleles. `hap_label` is a label given to the haplotype block, these labels are consistent across cells, and in combination with `chr` gives each block a unique ID. These blocks are computed by finding SNP's that can be confidently phased together. See the section of "phasing uncertainty" in SHAPEIT [here](https://mathgen.stats.ox.ac.uk/genetics_software/shapeit/shapeit.html#uncertainty). This notion of phasing uncertainty is also used in [remixt](https://github.com/amcpherson/remixt). Note that this notion of haplotype block is different to other tools such as CHISEL where it is assumed that the the phasing remains consistent over seem specified length such as 50kb. Finally the `readcount` columns gives the sum of the number of reads for each allele in each block.
 
 ### Other technologies
 
-Other technologies and software should also be compatible with schnapps. For example, we have used 10X data successfully. If you have single cell bam files or fastq files see the detailed documentation for running our single cell pipeline [here](https://github.com/shahcompbio/single_cell_pipeline/blob/master/docs/source/install.md). Alternatively, we provide a lightweight snakemake pipeline with the key steps [here](https://github.com/marcjwilliams1/hscn_pipeline). Also included there are some scripts to demultiplex 10X CNV bams.
+Other technologies and software should also be compatible with signals. For example, we have used 10X data successfully. If you have single cell bam files or fastq files see the detailed documentation for running our single cell pipeline [here](https://github.com/shahcompbio/single_cell_pipeline/blob/master/docs/source/install.md). Alternatively, we provide a lightweight snakemake pipeline with the key steps [here](https://github.com/marcjwilliams1/hscn_pipeline). Also included there are some scripts to demultiplex 10X CNV bams.
 
 If you total copy number calls from other software such as 10X cellranger or [scope](https://github.com/rujinwang/SCOPE), these may also work but not something we have tried. Feel free to open an issue if you need some advice.
 
@@ -51,10 +51,10 @@ It is imortant to have 4 string's seperated by "-", but the unique cell identifi
 
 ## Example
 
-Here we'll show an example of running schnapps with a small dataset of 250 cells from the DLP platform. First we need to do some data wrangling to convert the `allele_data` table from long to wide format.
+Here we'll show an example of running signals with a small dataset of 250 cells from the DLP platform. First we need to do some data wrangling to convert the `allele_data` table from long to wide format.
 
 ``` r
-library(schnapps)
+library(signals)
 haplotypes<- format_haplotypes_dlp(haplotypes, CNbins)
 ```
 
@@ -81,7 +81,7 @@ plotHeatmap(ascn, plotcol = "state_BAF")
 ```
 This will cluster the cell using umap and hdbscan.
 
-`schnapps` includes a number of other utilities for analysing single cell (haplotype-specific) copy number including the following:
+`signals` includes a number of other utilities for analysing single cell (haplotype-specific) copy number including the following:
 
 * integration with scRNAseq using [seurat](https://satijalab.org/seurat/index.html)
 * extensive plotting functions

diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -5,9 +5,14 @@
 
 using namespace Rcpp;
 
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
+#endif
+
 // logspace_addcpp
 double logspace_addcpp(double logx, double logy);
-RcppExport SEXP _schnapps_logspace_addcpp(SEXP logxSEXP, SEXP logySEXP) {
+RcppExport SEXP _signals_logspace_addcpp(SEXP logxSEXP, SEXP logySEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -19,7 +24,7 @@ END_RCPP
 }
 // viterbi
 NumericVector viterbi(NumericMatrix emission, NumericMatrix transition, NumericVector observations);
-RcppExport SEXP _schnapps_viterbi(SEXP emissionSEXP, SEXP transitionSEXP, SEXP observationsSEXP) {
+RcppExport SEXP _signals_viterbi(SEXP emissionSEXP, SEXP transitionSEXP, SEXP observationsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -32,12 +37,12 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_schnapps_logspace_addcpp", (DL_FUNC) &_schnapps_logspace_addcpp, 2},
-    {"_schnapps_viterbi", (DL_FUNC) &_schnapps_viterbi, 3},
+    {"_signals_logspace_addcpp", (DL_FUNC) &_signals_logspace_addcpp, 2},
+    {"_signals_viterbi", (DL_FUNC) &_signals_viterbi, 3},
     {NULL, NULL, 0}
 };
 
-RcppExport void R_init_schnapps(DllInfo *dll) {
+RcppExport void R_init_signals(DllInfo *dll) {
     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
     R_useDynamicSymbols(dll, FALSE);
 }
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -1,4 +1,4 @@
 library(testthat)
-library(schnapps)
+library(signals)
 
-test_check("schnapps")
+test_check("signals")
-Original file line number
+Diff line change
@@ Expand Up @@
         .[, position := NULL]
       return(binshaps)
-    }
+    }