man/callHaplotypeSpecificCN.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callHSCN.R
\name{callHaplotypeSpecificCN}
\alias{callHaplotypeSpecificCN}
\title{Call haplotype specific copy number in single cell datasets}
\usage{
callHaplotypeSpecificCN(
  CNbins,
  haplotypes,
  eps = 1e-12,
  maskedbins = NULL,
  loherror = 0.02,
  maxCN = NULL,
  selftransitionprob = 0.95,
  progressbar = TRUE,
  ncores = 1,
  phasebyarm = FALSE,
  minfrachaplotypes = 0.7,
  likelihood = "auto",
  minbins = 0,
  minbinschr = 0,
  phased_haplotypes = NULL,
  clustering_method = "copy",
  maxloherror = 0.035,
  mincells = 7,
  overwritemincells = NULL,
  cluster_per_chr = TRUE,
  viterbiver = "cpp",
  filterhaplotypes = 0.1,
  firstpassfiltering = TRUE,
  smoothsingletons = TRUE,
  fillmissing = TRUE,
  global_phasing_for_balanced = FALSE,
  chr_cell_list = NULL,
  chrs_for_global_phasing = NULL,
  female = TRUE
)
}
\arguments{
\item{CNbins}{single cell copy number dataframe with the following columns: \code{cell_id}, \code{chr}, \code{start}, \code{end}, \code{state}, \code{copy}}

\item{haplotypes}{single cell haplotypes dataframe with the following columns: \code{cell_id}, \code{chr}, \code{start}, \code{end}, \code{hap_label}, \code{allele1}, \code{allele0}, \code{totalcounts}}

\item{eps}{default 1e-12}

\item{maskedbins}{data.frame with columns chr, start and end. These bins will be masked from the inference, and copy number states are assigned to these bins based on the states of neighbouring bins.}

\item{loherror}{LOH error rate for initial assignment, this is inferred directly from the data in the second pass, default = 0.02}

\item{maxCN}{maximum copy number to infer allele specific states, default=NULL which will use the maximum state from CNbins}

\item{selftransitionprob}{probability to stay in the same state in the HMM, default = 0.95, set to 0.0 for an IID model}

\item{progressbar}{Boolean to display progressbar or not, default = TRUE, will only show if ncores == 1}

\item{ncores}{Number of cores to use, default = 1}

\item{phasebyarm}{Phasing by chromosome arm, default = FALSE}

\item{minfrachaplotypes}{Minimum proportion of haplotypes to retain when clustering + phasing, default = 0.7}

\item{likelihood}{Likelihood model for HMM, one of \code{binomial}, \code{betabinomial} or \code{auto}, where \code{auto} lets the algorithm choose the likelihood that best fits the data. Default is \code{auto}}

\item{minbins}{Minimum number of bins containing both haplotype counts and copy number data for a cell to be included}

\item{minbinschr}{Minimum number of bins containing both haplotype counts and copy number data per chromosome for a cell to be included}

\item{phased_haplotypes}{Use this if you want to manually define the haplotype phasing, for example if the default heuristics used by signals do not return a good fit.}

\item{clustering_method}{Method to use to cluster cells for haplotype phasing, default is \code{copy} (using copy column), other option is \code{breakpoints} (using breakpoint for clustering)}

\item{maxloherror}{Maximum value for LOH error rate}

\item{mincells}{Minimum cluster size used for phasing, default = 7}

\item{overwritemincells}{Force the number of cells to use for clustering/phasing rather than use the output of the clustering}

\item{cluster_per_chr}{Whether to cluster per chromosome to rephase alleles or not}

\item{filterhaplotypes}{filter out haplotypes present in less than X fraction, default is 0.1}

\item{firstpassfiltering}{Filter out cells with large discrepancy after first pass state assignment}

\item{smoothsingletons}{Remove singleton bins by smoothing over based on states in adjacent bins}

\item{fillmissing}{For bins with missing counts fill in values based on neighbouring bins, this ensures that the returned object is the same size as input CNbins}

\item{chr_cell_list}{Cells to use for phasing for each chromosome, this should be a named list with a vector of cell_ids for each chromosome, e.g. \code{list("1" = c("cell_id1", "cell_id2"))}. Default is NULL. If provided, overrides internal phasing.}

\item{chrs_for_global_phasing}{Which chromosomes to phase using all cells for diploid regions, default is NULL which uses all chromosomes}

\item{female}{Default is \code{TRUE}, if set to \code{FALSE} and patient is "XY", X chromosome states are set to A|0 where A=Hmmcopy state}

\item{viterbiver}{Version of Viterbi algorithm to use (cpp or R), default is \code{cpp}}

\item{global_phasing_for_balanced}{When using cluster_per_chr, use all cells for phasing diploid regions within the cluster, default = FALSE}
}
\value{
Haplotype specific copy number object
}
\description{
Call haplotype specific copy number in single cell datasets
}
\details{
The haplotype specific copy number object includes the following additional columns:
\itemize{
\item \code{A} A allele copy number
\item \code{B} B allele copy number
\item \code{state_AS_phased} A|B
\item \code{state_min} Minor allele copy number
\item \code{LOH} \code{LOH} if bin is LOH, \code{NO} otherwise
\item \code{state_phase} Discretized haplotype specific states
\item \code{phase} Whether the A allele or B allele is dominant
\item \code{alleleA} Counts for the A allele
\item \code{alleleB} Counts for the B allele
\item \code{totalcounts} Total number of counts
\item \code{BAF} B-allele frequency (alleleB / totalcounts)
}
}
\examples{
sim_data <- simulate_data_cohort(
  clone_num = c(20, 20),
  clonal_events = list(
    list("1" = c(2, 0), "5" = c(3, 1)),
    list("2" = c(6, 3), "3" = c(1, 0))
  ),
  loherror = 0.02,
  coverage = 100
)

results <- callHaplotypeSpecificCN(sim_data$CNbins, sim_data$haplotypes)
}