-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcallHaplotypeSpecificCN.Rd
132 lines (103 loc) · 5.5 KB
/
callHaplotypeSpecificCN.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callHSCN.R
\name{callHaplotypeSpecificCN}
\alias{callHaplotypeSpecificCN}
\title{Call haplotype specific copy number in single cell datasets}
\usage{
callHaplotypeSpecificCN(
CNbins,
haplotypes,
eps = 1e-12,
maskedbins = NULL,
loherror = 0.02,
maxCN = NULL,
selftransitionprob = 0.95,
progressbar = TRUE,
ncores = 1,
phasebyarm = FALSE,
minfrachaplotypes = 0.7,
likelihood = "auto",
minbins = 0,
minbinschr = 0,
phased_haplotypes = NULL,
clustering_method = "copy",
maxloherror = 0.035,
mincells = 7,
overwritemincells = NULL,
cluster_per_chr = TRUE,
viterbiver = "cpp",
filterhaplotypes = 0.1,
firstpassfiltering = TRUE,
smoothsingletons = TRUE,
fillmissing = TRUE,
global_phasing_for_balanced = FALSE,
chr_cell_list = NULL,
chrs_for_global_phasing = NULL,
female = TRUE
)
}
\arguments{
\item{CNbins}{single cell copy number dataframe with the following columns: \code{cell_id}, \code{chr}, \code{start}, \code{end}, \code{state}, \code{copy}}
\item{haplotypes}{single cell haplotypes dataframe with the following columns: \code{cell_id}, \code{chr}, \code{start}, \code{end}, \code{hap_label}, \code{allele1}, \code{allele0}, \code{totalcounts}}
\item{eps}{default 1e-12}
\item{maskedbins}{data.frame with columns \code{chr}, \code{start} and \code{end}. These bins are masked from the inference, and their copy number states are assigned based on the states of neighbouring bins.}
\item{loherror}{LOH error rate for initial assignment, this is inferred directly from the data in the second pass, default = 0.02}
\item{maxCN}{maximum copy number to infer allele specific states, default=NULL which will use the maximum state from CNbins}
\item{selftransitionprob}{probability to stay in the same state in the HMM, default = 0.95, set to 0.0 for an IID model}
\item{progressbar}{Boolean to display progressbar or not, default = TRUE, will only show if ncores == 1}
\item{ncores}{Number of cores to use, default = 1}
\item{phasebyarm}{Phasing by chromosome arm, default = FALSE}
\item{minfrachaplotypes}{Minimum proportion of haplotypes to retain when clustering + phasing, default = 0.7}
\item{likelihood}{Likelihood model for the HMM: \code{binomial}, \code{betabinomial}, or \code{auto}, in which case the algorithm will choose the likelihood that best fits the data. Default is \code{auto}}
\item{minbins}{Minimum number of bins containing both haplotype counts and copy number data for a cell to be included}
\item{minbinschr}{Minimum number of bins containing both haplotype counts and copy number data per chromosome for a cell to be included}
\item{phased_haplotypes}{Use this if you want to manually define the haplotype phasing, for example if the default heuristics used by signals do not return a good fit.}
\item{clustering_method}{Method to use to cluster cells for haplotype phasing, default is \code{copy} (using copy column), other option is \code{breakpoints} (using breakpoint for clustering)}
\item{maxloherror}{Maximum value for LOH error rate}
\item{mincells}{Minimum cluster size used for phasing, default = 7}
\item{overwritemincells}{Force the number of cells to use for clustering/phasing rather than use the output of the clustering}
\item{cluster_per_chr}{Whether to cluster per chromosome to rephase alleles or not}
\item{filterhaplotypes}{filter out haplotypes present in less than X fraction, default is 0.1}
\item{firstpassfiltering}{Filter out cells with large discrepancy after first pass state assignment}
\item{smoothsingletons}{Remove singleton bins by smoothing over based on states in adjacent bins}
\item{fillmissing}{For bins with missing counts fill in values based on neighbouring bins, this ensures that the returned object is the same size as input CNbins}
\item{chr_cell_list}{Cells to use for phasing for each chromosome. This should be a named list with a vector of cell_ids for each chromosome, e.g. \code{list("1" = c("cell_id1", "cell_id2"))}. Default is \code{NULL}. If provided, overrides internal phasing.}
\item{chrs_for_global_phasing}{Which chromosomes to phase using all cells for diploid regions, default is NULL which uses all chromosomes}
\item{female}{Default is \code{TRUE}, if set to \code{FALSE} and patient is "XY", X chromosome states are set to A|0 where A=Hmmcopy state}
\item{viterbiver}{Version of viterbi algorithm to use (cpp or R)}
\item{global_phasing_for_balanced}{When using cluster_per_chr, use all cells for phasing diploid regions within the cluster}
}
\value{
Haplotype specific copy number object
}
\description{
Call haplotype specific copy number in single cell datasets
}
\details{
The haplotype specific copy number object includes the following additional columns
\itemize{
\item \code{A} A allele copy number
\item \code{B} B allele copy number
\item \code{state_AS_phased} A|B
\item \code{state_min} Minor allele copy number
\item \code{LOH} \code{LOH} if the bin is LOH, \code{NO} otherwise
\item \code{state_phase} Discretized haplotype specific states
\item \code{phase} Whether the A allele or B allele is dominant
\item \code{alleleA} Counts for the A allele
\item \code{alleleB} Counts for the B allele
\item \code{totalcounts} Total number of counts
\item \code{BAF} B-allele frequency (alleleB / totalcounts)
}
}
\examples{
sim_data <- simulate_data_cohort(
clone_num = c(20, 20),
clonal_events = list(
list("1" = c(2, 0), "5" = c(3, 1)),
list("2" = c(6, 3), "3" = c(1, 0))
),
loherror = 0.02,
coverage = 100
)
results <- callHaplotypeSpecificCN(sim_data$CNbins, sim_data$haplotypes)
}