Skip to content

Commit

Permalink
add J as optional argument
Browse files Browse the repository at this point in the history
  • Loading branch information
adamcweiner committed Dec 4, 2023
1 parent bde509b commit dec6ffb
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
5 changes: 3 additions & 2 deletions scdna_replication_tools/infer_scRT.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, cn_s, cn_g1, input_col='reads', assign_col='copy', library_co
rv_col='rt_value', rs_col='rt_state', frac_rt_col='frac_rt', clone_col='clone_id', rt_prior_col='mcf7rt',
cn_prior_method='hmmcopy', col2='rpm_gc_norm', col3='temp_rt', col4='changepoint_segments', col5='binary_thresh',
max_iter=2000, min_iter=100, max_iter_step1=None, min_iter_step1=None, max_iter_step3=None, min_iter_step3=None,
cn_prior_weight=1e6, learning_rate=0.05, rel_tol=1e-6, cuda=False, seed=0, P=13, K=4, upsilon=6, run_step3=True):
cn_prior_weight=1e6, learning_rate=0.05, rel_tol=1e-6, cuda=False, seed=0, P=13, K=4, J=5, upsilon=6, run_step3=True):
self.cn_s = cn_s
self.cn_g1 = cn_g1

Expand Down Expand Up @@ -82,6 +82,7 @@ def __init__(self, cn_s, cn_g1, input_col='reads', assign_col='copy', library_co
self.seed = seed
self.P = P # number of CN states
self.K = K # polynomial degree
self.J = J # number of G1-phase cells to include in the composite CN prior
self.upsilon = upsilon
self.run_step3 = run_step3

Expand Down Expand Up @@ -157,7 +158,7 @@ def infer_pert_model(self):
rs_col=self.rs_col, frac_rt_col=self.frac_rt_col, cn_prior_method=self.cn_prior_method, cn_prior_weight=self.cn_prior_weight,
learning_rate=self.learning_rate, max_iter=self.max_iter, min_iter=self.min_iter, rel_tol=self.rel_tol,
min_iter_step1=self.min_iter_step1, min_iter_step3=self.min_iter_step3, max_iter_step1=self.max_iter_step1, max_iter_step3=self.max_iter_step3,
cuda=self.cuda, seed=self.seed, P=self.P, K=self.K, upsilon=self.upsilon, run_step3=self.run_step3)
cuda=self.cuda, seed=self.seed, P=self.P, K=self.K, J=self.J, upsilon=self.upsilon, run_step3=self.run_step3)

print('running PERT model')
print('cn_s.dtypes', self.cn_s.dtypes, sep='\n')
Expand Down
13 changes: 10 additions & 3 deletions scdna_replication_tools/pert_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(self, cn_s, cn_g1, input_col='reads', gc_col='gc', rt_prior_col='mc
rs_col='rt_state', frac_rt_col='frac_rt', cn_prior_method='g1_composite',
cn_prior_weight=1e6, learning_rate=0.05, max_iter=2000, min_iter=100, rel_tol=1e-6,
max_iter_step1=None, min_iter_step1=None, max_iter_step3=None, min_iter_step3=None,
cuda=False, seed=0, P=13, K=4, upsilon=6, run_step3=True):
cuda=False, seed=0, P=13, K=4, J=5, upsilon=6, run_step3=True):
'''
initialise the pert_infer_scRT object
:param cn_s: long-form dataframe containing copy number and read count information from S-phase cells. (pandas.DataFrame)
Expand Down Expand Up @@ -72,6 +72,7 @@ def __init__(self, cn_s, cn_g1, input_col='reads', gc_col='gc', rt_prior_col='mc
:param P: number of integer copy number states to include in the model, values range from 0 to P-1. (int)
:param L: number of libraries. (int)
:param K: maximum polynomial degree allowed for the GC bias curve. (int)
:param J: number of G1-phase cells to use for each S-phase cell when building the cn prior concentration matrix. (int)
:param upsilon: value that alpha and beta terms should sum to when creating a beta distribution prior for tau. (int)
:param run_step3: Should the pretrained S-phase model be run on low variance cells to look for misclassifications. (bool)
'''
Expand Down Expand Up @@ -123,6 +124,7 @@ def __init__(self, cn_s, cn_g1, input_col='reads', gc_col='gc', rt_prior_col='mc
self.P = P
self.L = None
self.K = K
self.J = J

self.upsilon = upsilon
self.run_step3 = run_step3
Expand Down Expand Up @@ -294,14 +296,19 @@ def build_clone_cn_prior(self, cn, cn_df, cn_tensor, clone_cn_profiles):
return etas


def build_composite_cn_prior(self, cn, clone_cn_profiles, weight=1e5, J=5):
def build_composite_cn_prior(self, cn, clone_cn_profiles, weight=1e5):
"""
Build a cn prior concentration matrix that uses both the consensus G1 clone profile and the closest G1-phase cell profiles.
J represents the number of matching G1-phase cells to use for each S-phase cell.
"""

J = self.J
temp_cn_g1 = self.cn_g1.copy()

# reduce J if it is larger than the number of G1 cells in the smallest clone
smallest_clone_size = temp_cn_g1[[self.cell_col, self.clone_col]].drop_duplicates().groupby(self.clone_col).size().min()
if smallest_clone_size < J:
J = smallest_clone_size

# remove G1 cells from certain clones that don't belong to the majority ploidy
# i.e. remove tetraploid cells if clone is 90% diploid
ploidy_col = 'ploidy'
Expand Down

0 comments on commit dec6ffb

Please sign in to comment.