From a9753c2cb2ec8678dd9988dc68fcbc1d0b20ab22 Mon Sep 17 00:00:00 2001 From: Jacob Schreiber Date: Thu, 15 Sep 2022 22:32:35 -0700 Subject: [PATCH] v0.0.9 --- modiscolite/__init__.py | 2 +- modiscolite/affinitymat.py | 6 +++++- modiscolite/aggregator.py | 18 +++++++++++------- modiscolite/core.py | 8 ++------ modiscolite/tfmodisco.py | 7 +++---- setup.py | 2 +- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/modiscolite/__init__.py b/modiscolite/__init__.py index 6d59c4d..5391864 100644 --- a/modiscolite/__init__.py +++ b/modiscolite/__init__.py @@ -8,4 +8,4 @@ from . import cluster from . import report -__version__ = '0.0.8' \ No newline at end of file +__version__ = '0.0.9' \ No newline at end of file diff --git a/modiscolite/affinitymat.py b/modiscolite/affinitymat.py index 324f9ad..a03e418 100644 --- a/modiscolite/affinitymat.py +++ b/modiscolite/affinitymat.py @@ -240,8 +240,12 @@ def tsne_probs_calc(self, distances_nn, neighbors_nn): conditional_P = sklearn.manifold._utils._binary_search_perplexity( distances, self.perplexity, verbose=False) + eps = 1e-8 + marginal_sum = conditional_P.sum(axis=-1) + marginal_sum[marginal_sum < eps] = eps + #normalize the conditional_P to sum to 1 across the rows - conditional_P = conditional_P/np.sum(conditional_P, axis=-1)[:,None] + conditional_P = conditional_P / marginal_sum[:,None] data = [] rows = [] diff --git a/modiscolite/aggregator.py b/modiscolite/aggregator.py index c46c85f..5a620a2 100644 --- a/modiscolite/aggregator.py +++ b/modiscolite/aggregator.py @@ -19,6 +19,9 @@ def polish_pattern(pattern, min_frac, min_num, track_set, flank, window_size, bg track_set=track_set, left_flank_to_add=flank, right_flank_to_add=flank) + if pattern is None: + return None + # Trim by IC ppm = pattern.sequence per_pos_ic = util.compute_per_position_ic( @@ -382,13 +385,14 @@ def SimilarPatternsCollapser(patterns, track_set, min_num=min_num, track_set=track_set, flank=flank_to_add, window_size=window_size, bg_freq=bg_freq) - for k in range(len(patterns)): - #Replace EVERY case where the parent or child - # pattern is present with the new pattern. This - # effectively does single-linkage. - if (patterns[k]==parent_pattern or - patterns[k]==child_pattern): - patterns[k]=new_pattern + if new_pattern is not None: + for k in range(len(patterns)): + #Replace EVERY case where the parent or child + # pattern is present with the new pattern. This + # effectively does single-linkage. + if (patterns[k]==parent_pattern or + patterns[k]==child_pattern): + patterns[k]=new_pattern merge_occurred_last_iteration = (len(indices_to_merge) > 0) diff --git a/modiscolite/core.py b/modiscolite/core.py index 98a1f94..c552588 100644 --- a/modiscolite/core.py +++ b/modiscolite/core.py @@ -140,7 +140,6 @@ def compute_subpatterns(self, perplexity, n_seeds, n_iterations=-1): distmat_nn = np.log((1.0/(0.5*np.maximum(affmat_nn, 0.0000001)))-1) distmat_nn = np.maximum(distmat_nn, 0.0) #eliminate tiny neg floats - distmat_sp = scipy.sparse.coo_matrix( (np.concatenate(distmat_nn, axis=0), (np.array([i for i in range(len(seqlet_neighbors)) @@ -151,11 +150,8 @@ def compute_subpatterns(self, perplexity, n_seeds, n_iterations=-1): distmat_sp.sort_indices() #do density adaptation - density_adapted_affmat_transformer =\ - affinitymat.NNTsneConditionalProbs( - perplexity=perplexity) - sp_density_adapted_affmat = density_adapted_affmat_transformer( - affmat_nn, seqlet_neighbors) + sp_density_adapted_affmat = affinitymat.NNTsneConditionalProbs( + perplexity=perplexity)(affmat_nn, seqlet_neighbors) sp_density_adapted_affmat += sp_density_adapted_affmat.T sp_density_adapted_affmat /= np.sum(sp_density_adapted_affmat.data) diff --git a/modiscolite/tfmodisco.py b/modiscolite/tfmodisco.py index e7cb898..23ee898 100644 --- a/modiscolite/tfmodisco.py +++ b/modiscolite/tfmodisco.py @@ -56,8 +56,6 @@ def _density_adaptation(affmat_nn, seqlet_neighbors, tsne_perplexity): affmat_diags = scipy.sparse.diags(1.0 / normfactors) affmat_nn += affmat_diags - - return affmat_nn def _filter_patterns(patterns, min_seqlet_support, window_size, @@ -117,8 +115,9 @@ def _motif_from_clusters(seqlets, track_set, min_overlap, min_num=min_num, track_set=track_set, flank=flank_to_add, window_size=window_size, bg_freq=bg_freq) - if np.sign(np.sum(pattern.contrib_scores)) == track_sign: - cluster_to_motif.append(pattern) + if pattern is not None: + if np.sign(np.sum(pattern.contrib_scores)) == track_sign: + cluster_to_motif.append(pattern) return cluster_to_motif diff --git a/setup.py b/setup.py index 9281126..f6d4783 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='modisco-lite', - version='0.0.8', + version='0.0.9', author='Jacob Schreiber', author_email='jmschreiber91@gmail.com', packages=['modiscolite'],