Skip to content

Commit

Permalink
Merge branch 'master' of github.com:YosefLab/Hotspot
Browse files Browse the repository at this point in the history
  • Loading branch information
deto committed Mar 15, 2021
2 parents 4fd5d76 + 6fa379f commit 5c9c353
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 9 deletions.
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 David DeTomaso

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
25 changes: 18 additions & 7 deletions hotspot/hotspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ def __init__(
If omitted, the sum over genes in the counts matrix is used
"""

self.counts = counts
self.latent = latent
self.distances = distances
self.tree = tree
self.model = model

if latent is None and distances is None and tree is None:
raise ValueError("Neither `latent` or `tree` or `distance` arguments were supplied. One of these is required")

Expand All @@ -71,7 +65,10 @@ def __init__(
raise ValueError("Both `distances` and `tree` provided - only one of these should be provided.")

if latent is not None:
assert counts.shape[1] == latent.shape[0]
if counts.shape[1] != latent.shape[0]:
if counts.shape[0] == latent.shape[0]:
raise ValueError("`counts` input should be a Genes x Cells dataframe. Maybe needs transpose?")
raise ValueError("Size mismatch counts/latent. Columns of `counts` should match rows of `latent`.")

if distances is not None:
assert counts.shape[1] == distances.shape[0]
Expand Down Expand Up @@ -106,6 +103,20 @@ def __init__(
'Input `model` should be one of {}'.format(valid_models)
)

valid_genes = counts.var(axis=1) > 0
n_invalid = counts.shape[0] - valid_genes.sum()
if n_invalid > 0:
counts = counts.loc[valid_genes]
print(
"\nRemoving {} undetected/non-varying genes".format(n_invalid)
)

self.counts = counts
self.latent = latent
self.distances = distances
self.tree = tree
self.model = model

self.umi_counts = umi_counts

self.graph = None
Expand Down
7 changes: 5 additions & 2 deletions hotspot/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,11 @@ def compute_modules(Z_scores, min_gene_threshold=10, fdr_threshold=None, z_thres
allZ = np.sort(allZ)
allP = norm.sf(allZ)
allP_c = multipletests(allP, method='fdr_bh')[1]
ii = np.nonzero(allP_c < fdr_threshold)[0][0]
z_threshold = allZ[ii]
ii = np.nonzero(allP_c < fdr_threshold)[0]
if ii.size > 0:
z_threshold = allZ[ii[0]]
else:
z_threshold = allZ[-1]+1

# Compute the linkage matrix
dd = Z_scores.copy().values
Expand Down
96 changes: 96 additions & 0 deletions tests/test_validations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import numpy as np
import pandas as pd
from hotspot import sim_data
from hotspot import Hotspot


def test_models():
    """
    Ensure each model runs

    Builds a small simulated dataset and, for every supported `model`
    value, runs the whole Hotspot pipeline: KNN graph creation,
    autocorrelation, local correlations, module creation and module
    scoring.  After each step the type and shape of the produced result
    are checked.
    """

    # Simulate some data
    N_CELLS = 100
    N_DIM = 10
    N_GENES = 10

    latent = sim_data.sim_latent(N_CELLS, N_DIM)
    latent = pd.DataFrame(
        latent,
        index=['Cell{}'.format(i+1) for i in range(N_CELLS)]
    )

    umi_counts = sim_data.sim_umi_counts(N_CELLS, 2000, 200)
    umi_counts = pd.Series(umi_counts)

    # Genes x Cells expression matrix whose columns line up with `latent`
    gene_exp = np.random.rand(N_GENES, N_CELLS)
    gene_exp = pd.DataFrame(
        gene_exp,
        index=['Gene{}'.format(i+1) for i in range(gene_exp.shape[0])],
        columns=latent.index
    )

    for model in ['danb', 'bernoulli', 'normal', 'none']:
        hs = Hotspot(
            gene_exp, model=model, latent=latent, umi_counts=umi_counts
        )
        hs.create_knn_graph(False, n_neighbors=30)
        hs.compute_hotspot()

        assert isinstance(hs.results, pd.DataFrame)
        assert hs.results.shape[0] == N_GENES

        hs.compute_autocorrelations()

        assert isinstance(hs.results, pd.DataFrame)
        assert hs.results.shape[0] == N_GENES

        hs.compute_local_correlations(gene_exp.index)

        assert isinstance(hs.local_correlation_z, pd.DataFrame)
        assert hs.local_correlation_z.shape[0] == N_GENES
        assert hs.local_correlation_z.shape[1] == N_GENES

        hs.create_modules(min_gene_threshold=2, fdr_threshold=1)

        assert isinstance(hs.modules, pd.Series)
        # Use the explicit set operation: `Index & Index` as a set
        # intersection was deprecated in pandas and no longer has that
        # meaning in pandas 2.x.
        shared_genes = hs.modules.index.intersection(gene_exp.index)
        assert shared_genes.size == N_GENES

        assert isinstance(hs.linkage, np.ndarray)
        assert hs.linkage.shape == (N_GENES-1, 4)

        hs.calculate_module_scores()

        assert isinstance(hs.module_scores, pd.DataFrame)
        assert (hs.module_scores.index == gene_exp.columns).all()


def test_filter_genes():
    """
    Check that all-zero genes are dropped when a Hotspot object is built

    The expression matrix holds N_GENES random rows followed by
    N_GENES_ZERO rows that are zero everywhere; only the first N_GENES
    rows are expected to remain in `hs.counts`.
    """
    # Dataset dimensions
    N_CELLS = 100
    N_DIM = 10
    N_GENES = 10
    N_GENES_ZERO = 5
    n_rows = N_GENES + N_GENES_ZERO

    # Simulated latent space and per-cell UMI totals
    latent = pd.DataFrame(sim_data.sim_latent(N_CELLS, N_DIM))
    umi_counts = pd.Series(sim_data.sim_umi_counts(N_CELLS, 2000, 200))

    # Random expression values, with the trailing rows blanked out
    expression = np.random.rand(n_rows, N_CELLS)
    expression[N_GENES:, :] = 0

    gene_names = ['Gene{}'.format(k) for k in range(1, n_rows + 1)]
    gene_exp = pd.DataFrame(
        expression, index=gene_names, columns=latent.index
    )

    hs = Hotspot(
        gene_exp, model='normal', latent=latent, umi_counts=umi_counts
    )

    n_kept = hs.counts.shape[0]
    assert n_kept == N_GENES

0 comments on commit 5c9c353

Please sign in to comment.