Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.15.0 #12

Merged
merged 30 commits into from
Mar 27, 2024
Merged
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
df498dc
Modify mu unbias to compute no-close mutations without assuming all r…
matthewfallan Mar 5, 2024
d5d2def
Write solver for p_mut_given_span
matthewfallan Mar 5, 2024
94d9eb3
Add unit tests for more mutation unbiasing methods
matthewfallan Mar 8, 2024
7a4e079
Update unit tests for mu.unbias module
matthewfallan Mar 9, 2024
7f1ae04
Add cluster proportions to mutation rate unbiasing
matthewfallan Mar 9, 2024
6c34107
Change _calc_p_ends to accept p_noclose_given_ends instead of min_gap
matthewfallan Mar 9, 2024
0feea97
Fix issue in _calc_p_clust with false warning messages about invalid …
matthewfallan Mar 9, 2024
a3bc590
Make _calc_p_mut_given_span_noclose compatible with numba jit
matthewfallan Mar 10, 2024
f825912
Add numba>=0.59 to dependencies in pyproject.toml
matthewfallan Mar 10, 2024
40a896d
Enable EM clustering to work with end-aware observer bias correction
matthewfallan Mar 13, 2024
ba882ee
Fix bug in clipping coordinates that extend beyond the section for un…
matthewfallan Mar 13, 2024
15d864f
Prevent using discontiguous paired-end reads for clustering
matthewfallan Mar 14, 2024
185b405
Add options to mask by coverage and discontiguous reads; modify finfo…
matthewfallan Mar 14, 2024
4252c61
Fix log of 0 in EM algo; suppress Numba performance warning
matthewfallan Mar 14, 2024
5fcd4a6
Enable complete workflow (including table) with new clustering algorithm
matthewfallan Mar 19, 2024
1a396a3
Fix bug in tabulation where cluster order 1 would not match masked table
matthewfallan Mar 21, 2024
c2a92c2
Remove mu.unbias.frame module and move mu.unbias.algo module to mu.un…
matthewfallan Mar 21, 2024
0de3a22
Fix bug in expectation function with computing the probability of a r…
matthewfallan Mar 22, 2024
1ddc1ba
Correct the formula in the expectation step for computing joint proba…
matthewfallan Mar 24, 2024
34b68fc
Add noclose argument to find_dims and implement throughout
matthewfallan Mar 25, 2024
22f9485
Rearrange functions for mutation rate unbias module
matthewfallan Mar 25, 2024
f68c672
Small adjustments to code
matthewfallan Mar 25, 2024
33825e1
Begin implementing relate step simulator
matthewfallan Mar 26, 2024
076d7e2
Update relate step simulator
matthewfallan Mar 26, 2024
4d29e85
Move each cli function to the end of its main module
matthewfallan Mar 26, 2024
b2fb257
Add checks to ensure that there are no discontiguous reads in masked …
matthewfallan Mar 26, 2024
17396af
Fix bug where every step in each pipeline received the same (maximum)…
matthewfallan Mar 26, 2024
c9b1616
Update cluster documentation
matthewfallan Mar 26, 2024
e9bf9c4
Add ability to limit read lengths in simulate_p_ends
matthewfallan Mar 27, 2024
213fc69
Update mask documentation and rebuild
matthewfallan Mar 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Write solver for p_mut_given_span
matthewfallan committed Mar 5, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit d5d2def0ae5bafabc53555106c8839371d27e6d0
8 changes: 4 additions & 4 deletions src/seismicrna/cluster/em.py
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
from .names import ADJ_NAME, OBS_NAME
from .uniq import UniqReads
from ..core.header import index_order_clusts
from ..core.mu import calc_p_noclose_given_ends_numpy, calc_mu_adj_numpy, calc_prop_adj_numpy
from ..core.mu import calc_p_noclose_given_ends_numpy, calc_p_mut_p_ends_numpy, calc_prop_adj_numpy

logger = getLogger(__name__)

@@ -221,9 +221,9 @@ def _max_step(self):
/ self.nreads_obs)
# Solve for the real mutation rates that are expected to yield
# the observed mutation rates after considering read drop-out.
self.mus = calc_mu_adj_numpy(self.sparse_mus,
self.uniq_reads.min_mut_gap,
mus_guess)[self.unmasked]
self.mus = calc_p_mut_p_ends_numpy(self.sparse_mus,
self.uniq_reads.min_mut_gap,
mus_guess)[self.unmasked]

def _exp_step(self):
""" Run the Expectation step of the EM algorithm. """
498 changes: 386 additions & 112 deletions src/seismicrna/core/mu/unbias/algo.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/seismicrna/core/mu/unbias/frame.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd

from .algo import calc_p_noclose_given_ends_numpy, calc_mu_adj_numpy, calc_prop_adj_numpy
from .algo import calc_p_noclose_given_ends_numpy, calc_p_mut_p_ends_numpy, calc_prop_adj_numpy
from ...seq import Section


@@ -65,7 +65,7 @@ def calc_mu_adj_frame(mu_obs: pd.DataFrame | pd.Series,
Data frame of the adjusted mutation rates with the same index
and columns as `mu_obs`.
"""
mu_adj = calc_mu_adj_numpy(_mus_to_matrix(mu_obs, section), min_gap)
mu_adj = calc_p_mut_p_ends_numpy(_mus_to_matrix(mu_obs, section), min_gap)
if isinstance(mu_obs, pd.DataFrame):
mu_adj_frame = pd.DataFrame(mu_adj, section.range, mu_obs.columns)
elif isinstance(mu_obs, pd.Series):
10 changes: 5 additions & 5 deletions src/seismicrna/core/mu/unbias/tests/algo_test.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

from seismicrna.core.mu.unbias.algo import (_calc_p_noclose_given_ends,
_calc_p_mut_given_span_noclose,
calc_mu_adj_numpy,
calc_p_mut_p_ends_numpy,
calc_p_noclose_given_ends_numpy,
_clip,
logger as algo_logger)
@@ -495,7 +495,7 @@ def test_inv_calc_mu_adj(self):
for g in range(max_g + 1):
with self.subTest(k=k, g=g):
# Compute the adjusted mutation rates.
mus_adj = calc_mu_adj_numpy(mus_obs, g)
mus_adj = calc_p_mut_p_ends_numpy(mus_obs, g)
# Recompute the observed mutation rates.
mus_reobs = _calc_mu_obs(mus_adj, g)
# Compare observed and reobserved mutation rates.
@@ -520,12 +520,12 @@ def test_mu_multiplex(self):
# Generate random observed mutation rates.
mus_obs = rng.random((n_pos, k)) * max_m
# Adjust all rates simultaneously.
mus_adj_sim = calc_mu_adj_numpy(mus_obs, g)
mus_adj_sim = calc_p_mut_p_ends_numpy(mus_obs, g)
# Adjust the rates of each cluster (i) separately.
mus_adj_sep = np.empty_like(mus_obs)
for i in range(k):
obs_i = mus_obs[:, i].reshape((n_pos, 1))
adj_i = calc_mu_adj_numpy(obs_i, g).reshape(n_pos)
adj_i = calc_p_mut_p_ends_numpy(obs_i, g).reshape(n_pos)
mus_adj_sep[:, i] = adj_i
# Compare the results.
self.assertTrue(np.allclose(mus_adj_sim, mus_adj_sep))
@@ -546,7 +546,7 @@ def test_inv_calc_mu_obs(self):
# Compute the observed mutation rates.
mus_obs = _calc_mu_obs(mus, g)
# Adjust the observed mutation rates.
mus_adj = calc_mu_adj_numpy(mus_obs, g)
mus_adj = calc_p_mut_p_ends_numpy(mus_obs, g)
# Test if adjusted and initial mutation rates match.
self.assertTrue(np.allclose(mus_adj, mus))

6 changes: 3 additions & 3 deletions src/seismicrna/core/mu/unbias/tests/frame_test.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@
import pandas as pd

from seismicrna.core.mu import (calc_p_noclose_given_ends_numpy,
calc_mu_adj_numpy,
calc_p_mut_p_ends_numpy,
calc_f_obs_frame,
calc_mu_adj_frame)
from seismicrna.core.seq import DNA, Section, seq_pos_to_index
@@ -49,7 +49,7 @@ def test_equals_numpy(self):
# Run calc_mu_adj_frame.
mus_adj_df = calc_mu_adj_frame(mus_obs_df, section, gap)
# Run calc_p_mut_p_ends_numpy.
mus_adj_np = calc_mu_adj_numpy(mus_obs_np, gap)
mus_adj_np = calc_p_mut_p_ends_numpy(mus_obs_np, gap)
# Compare the results.
self.assertIsInstance(mus_adj_df, pd.DataFrame)
self.assertTrue(np.allclose(mus_adj_df.values,
@@ -101,7 +101,7 @@ def test_equals_numpy(self):
section,
gap)
# Run calc_p_mut_p_ends_numpy.
mus_adj_np = calc_mu_adj_numpy(mus_obs_np, gap)
mus_adj_np = calc_p_mut_p_ends_numpy(mus_obs_np, gap)
# Compare the results.
self.assertIsInstance(mus_adj_series, pd.Series)
self.assertTrue(np.array_equal(mus_adj_series.values,