Add jackpotting scatter plots

rouskinlab · Aug 3, 2024 · 269046a
1 parent a6470e0
commit 269046a
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 20 deletions.
diff --git a/make_conda_recipe.py b/make_conda_recipe.py
@@ -122,12 +122,13 @@ def list_nonpip_dependencies():
             "rnastructure >=6.2",
             "samtools >=1.17",
             "matplotlib-base >=3.6",
-            "brotli-python >=1.0"]
+            "brotli-python >=1.0",
+            "python-kaleido >=0.2.1"]
 
 
 def supercede_pip_dependencies():
     """ Dependencies with pip that should be superceded by Conda. """
-    return {"matplotlib", "brotli"}
+    return {"matplotlib", "brotli", "kaleido"}
 
 
 def list_conda_dependencies():

diff --git a/src/seismicrna/cluster/jackpot.py b/src/seismicrna/cluster/jackpot.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pandas as pd
+import plotly.express as px
 
 from .compare import EMRunsK
 from .names import LOG_EXP_NAME, LOG_OBS_NAME
@@ -42,8 +43,26 @@ def calc_log_obs_exp(uniq_reads: UniqReads, ks: list[EMRunsK]):
 
 
 def write_log_obs_exp(log_obs_exp: pd.DataFrame, to_dir: Path):
-    """ Write the expected and observed log counts of unique reads to a
-    CSV file. """
+    """ Write the expected and observed log counts of unique reads. """
     file = to_dir.joinpath(f"read-counts{path.CSVZIP_EXT}")
     log_obs_exp.to_csv(file)
     return file
+
+
+def graph_log_obs_exp(log_obs_exp: pd.DataFrame, to_dir: Path):
+    """ Graph the expected vs. observed log counts of unique reads. """
+    for column in log_obs_exp.columns:
+        if column != LOG_OBS_NAME:
+            k = parse_exp_count_col(column)
+            fig = px.scatter(log_obs_exp,
+                             x=column,
+                             y=LOG_OBS_NAME,
+                             title=f"{LOG_OBS_NAME} vs. {column}")
+            file = to_dir.joinpath(f"log-obs-exp_k{k}{path.PDF_EXT}")
+            fig.write_image(file)
+
+
+def write_jackpotting(uniq_reads: UniqReads, ks: list[EMRunsK], to_dir: Path):
+    log_obs_exp = calc_log_obs_exp(uniq_reads, ks)
+    write_log_obs_exp(log_obs_exp, to_dir)
+    graph_log_obs_exp(log_obs_exp, to_dir)
diff --git a/src/seismicrna/cluster/write.py b/src/seismicrna/cluster/write.py
@@ -10,7 +10,7 @@
 from .params import write_mus, write_pis
 from .em import EMRun
 from .io import write_batches
-from .jackpot import calc_log_obs_exp, write_log_obs_exp
+from .jackpot import write_jackpotting
 from .report import ClusterReport
 from .summary import write_summaries
 from .uniq import UniqReads
@@ -192,8 +192,7 @@ def cluster(mask_report_file: Path, *,
         # Write the observed and expected counts for every best run.
         jackpotting_dir = tmp_clust_dir.joinpath(path.CLUST_JACKPOTTING_DIR)
         jackpotting_dir.mkdir()
-        log_obs_exp = calc_log_obs_exp(uniq_reads, runs_ks_list)
-        write_log_obs_exp(log_obs_exp, jackpotting_dir)
+        write_jackpotting(uniq_reads, runs_ks_list, jackpotting_dir)
         # Summarize the runs in table and graph format.
         summaries_dir = tmp_clust_dir.joinpath(path.CLUST_SUMMARIES_DIR)
         summaries_dir.mkdir()

diff --git a/src/seismicrna/core/arg/cmd.py b/src/seismicrna/core/arg/cmd.py
@@ -1,16 +1,3 @@
-"""
-
-Command Core Module
-
-========================================================================
-
-Define the names of the commands.
-
-------------------------------------------------------------------------
-
-"""
-
-
 CMD_WORKFLOW = "wf"
 CMD_DEMULT = "demult"
 CMD_QC = "qc"