From 9f6211d217d0eb6a52de08cdfdacabc5f6e2fcc7 Mon Sep 17 00:00:00 2001
From: Jacob Schreiber <jmschreiber91@gmail.com>
Date: Mon, 19 Sep 2022 13:32:44 -0700
Subject: [PATCH] v1.0.0

---
 modisco                 | 8 +++++---
 modiscolite/__init__.py | 2 +-
 modiscolite/report.py   | 7 ++++---
 setup.py                | 2 +-
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/modisco b/modisco
index 120e690..45a5201 100755
--- a/modisco
+++ b/modisco
@@ -100,7 +100,7 @@ def save_seqlet_coords(seqlets, dset_name, grp):
 	save_string_list(string_list=coords_strings,
 					 dset_name=dset_name, grp=grp)
 
-def motif_discovery_main(sequences, attributions, output, max_seqlets):
+def motif_discovery_main(sequences, attributions, output, max_seqlets, n_leiden_runs):
 	(multitask_seqlet_creation_results, metaclustering_results, 
 		metacluster_idx_to_submetacluster_results) = modiscolite.tfmodisco.TFMoDISco(
 		hypothetical_contribs=attributions, one_hot=sequences,
@@ -108,7 +108,7 @@ def motif_discovery_main(sequences, attributions, output, max_seqlets):
 		sliding_window_size=20,
 		flank_size=5,
 		target_seqlet_fdr=0.05,
-		n_leiden_runs=2)
+		n_leiden_runs=n_leiden_runs)
 
 	########
 	### Ugly saving code
@@ -213,6 +213,8 @@ motifs_parser.add_argument("-i", "--h5py", type=str,
 	help="A legacy h5py file containing the one-hot encoded sequences and shap scores.")
 motifs_parser.add_argument("-n", "--max_seqlets", type=int, required=True,
 	help="The maximum number of seqlets per metacluster.")
+motifs_parser.add_argument("-l", "--n_leiden", type=int, default=2,
+	help="The number of Leiden clusterings to perform with different random seeds.")
 motifs_parser.add_argument("-w", "--window", type=int, default=400,
 	help="The window surrounding the peak center that will be considered for motif discovery.")
 motifs_parser.add_argument("-o", "--output", type=str, default="modisco_results.h5",
@@ -277,7 +279,7 @@ if args.cmd == "motifs":
 		attributions = attributions[:, :, start:end].transpose(0, 2, 1)
 
 	motif_discovery_main(sequences, attributions, args.output, 
-		args.max_seqlets)
+		args.max_seqlets, args.n_leiden)
 
 elif args.cmd == 'report':
 	modiscolite.report.report_motifs(args.h5py, args.output, suffix=args.suffix, 
diff --git a/modiscolite/__init__.py b/modiscolite/__init__.py
index 5391864..3e48387 100644
--- a/modiscolite/__init__.py
+++ b/modiscolite/__init__.py
@@ -8,4 +8,4 @@
 from . import cluster
 from . import report
 
-__version__ = '0.0.9'
\ No newline at end of file
+__version__ = '1.0.0'
\ No newline at end of file
diff --git a/modiscolite/report.py b/modiscolite/report.py
index 4e892bd..fff9f34 100644
--- a/modiscolite/report.py
+++ b/modiscolite/report.py
@@ -55,6 +55,7 @@ def fetch_tomtom_matches(ppm, cwm, motifs_db,
 	"""
 
 	_, fname = tempfile.mkstemp()
+	_, tomtom_fname = tempfile.mkstemp()
 
 	score = np.sum(np.abs(cwm), axis=1)
 	trim_thresh = np.max(score) * trim_threshold  # Cut off anything less than 30% of max score
@@ -69,11 +70,11 @@ def fetch_tomtom_matches(ppm, cwm, motifs_db,
 	write_meme_file(trimmed, background, fname)
 
 	# run tomtom
-	cmd = '%s -no-ssc -oc . --verbosity 1 -text -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 %s %s > .tomtom.tmp' % (tomtom_exec_path, fname, motifs_db)
+	cmd = '%s -no-ssc -oc . --verbosity 1 -text -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 %s %s > %s' % (tomtom_exec_path, fname, motifs_db, tomtom_fname)
 
 	os.system(cmd)
-	tomtom_results = pandas.read_csv(".tomtom.tmp", sep="\t", usecols=(1, 5))
-	os.system("rm .tomtom.tmp")
+	tomtom_results = pandas.read_csv(tomtom_fname, sep="\t", usecols=(1, 5))
+	os.system('rm ' + tomtom_fname)
 	os.system('rm ' + fname)
 	return tomtom_results
 
diff --git a/setup.py b/setup.py
index f6d4783..24f8d1c 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
 	name='modisco-lite',
-	version='0.0.9',
+	version='1.0.0',
 	author='Jacob Schreiber',
 	author_email='jmschreiber91@gmail.com',
 	packages=['modiscolite'],