From 9f6211d217d0eb6a52de08cdfdacabc5f6e2fcc7 Mon Sep 17 00:00:00 2001 From: Jacob Schreiber Date: Mon, 19 Sep 2022 13:32:44 -0700 Subject: [PATCH] v1.0.0 --- modisco | 8 +++++--- modiscolite/__init__.py | 2 +- modiscolite/report.py | 7 ++++--- setup.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/modisco b/modisco index 120e690..45a5201 100755 --- a/modisco +++ b/modisco @@ -100,7 +100,7 @@ def save_seqlet_coords(seqlets, dset_name, grp): save_string_list(string_list=coords_strings, dset_name=dset_name, grp=grp) -def motif_discovery_main(sequences, attributions, output, max_seqlets): +def motif_discovery_main(sequences, attributions, output, max_seqlets, n_leiden_runs): (multitask_seqlet_creation_results, metaclustering_results, metacluster_idx_to_submetacluster_results) = modiscolite.tfmodisco.TFMoDISco( hypothetical_contribs=attributions, one_hot=sequences, @@ -108,7 +108,7 @@ def motif_discovery_main(sequences, attributions, output, max_seqlets): sliding_window_size=20, flank_size=5, target_seqlet_fdr=0.05, - n_leiden_runs=2) + n_leiden_runs=n_leiden_runs) ######## ### Ugly saving code @@ -213,6 +213,8 @@ motifs_parser.add_argument("-i", "--h5py", type=str, help="A legacy h5py file containing the one-hot encoded sequences and shap scores.") motifs_parser.add_argument("-n", "--max_seqlets", type=int, required=True, help="The maximum number of seqlets per metacluster.") +motifs_parser.add_argument("-l", "--n_leiden", type=int, default=2, + help="The number of Leiden clusterings to perform with different random seeds.") motifs_parser.add_argument("-w", "--window", type=int, default=400, help="The window surrounding the peak center that will be considered for motif discovery.") motifs_parser.add_argument("-o", "--output", type=str, default="modisco_results.h5", @@ -277,7 +279,7 @@ if args.cmd == "motifs": attributions = attributions[:, :, start:end].transpose(0, 2, 1) motif_discovery_main(sequences, attributions, args.output, - args.max_seqlets) + args.max_seqlets, args.n_leiden) elif args.cmd == 'report': modiscolite.report.report_motifs(args.h5py, args.output, suffix=args.suffix, diff --git a/modiscolite/__init__.py b/modiscolite/__init__.py index 5391864..3e48387 100644 --- a/modiscolite/__init__.py +++ b/modiscolite/__init__.py @@ -8,4 +8,4 @@ from . import cluster from . import report -__version__ = '0.0.9' \ No newline at end of file +__version__ = '1.0.0' \ No newline at end of file diff --git a/modiscolite/report.py b/modiscolite/report.py index 4e892bd..fff9f34 100644 --- a/modiscolite/report.py +++ b/modiscolite/report.py @@ -55,6 +55,7 @@ def fetch_tomtom_matches(ppm, cwm, motifs_db, """ _, fname = tempfile.mkstemp() + _, tomtom_fname = tempfile.mkstemp() score = np.sum(np.abs(cwm), axis=1) trim_thresh = np.max(score) * trim_threshold # Cut off anything less than 30% of max score @@ -69,11 +70,11 @@ def fetch_tomtom_matches(ppm, cwm, motifs_db, write_meme_file(trimmed, background, fname) # run tomtom - cmd = '%s -no-ssc -oc . --verbosity 1 -text -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 %s %s > .tomtom.tmp' % (tomtom_exec_path, fname, motifs_db) + cmd = '%s -no-ssc -oc . --verbosity 1 -text -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10.0 %s %s > %s' % (tomtom_exec_path, fname, motifs_db, tomtom_fname) os.system(cmd) - tomtom_results = pandas.read_csv(".tomtom.tmp", sep="\t", usecols=(1, 5)) - os.system("rm .tomtom.tmp") + tomtom_results = pandas.read_csv(tomtom_fname, sep="\t", usecols=(1, 5)) + os.system('rm ' + tomtom_fname) os.system('rm ' + fname) return tomtom_results diff --git a/setup.py b/setup.py index f6d4783..24f8d1c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='modisco-lite', - version='0.0.9', + version='1.0.0', author='Jacob Schreiber', author_email='jmschreiber91@gmail.com', packages=['modiscolite'],