diff --git a/src/sourmash/__init__.py b/src/sourmash/__init__.py index 53ee6e4803..b6107d08c0 100644 --- a/src/sourmash/__init__.py +++ b/src/sourmash/__init__.py @@ -44,10 +44,10 @@ class MinHash - hash sketch class MAX_HASH = get_minhash_max_hash() from .signature import ( - load_signatures as load_signatures_private, - load_one_signature, + load_signatures_from_json, + load_one_signature_from_json, SourmashSignature, - save_signatures, + save_signatures_to_json, ) @@ -69,7 +69,33 @@ def load_signatures(*args, **kwargs): has been removed and the function no longer outputs to stderr. Moreover, do_raise is now True by default. """ - return load_signatures_private(*args, **kwargs) + return load_signatures_from_json(*args, **kwargs) + + +@deprecated( + deprecated_in="4.8.9", + removed_in="5.0", + current_version=VERSION, + details="Use load_file_as_signatures instead.", +) +def load_one_signature(*args, **kwargs): + """Load exactly one signature from JSON data. + + Returns a single SourmashSignature object. + + Raises ValueError if the input does not contain exactly one signature. 
+ """ + return load_one_signature_from_json(*args, **kwargs) + + +@deprecated( + deprecated_in="4.8.9", + removed_in="5.0", + current_version=VERSION, + details="use sourmash_args.SaveSignaturesToLocation instead.", +) +def save_signatures(*args, **kwargs): + return save_signatures_to_json(*args, **kwargs) from .sbtmh import load_sbt_index as load_sbt_index_private diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index ffd698b423..c0577220db 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -46,7 +46,8 @@ ) from sourmash.manifest import CollectionManifest from sourmash.logging import debug_literal -from sourmash.signature import load_signatures, save_signatures +from sourmash.signature import load_signatures_from_json, save_signatures_to_json + from sourmash.minhash import ( flatten_and_downsample_scaled, flatten_and_downsample_num, @@ -425,12 +426,12 @@ def insert(self, node): def save(self, path): with open(path, "w") as fp: - save_signatures(self.signatures(), fp) + save_signatures_to_json(self.signatures(), fp) @classmethod def load(cls, location, filename=None): "Load signatures from a JSON signature file." - si = load_signatures(location, do_raise=True) + si = load_signatures_from_json(location, do_raise=True) if filename is None: filename = location @@ -639,7 +640,7 @@ def _signatures_with_internal(self): or self.traverse_yield_all ): sig_data = self.storage.load(filename) - for ss in load_signatures(sig_data): + for ss in load_signatures_from_json(sig_data): yield ss, filename def signatures(self): @@ -653,7 +654,7 @@ def signatures(self): # yield all signatures found in manifest for filename in manifest.locations(): data = self.storage.load(filename) - for ss in load_signatures(data): + for ss in load_signatures_from_json(data): # in case multiple signatures are in the file, check # to make sure we want to return each one. 
if ss in manifest: @@ -682,7 +683,7 @@ def select(x): return True data = self.storage.load(filename) - for ss in load_signatures(data): + for ss in load_signatures_from_json(data): if select(ss): yield ss diff --git a/src/sourmash/save_load.py b/src/sourmash/save_load.py index 1f73c116c7..850d7f2b10 100644 --- a/src/sourmash/save_load.py +++ b/src/sourmash/save_load.py @@ -299,8 +299,8 @@ def _get_signatures_from_rust(siglist): # minhash (and hence one md5sum) per signature, while # Rust supports multiple. For now, go through serializing # and deserializing the signature! See issue #1167 for more. - json_str = sourmash.save_signatures(siglist) - yield from sourmash.signature.load_signatures(json_str) + json_str = sigmod.save_signatures_to_json(siglist) + yield from sigmod.load_signatures_from_json(json_str) class SaveSignatures_NoOutput(Base_SaveSignaturesToLocation): @@ -362,7 +362,7 @@ def add(self, ss): i += 1 with open(outname, "wb") as fp: - sigmod.save_signatures([ss], fp, compression=1) + sigmod.save_signatures_to_json([ss], fp, compression=1) class SaveSignatures_SqliteIndex(Base_SaveSignaturesToLocation): @@ -425,7 +425,7 @@ def open(self): def close(self): if self.location == "-": - sourmash.save_signatures(self.keep, sys.stdout) + sigmod.save_signatures_to_json(self.keep, sys.stdout) else: # text mode? 
encode in utf-8 mode = "w" @@ -437,7 +437,7 @@ def close(self): mode = "wb" with open(self.location, mode, encoding=encoding) as fp: - sourmash.save_signatures(self.keep, fp, compression=self.compress) + sigmod.save_signatures_to_json(self.keep, fp, compression=self.compress) def add(self, ss): super().add(ss) @@ -470,6 +470,7 @@ def close(self): manifest_data = manifest_fp.getvalue().encode("utf-8") self.storage.save(manifest_name, manifest_data, overwrite=True, compress=True) + self.storage.flush() self.storage.close() @@ -523,7 +524,7 @@ def add(self, add_sig): raise ValueError("this output is not open") for ss in _get_signatures_from_rust([add_sig]): - buf = sigmod.save_signatures([ss], compression=1) + buf = sigmod.save_signatures_to_json([ss], compression=1) md5 = ss.md5sum() storage = self.storage diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index 452ca29375..ccb7fc3d78 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -162,7 +162,7 @@ def signatures(self): if self.manifest: # if manifest, use it & load using direct path to storage. # this will be faster when using picklists. - from .signature import load_one_signature + from .signature import load_one_signature_from_json manifest = self.manifest @@ -175,7 +175,7 @@ def signatures(self): buf = self.storage.load(loc) # if more than one signature can be in a file, we need # to recheck picklists here. - ss = load_one_signature(buf) + ss = load_one_signature_from_json(buf) yield ss else: # no manifest? iterate over all leaves. 
diff --git a/src/sourmash/sbt_storage.py b/src/sourmash/sbt_storage.py index 1b7a9e7d78..1bb7e4b7c3 100644 --- a/src/sourmash/sbt_storage.py +++ b/src/sourmash/sbt_storage.py @@ -8,6 +8,7 @@ import zipfile from abc import ABC from pathlib import Path +import warnings from ._lowlevel import ffi, lib from .utils import RustObject, rustcall, decode_str @@ -279,27 +280,31 @@ def _write_to_zf(self, zf, path, content, *, compress=False): zi.external_attr = perms def save(self, path, content, *, overwrite=False, compress=False): - # First try to save to self.zipfile, if it is not writable - # or would introduce duplicates then try to save it in the buffer - if overwrite: - newpath = path - do_write = True - else: - newpath, do_write = self._generate_filename(self.zipfile, path, content) - if do_write: - try: - self._write_to_zf(self.zipfile, newpath, content, compress=compress) - except (ValueError, RuntimeError): - # Can't write in the zipfile, write in buffer instead - # CTB: do we need to generate a new filename wrt to the - # bufferzip, too? Not sure this code is working as intended... - if self.bufferzip: - self._write_to_zf( - self.bufferzip, newpath, content, compress=compress - ) - else: - # Throw error, can't write the data - raise ValueError("can't write data") + # zipfile warns (UserWarning, "Duplicate name: ...") when an entry is rewritten; silence only that category so other warnings still surface. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=UserWarning) + + # First try to save to self.zipfile, if it is not writable + # or would introduce duplicates then try to save it in the buffer + if overwrite: + newpath = path + do_write = True + else: + newpath, do_write = self._generate_filename(self.zipfile, path, content) + if do_write: + try: + self._write_to_zf(self.zipfile, newpath, content, compress=compress) + except (ValueError, RuntimeError): + # Can't write in the zipfile, write in buffer instead + # CTB: do we need to generate a new filename wrt to the + # bufferzip, too? Not sure this code is working as intended... 
+ if self.bufferzip: + self._write_to_zf( + self.bufferzip, newpath, content, compress=compress + ) + else: + # Throw error, can't write the data + raise ValueError("can't write data") return newpath diff --git a/src/sourmash/sbtmh.py b/src/sourmash/sbtmh.py index 3fa7aa23f2..9bc1586dd4 100644 --- a/src/sourmash/sbtmh.py +++ b/src/sourmash/sbtmh.py @@ -52,7 +52,7 @@ def save(self, path): # content...) self.data - buf = signature.save_signatures([self.data], compression=1) + buf = signature.save_signatures_to_json([self.data], compression=1) return self.storage.save(path, buf) def update(self, parent): @@ -70,7 +70,7 @@ def update(self, parent): def data(self): if self._data is None: buf = BytesIO(self.storage.load(self._path)) - self._data = signature.load_one_signature(buf) + self._data = signature.load_one_signature_from_json(buf) return self._data @data.setter diff --git a/src/sourmash/signature.py b/src/sourmash/signature.py index 3faa5e856b..6f0a9d7b83 100644 --- a/src/sourmash/signature.py +++ b/src/sourmash/signature.py @@ -379,7 +379,7 @@ def _detect_input_type(data): return SigInput.UNKNOWN -def load_signatures( +def load_signatures_from_json( data, ksize=None, select_moltype=None, @@ -469,8 +469,10 @@ def load_signatures( raise -def load_one_signature(data, ksize=None, select_moltype=None, ignore_md5sum=False): - sigiter = load_signatures( +def load_one_signature_from_json( + data, ksize=None, select_moltype=None, ignore_md5sum=False +): + sigiter = load_signatures_from_json( data, ksize=ksize, select_moltype=select_moltype, ignore_md5sum=ignore_md5sum ) @@ -487,7 +489,7 @@ def load_one_signature(data, ksize=None, select_moltype=None, ignore_md5sum=Fals raise ValueError("expected to load exactly one signature") -def save_signatures(siglist, fp=None, compression=0): +def save_signatures_to_json(siglist, fp=None, compression=0): "Save multiple signatures into a JSON string (or into file handle 'fp')" attached_refs = weakref.WeakKeyDictionary() diff --git 
a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 04133a6796..432881b3b9 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -12,7 +12,11 @@ import sourmash_tst_utils as utils import sourmash -from sourmash.signature import load_signatures +from sourmash.signature import ( + load_signatures_from_json, + save_signatures_to_json, + load_one_signature_from_json, +) from sourmash.manifest import CollectionManifest from sourmash_tst_utils import SourmashCommandFailed @@ -68,8 +72,8 @@ def test_sig_merge_1_use_full_signature_in_cmd(runtmp): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -101,8 +105,8 @@ def test_sig_merge_1_fromfile_picklist(runtmp): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -137,8 +141,8 @@ def test_sig_merge_1_fromfile_picklist_gz(runtmp): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -158,8 +162,8 @@ def test_sig_merge_1(c): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = 
load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -178,8 +182,8 @@ def test_sig_merge_1_multisig(c): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -211,7 +215,7 @@ def test_sig_merge_1_name(c): assignedSigName, ) - test_merge_sig = sourmash.load_one_signature(outsig) + test_merge_sig = load_one_signature_from_json(outsig) print("outsig", outsig) print("xx_test_merge_sig.name", test_merge_sig.name) @@ -230,8 +234,8 @@ def test_sig_merge_1_ksize_moltype(c): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig2and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig2and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -262,8 +266,8 @@ def test_sig_merge_2(c): # stdout should be new signature out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47) + actual_merge_sig = load_one_signature_from_json(out) print(out) @@ -277,7 +281,7 @@ def test_sig_merge_3_abund_ab_ok(c): sig63abund = utils.get_test_data("track_abund/63.fa.sig") c.run_sourmash("sig", "merge", sig47abund, sig63abund) - sourmash.load_one_signature(c.last_result.out) + load_one_signature_from_json(c.last_result.out) # CTB: should check that this merge did what we think it should do! 
@@ -323,13 +327,13 @@ def test_sig_filter_1(c): # stdout should be new signature out = c.last_result.out - filtered_sigs = list(load_signatures(out)) + filtered_sigs = list(load_signatures_from_json(out)) filtered_sigs.sort(key=lambda x: str(x)) assert len(filtered_sigs) == 2 - mh47 = sourmash.load_one_signature(sig47).minhash - mh63 = sourmash.load_one_signature(sig63).minhash + mh47 = load_one_signature_from_json(sig47).minhash + mh63 = load_one_signature_from_json(sig63).minhash assert filtered_sigs[0].minhash == mh47 assert filtered_sigs[1].minhash == mh63 @@ -344,8 +348,8 @@ def test_sig_filter_2(c): # stdout should be new signature out = c.last_result.out - filtered_sig = sourmash.load_one_signature(out) - test_sig = sourmash.load_one_signature(sig47) + filtered_sig = load_one_signature_from_json(out) + test_sig = load_one_signature_from_json(sig47) abunds = test_sig.minhash.hashes abunds = {k: v for (k, v) in abunds.items() if v >= 2 and v <= 5} @@ -363,8 +367,8 @@ def test_sig_filter_3(c): # stdout should be new signature out = c.last_result.out - filtered_sig = sourmash.load_one_signature(out) - test_sig = sourmash.load_one_signature(sig47) + filtered_sig = load_one_signature_from_json(out) + test_sig = load_one_signature_from_json(sig47) abunds = test_sig.minhash.hashes abunds = {k: v for (k, v) in abunds.items() if v >= 2} @@ -382,8 +386,8 @@ def test_sig_filter_3_ksize_select(c): # stdout should be new signature out = c.last_result.out - filtered_sig = sourmash.load_one_signature(out) - test_sig = sourmash.load_one_signature(psw_mag, ksize=31) + filtered_sig = load_one_signature_from_json(out) + test_sig = load_one_signature_from_json(psw_mag, ksize=31) abunds = test_sig.minhash.hashes abunds = {k: v for (k, v) in abunds.items() if v >= 2} @@ -404,8 +408,8 @@ def test_sig_merge_flatten(c): print(c.last_result) out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + 
test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -426,8 +430,8 @@ def test_sig_merge_flatten_2(c): print(c.last_result) out = c.last_result.out - test_merge_sig = sourmash.load_one_signature(sig47and63) - actual_merge_sig = sourmash.load_one_signature(out) + test_merge_sig = load_one_signature_from_json(sig47and63) + actual_merge_sig = load_one_signature_from_json(out) print(test_merge_sig.minhash) print(actual_merge_sig.minhash) @@ -459,8 +463,8 @@ def test_sig_intersect_1(runtmp): # stdout should be new signature out = c.last_result.out - test_intersect_sig = sourmash.load_one_signature(sig47and63) - actual_intersect_sig = sourmash.load_one_signature(out) + test_intersect_sig = load_one_signature_from_json(sig47and63) + actual_intersect_sig = load_one_signature_from_json(out) print(test_intersect_sig.minhash) print(actual_intersect_sig.minhash) @@ -492,8 +496,8 @@ def test_sig_intersect_1_fromfile_picklist(runtmp): # stdout should be new signature out = c.last_result.out - test_intersect_sig = sourmash.load_one_signature(sig47and63) - actual_intersect_sig = sourmash.load_one_signature(out) + test_intersect_sig = load_one_signature_from_json(sig47and63) + actual_intersect_sig = load_one_signature_from_json(out) print(test_intersect_sig.minhash) print(actual_intersect_sig.minhash) @@ -514,8 +518,8 @@ def test_sig_intersect_2(c): # stdout should be new signature out = c.last_result.out - test_intersect_sig = sourmash.load_one_signature(sig47and63) - actual_intersect_sig = sourmash.load_one_signature(out) + test_intersect_sig = load_one_signature_from_json(sig47and63) + actual_intersect_sig = load_one_signature_from_json(out) print(test_intersect_sig.minhash) print(actual_intersect_sig.minhash) @@ -534,11 +538,11 @@ def test_sig_intersect_3(c): # stdout should be new signature out = c.last_result.out - actual_intersect_sig = 
sourmash.load_one_signature(out) + actual_intersect_sig = load_one_signature_from_json(out) # actually do an intersection ourselves for the test - mh47 = sourmash.load_one_signature(sig47).minhash - mh63 = sourmash.load_one_signature(sig63).minhash + mh47 = load_one_signature_from_json(sig47).minhash + mh63 = load_one_signature_from_json(sig63).minhash mh47_abunds = mh47.hashes mh63_mins = set(mh63.hashes.keys()) @@ -566,11 +570,11 @@ def test_sig_intersect_4(c): # stdout should be new signature out = c.last_result.out - actual_intersect_sig = sourmash.load_one_signature(out) + actual_intersect_sig = load_one_signature_from_json(out) # actually do an intersection ourselves for the test - mh47 = sourmash.load_one_signature(sig47).minhash - mh63 = sourmash.load_one_signature(sig63).minhash + mh47 = load_one_signature_from_json(sig47).minhash + mh63 = load_one_signature_from_json(sig63).minhash mh47_abunds = mh47.hashes mh63_mins = set(mh63.hashes.keys()) @@ -631,8 +635,8 @@ def test_sig_intersect_7(c): # stdout should be new signature out = c.last_result.out - test_intersect_sig = sourmash.load_one_signature(sig47) - actual_intersect_sig = sourmash.load_one_signature(out) + test_intersect_sig = load_one_signature_from_json(sig47) + actual_intersect_sig = load_one_signature_from_json(out) print(test_intersect_sig.minhash) print(actual_intersect_sig.minhash) @@ -650,7 +654,7 @@ def test_sig_intersect_8_multisig(c): # stdout should be new signature out = c.last_result.out - actual_intersect_sig = sourmash.load_one_signature(out) + actual_intersect_sig = load_one_signature_from_json(out) assert not len(actual_intersect_sig.minhash) @@ -664,11 +668,11 @@ def test_sig_inflate_1(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_inflate_sig = sourmash.load_one_signature(out) + actual_inflate_sig = load_one_signature_from_json(out) actual_inflate_mh = actual_inflate_sig.minhash # should be identical to track_abund sig - sig47 = 
sourmash.load_one_signature(sig47_abund) + sig47 = load_one_signature_from_json(sig47_abund) mh47 = sig47.minhash assert actual_inflate_sig.name == sig47.name @@ -684,11 +688,11 @@ def test_sig_inflate_2(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_inflate_sig = sourmash.load_one_signature(out) + actual_inflate_sig = load_one_signature_from_json(out) # actually do an inflation ourselves for the test - mh47 = sourmash.load_one_signature(sig47).minhash - mh63 = sourmash.load_one_signature(sig63).minhash + mh47 = load_one_signature_from_json(sig47).minhash + mh63 = load_one_signature_from_json(sig63).minhash mh47_abunds = mh47.hashes mh63_mins = set(mh63.hashes.keys()) @@ -723,7 +727,7 @@ def test_sig_inflate_4_picklist(runtmp): sig63 = utils.get_test_data("63.fa.sig") sig47_flat = utils.get_test_data("47.fa.sig") - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) _write_file(runtmp, "pl.csv", ["md5", ss63.md5sum()]) @@ -736,11 +740,11 @@ def test_sig_inflate_4_picklist(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_inflate_sig = sourmash.load_one_signature(out) + actual_inflate_sig = load_one_signature_from_json(out) # actually do an inflation ourselves for the test - mh47 = sourmash.load_one_signature(sig47).minhash - mh63 = sourmash.load_one_signature(sig63).minhash + mh47 = load_one_signature_from_json(sig47).minhash + mh63 = load_one_signature_from_json(sig63).minhash mh47_abunds = mh47.hashes mh63_mins = set(mh63.hashes.keys()) @@ -779,9 +783,31 @@ def test_sig_subtract_1(runtmp): # stdout should be new signature out = c.last_result.out - test1_sig = sourmash.load_one_signature(sig47) - test2_sig = sourmash.load_one_signature(sig63) - actual_subtract_sig = sourmash.load_one_signature(out) + test1_sig = load_one_signature_from_json(sig47) + test2_sig = load_one_signature_from_json(sig63) + actual_subtract_sig = 
load_one_signature_from_json(out) + + mins = set(test1_sig.minhash.hashes.keys()) + mins -= set(test2_sig.minhash.hashes.keys()) + + assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) + + +def test_sig_subtract_1_sigzip(runtmp): + c = runtmp + # subtract of 63 from 47 + sig47 = utils.get_test_data("47.fa.sig.zip") + sig63 = utils.get_test_data("63.fa.sig.zip") + c.run_sourmash("sig", "subtract", sig47, sig63) + + # stdout should be new signature + out = c.last_result.out + + from sourmash import sourmash_args + + test1_sig = sourmash_args.load_one_signature(sig47) + test2_sig = sourmash_args.load_one_signature(sig63) + actual_subtract_sig = load_one_signature_from_json(out) mins = set(test1_sig.minhash.hashes.keys()) mins -= set(test2_sig.minhash.hashes.keys()) @@ -822,9 +848,9 @@ def test_sig_subtract_1_abund(runtmp): # stdout should be new signature out = c.last_result.out - test1_sig = sourmash.load_one_signature(sig47) - test2_sig = sourmash.load_one_signature(sig63) - actual_subtract_sig = sourmash.load_one_signature(out) + test1_sig = load_one_signature_from_json(sig47) + test2_sig = load_one_signature_from_json(sig63) + actual_subtract_sig = load_one_signature_from_json(out) assert actual_subtract_sig.minhash.track_abundance mins = set(test1_sig.minhash.hashes.keys()) @@ -867,9 +893,9 @@ def test_sig_subtract_1_flatten(runtmp): # stdout should be new signature out = c.last_result.out - test1_sig = sourmash.load_one_signature(sig47) - test2_sig = sourmash.load_one_signature(sig63) - actual_subtract_sig = sourmash.load_one_signature(out) + test1_sig = load_one_signature_from_json(sig47) + test2_sig = load_one_signature_from_json(sig63) + actual_subtract_sig = load_one_signature_from_json(out) assert not actual_subtract_sig.minhash.track_abundance mins = set(test1_sig.minhash.hashes.keys()) @@ -888,7 +914,7 @@ def test_sig_subtract_1_multisig(runtmp): # stdout should be new signature out = c.last_result.out - actual_subtract_sig = 
sourmash.load_one_signature(out) + actual_subtract_sig = load_one_signature_from_json(out) assert not set(actual_subtract_sig.minhash.hashes.keys()) @@ -954,8 +980,8 @@ def test_sig_rename_1(runtmp): # stdout should be new signature out = c.last_result.out - test_rename_sig = sourmash.load_one_signature(sig47) - actual_rename_sig = sourmash.load_one_signature(out) + test_rename_sig = load_one_signature_from_json(sig47) + actual_rename_sig = load_one_signature_from_json(out) print(test_rename_sig.minhash) print(actual_rename_sig.minhash) @@ -987,8 +1013,8 @@ def test_sig_rename_1_fromfile_picklist(runtmp): # stdout should be new signature out = c.last_result.out - test_rename_sig = sourmash.load_one_signature(sig47) - actual_rename_sig = sourmash.load_one_signature(out) + test_rename_sig = load_one_signature_from_json(sig47) + actual_rename_sig = load_one_signature_from_json(out) print(test_rename_sig.minhash) print(actual_rename_sig.minhash) @@ -1009,7 +1035,7 @@ def test_sig_rename_1_multisig(c): out = c.last_result.out n = 0 - for sig in load_signatures(out): + for sig in load_signatures_from_json(out): assert sig.name == "fiz bar" n += 1 @@ -1027,7 +1053,7 @@ def test_sig_rename_1_multisig_ksize(c): out = c.last_result.out n = 0 - for sig in load_signatures(out): + for sig in load_signatures_from_json(out): assert sig.name == "fiz bar" n += 1 @@ -1045,7 +1071,7 @@ def test_sig_rename_2_output_to_same(c): c.run_sourmash("sig", "rename", "-d", inplace, "fiz bar", "-o", inplace) - actual_rename_sig = sourmash.load_one_signature(inplace) + actual_rename_sig = load_one_signature_from_json(inplace) assert actual_rename_sig.name == "fiz bar" @@ -1104,8 +1130,8 @@ def test_sig_cat_1(c): # stdout should be same signature out = c.last_result.out - test_cat_sig = sourmash.load_one_signature(sig47) - actual_cat_sig = sourmash.load_one_signature(out) + test_cat_sig = load_one_signature_from_json(sig47) + actual_cat_sig = load_one_signature_from_json(out) assert 
actual_cat_sig == test_cat_sig @@ -1119,8 +1145,8 @@ def test_sig_cat_1_no_unique(c): # stdout should be same signature out = c.last_result.out - test_cat_sig = sourmash.load_one_signature(sig47) - actual_cat_sigs = load_signatures(out) + test_cat_sig = load_one_signature_from_json(sig47) + actual_cat_sigs = load_signatures_from_json(out) for n, sig in enumerate(actual_cat_sigs): assert sig == test_cat_sig @@ -1139,8 +1165,8 @@ def test_sig_cat_1_unique(c): out = c.last_result.out err = c.last_result.err - test_cat_sig = sourmash.load_one_signature(sig47) - actual_cat_sigs = load_signatures(out) + test_cat_sig = load_one_signature_from_json(sig47) + actual_cat_sigs = load_signatures_from_json(out) for n, sig in enumerate(actual_cat_sigs): assert sig == test_cat_sig @@ -1161,7 +1187,7 @@ def test_sig_cat_2(c): # stdout should be same signatures out = c.last_result.out - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) assert ( @@ -1181,7 +1207,7 @@ def test_sig_cat_2_out(c): # stdout should be same signatures out = c.output("out.sig") - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) assert ( @@ -1206,7 +1232,7 @@ def test_sig_cat_2_out_inplace(c): # stdout should be same signatures out = input_sig - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) assert ( @@ -1234,14 +1260,14 @@ def test_sig_cat_3_filelist(c): # make this a list, not a set, because a set will collapse identical # signatures. `sig cat` does not collapse identical signatures, although # the pathlist function will ignore duplicate files. 
- siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) # verify the number of signatures matches what we expect to see based # on the input files all_sigs = [] - all_sigs += list(load_signatures(sig47)) - all_sigs += list(load_signatures(sig47abund)) - all_sigs += list(load_signatures(multisig)) + all_sigs += list(load_signatures_from_json(sig47)) + all_sigs += list(load_signatures_from_json(sig47abund)) + all_sigs += list(load_signatures_from_json(multisig)) assert len(all_sigs) == len(siglist) @@ -1270,7 +1296,7 @@ def test_sig_cat_4_filelist_with_dbs(c): # stdout should be same signatures out = c.output("out.sig") - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) # print("siglist: ",siglist) # print("\n") @@ -1278,8 +1304,8 @@ def test_sig_cat_4_filelist_with_dbs(c): # verify the number of signatures matches what we expect to see based # on the input files all_sigs = [] - all_sigs += list(load_signatures(sig47)) - all_sigs += list(load_signatures(sig47abund)) + all_sigs += list(load_signatures_from_json(sig47)) + all_sigs += list(load_signatures_from_json(sig47abund)) all_sigs += list(sourmash.load_file_as_signatures(sbt)) assert len(all_sigs) == len(siglist) @@ -1309,7 +1335,7 @@ def test_sig_cat_5_from_file(c): # stdout should be same signatures out = c.output("out.sig") - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) # print("siglist: ",siglist) # print("\n") @@ -1317,8 +1343,8 @@ def test_sig_cat_5_from_file(c): # verify the number of signatures matches what we expect to see based # on the input files all_sigs = [] - all_sigs += list(load_signatures(sig47)) - all_sigs += list(load_signatures(sig47abund)) + all_sigs += list(load_signatures_from_json(sig47)) + all_sigs += list(load_signatures_from_json(sig47abund)) all_sigs += list(sourmash.load_file_as_signatures(sbt)) assert len(all_sigs) == len(siglist) @@ 
-1359,7 +1385,7 @@ def test_sig_cat_5_from_file_picklist(runtmp): # stdout should be same signatures out = c.output("out.sig") - siglist = list(load_signatures(out)) + siglist = list(load_signatures_from_json(out)) print(len(siglist)) # print("siglist: ",siglist) # print("\n") @@ -1367,7 +1393,7 @@ def test_sig_cat_5_from_file_picklist(runtmp): # verify the number of signatures matches what we expect to see based # on the input files all_sigs = [] - all_sigs += list(load_signatures(sig47, ksize=31)) + all_sigs += list(load_signatures_from_json(sig47, ksize=31)) assert len(all_sigs) == len(siglist) @@ -1426,8 +1452,8 @@ def test_sig_split_1(runtmp): assert os.path.exists(c.output(outname)) - test_split_sig = sourmash.load_one_signature(sig47) - actual_split_sig = sourmash.load_one_signature(c.output(outname)) + test_split_sig = load_one_signature_from_json(sig47) + actual_split_sig = load_one_signature_from_json(c.output(outname)) assert actual_split_sig == test_split_sig @@ -1453,8 +1479,8 @@ def test_sig_split_1_fromfile_picklist(runtmp): assert os.path.exists(c.output(outname)) - test_split_sig = sourmash.load_one_signature(sig47) - actual_split_sig = sourmash.load_one_signature(c.output(outname)) + test_split_sig = load_one_signature_from_json(sig47) + actual_split_sig = load_one_signature_from_json(c.output(outname)) assert actual_split_sig == test_split_sig @@ -1487,12 +1513,12 @@ def test_sig_split_2(c): assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) - test_split_sig = sourmash.load_one_signature(sig47) + test_split_sig = load_one_signature_from_json(sig47) - actual_split_sig = sourmash.load_one_signature(c.output(outname1)) + actual_split_sig = load_one_signature_from_json(c.output(outname1)) assert actual_split_sig == test_split_sig - actual_split_sig = sourmash.load_one_signature(c.output(outname2)) + actual_split_sig = load_one_signature_from_json(c.output(outname2)) assert actual_split_sig == test_split_sig @@ 
-1509,12 +1535,12 @@ def test_sig_split_2_outdir(c): assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) - test_split_sig = sourmash.load_one_signature(sig47) + test_split_sig = load_one_signature_from_json(sig47) - actual_split_sig = sourmash.load_one_signature(c.output(outname1)) + actual_split_sig = load_one_signature_from_json(c.output(outname1)) assert actual_split_sig == test_split_sig - actual_split_sig = sourmash.load_one_signature(c.output(outname2)) + actual_split_sig = load_one_signature_from_json(c.output(outname2)) assert actual_split_sig == test_split_sig @@ -1531,12 +1557,12 @@ def test_sig_split_2_output_dir(c): assert os.path.exists(c.output(outname1)) assert os.path.exists(c.output(outname2)) - test_split_sig = sourmash.load_one_signature(sig47) + test_split_sig = load_one_signature_from_json(sig47) - actual_split_sig = sourmash.load_one_signature(c.output(outname1)) + actual_split_sig = load_one_signature_from_json(c.output(outname1)) assert actual_split_sig == test_split_sig - actual_split_sig = sourmash.load_one_signature(c.output(outname2)) + actual_split_sig = load_one_signature_from_json(c.output(outname2)) assert actual_split_sig == test_split_sig @@ -1679,8 +1705,8 @@ def test_sig_extract_1(runtmp): # stdout should be new signature out = c.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -1697,8 +1723,8 @@ def test_sig_extract_1_from_file(runtmp): # stdout should be new signature out = c.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig 
@@ -1713,8 +1739,8 @@ def test_sig_extract_2(c): # stdout should be new signature out = c.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) print(test_extract_sig.minhash) print(actual_extract_sig.minhash) @@ -1733,8 +1759,8 @@ def test_sig_extract_2_zipfile(c): # stdout should be new signature out = c.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) print(test_extract_sig.minhash) print(actual_extract_sig.minhash) @@ -1760,8 +1786,8 @@ def test_sig_extract_4(c): # stdout should be new signature out = c.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) print(test_extract_sig.minhash) print(actual_extract_sig.minhash) @@ -1787,7 +1813,7 @@ def test_sig_extract_6(c): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 2 @@ -1802,7 +1828,7 @@ def test_sig_extract_7(c): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 @@ -1817,7 +1843,7 @@ def test_sig_extract_7_no_ksize(c): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 3 @@ -1889,8 +1915,8 @@ def test_sig_extract_8_picklist_md5(runtmp): # stdout should be new 
signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -1931,8 +1957,8 @@ def test_sig_extract_8_picklist_md5_zipfile(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2014,8 +2040,8 @@ def test_sig_extract_8_picklist_md5_include(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2055,8 +2081,8 @@ def test_sig_extract_8_picklist_md5_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2115,8 +2141,8 @@ def test_sig_extract_8_picklist_md5_require_all(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2158,8 +2184,8 @@ def 
test_sig_extract_8_picklist_name(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2191,8 +2217,8 @@ def test_sig_extract_8_picklist_name_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2224,8 +2250,8 @@ def test_sig_extract_8_picklist_ident(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2257,8 +2283,8 @@ def test_sig_extract_8_picklist_ident_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2290,8 +2316,8 @@ def test_sig_extract_8_picklist_ident_dot(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == 
test_extract_sig @@ -2323,8 +2349,8 @@ def test_sig_extract_8_picklist_ident_dot_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2356,8 +2382,8 @@ def test_sig_extract_8_picklist_md5_short(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2389,8 +2415,8 @@ def test_sig_extract_8_picklist_md5_short_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2422,8 +2448,8 @@ def test_sig_extract_8_picklist_md5_short_alias(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2455,8 +2481,8 @@ def test_sig_extract_8_picklist_md5_short_alias_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig63) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig63) + 
actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2561,8 +2587,8 @@ def test_sig_extract_8_picklist_md5_short_alias_with_md5_selector(runtmp): # stdout should be new signature out = runtmp.last_result.out - test_extract_sig = sourmash.load_one_signature(sig47) - actual_extract_sig = sourmash.load_one_signature(out) + test_extract_sig = load_one_signature_from_json(sig47) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig == test_extract_sig @@ -2651,7 +2677,7 @@ def test_sig_extract_8_picklist_md5_nomatch_exclude(runtmp): # stdout should be both signatures out = runtmp.last_result.out - extract_siglist = list(load_signatures(out)) + extract_siglist = list(load_signatures_from_json(out)) print(len(extract_siglist)) s47 = sourmash.load_file_as_signatures(sig47) s63 = sourmash.load_file_as_signatures(sig63) @@ -2687,7 +2713,7 @@ def test_sig_extract_9_picklist_md5_ksize_hp_select(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_extract_sig = sourmash.load_one_signature(out) + actual_extract_sig = load_one_signature_from_json(out) print(actual_extract_sig.md5sum) assert str(actual_extract_sig) == "GCA_001593925" @@ -2715,7 +2741,7 @@ def test_sig_extract_9_picklist_md5_ksize_hp_select_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_extract_sig = sourmash.load_one_signature(out) + actual_extract_sig = load_one_signature_from_json(out) print(actual_extract_sig.md5sum) assert str(actual_extract_sig) == "GCA_001593935" @@ -2745,7 +2771,7 @@ def test_sig_extract_10_picklist_md5_dups_and_empty(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_extract_sig = sourmash.load_one_signature(out) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig.minhash.ksize == 19 assert actual_extract_sig.minhash.moltype == "hp" @@ -2779,7 +2805,7 @@ def 
test_sig_extract_10_picklist_md5_dups_and_empty_exclude(runtmp): # stdout should be new signature out = runtmp.last_result.out - actual_extract_sig = sourmash.load_one_signature(out) + actual_extract_sig = load_one_signature_from_json(out) assert actual_extract_sig.minhash.ksize == 19 assert actual_extract_sig.minhash.moltype == "hp" @@ -2966,20 +2992,20 @@ def test_sig_extract_11_pattern_exclude(runtmp): def test_sig_extract_identical_md5s(runtmp): # test that we properly handle different signatures with identical md5s sig47 = utils.get_test_data("47.fa.sig") - ss = load_signatures(sig47) + ss = load_signatures_from_json(sig47) sig = list(ss)[0] new_sig = sig.to_mutable() new_sig.name = "foo" sig47foo = runtmp.output("foo.sig") # this was only a problem when the signatures are stored in the same file with open(sig47foo, "w") as fp: - sourmash.save_signatures([new_sig, sig], fp) + save_signatures_to_json([new_sig, sig], fp) runtmp.run_sourmash("sig", "extract", "--name", "foo", sig47foo) out = runtmp.last_result.out print(out) - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -2999,12 +3025,12 @@ def test_sig_flatten_1(runtmp): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 - test_flattened = sourmash.load_one_signature(sig47) + test_flattened = load_one_signature_from_json(sig47) assert test_flattened.minhash == siglist[0].minhash @@ -3030,12 +3056,12 @@ def test_sig_flatten_1_from_file(runtmp): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 - test_flattened = sourmash.load_one_signature(sig47) + test_flattened = load_one_signature_from_json(sig47) assert test_flattened.minhash == siglist[0].minhash @@ -3050,12 +3076,12 @@ def 
test_sig_flatten_1_select_name(c): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 - test_flattened = sourmash.load_one_signature(sig47) + test_flattened = load_one_signature_from_json(sig47) assert test_flattened.minhash == siglist[0].minhash @@ -3071,12 +3097,12 @@ def test_sig_flatten_1_select_md5(runtmp): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 - test_flattened = sourmash.load_one_signature(sig47) + test_flattened = load_one_signature_from_json(sig47) assert test_flattened.minhash == siglist[0].minhash @@ -3089,7 +3115,7 @@ def test_sig_flatten_2_ksize(runtmp): # stdout should be new signature out = c.last_result.out - siglist = load_signatures(out) + siglist = load_signatures_from_json(out) siglist = list(siglist) assert len(siglist) == 1 @@ -3104,8 +3130,8 @@ def test_sig_downsample_1_scaled(c): # stdout should be new signature out = c.last_result.out - test_downsample_sig = sourmash.load_one_signature(sig47) - actual_downsample_sig = sourmash.load_one_signature(out) + test_downsample_sig = load_one_signature_from_json(sig47) + actual_downsample_sig = load_one_signature_from_json(out) test_mh = test_downsample_sig.minhash.downsample(scaled=10000) @@ -3121,7 +3147,7 @@ def test_sig_downsample_1_scaled_downsample_multisig(c): # stdout should be new signatures out = c.last_result.out - for sig in load_signatures(out): + for sig in load_signatures_from_json(out): assert sig.minhash.scaled == 10000 @@ -3134,12 +3160,12 @@ def test_sig_downsample_1_scaled_to_num(c): # stdout should be new signature out = c.last_result.out - actual_downsample_sig = sourmash.load_one_signature(out) + actual_downsample_sig = load_one_signature_from_json(out) actual_mins = 
actual_downsample_sig.minhash.hashes.keys() actual_mins = list(actual_mins) actual_mins.sort() - test_downsample_sig = sourmash.load_one_signature(sig47) + test_downsample_sig = load_one_signature_from_json(sig47) test_mins = test_downsample_sig.minhash.hashes.keys() test_mins = list(test_mins) test_mins.sort() @@ -3206,10 +3232,10 @@ def test_sig_downsample_2_num(c): # stdout should be new signature out = c.last_result.out - test_downsample_sig = sourmash.load_one_signature( + test_downsample_sig = load_one_signature_from_json( sigs11, ksize=21, select_moltype="DNA" ) - actual_downsample_sig = sourmash.load_one_signature(out) + actual_downsample_sig = load_one_signature_from_json(out) test_mh = test_downsample_sig.minhash.downsample(num=500) assert actual_downsample_sig.minhash == test_mh @@ -3226,10 +3252,10 @@ def test_sig_downsample_2_num_to_scaled(c): # stdout should be new signature out = c.last_result.out - test_downsample_sig = sourmash.load_one_signature( + test_downsample_sig = load_one_signature_from_json( sigs11, ksize=21, select_moltype="DNA" ) - actual_downsample_sig = sourmash.load_one_signature(out) + actual_downsample_sig = load_one_signature_from_json(out) test_mins = test_downsample_sig.minhash.hashes.keys() actual_mins = actual_downsample_sig.minhash.hashes.keys() @@ -3648,7 +3674,7 @@ def test_sig_describe_empty(c): outsig = c.output("xxx.sig") with open(outsig, "w") as fp: - sourmash.save_signatures([ss], fp) + save_signatures_to_json([ss], fp) ss = sourmash.load_file_as_signatures(outsig) ss = list(ss) @@ -3906,8 +3932,8 @@ def test_import_export_1(c): c.run_sourmash("sig", "export", inp, "-o", outp, "-k", "21", "--dna") c.run_sourmash("sig", "import", outp) - original = sourmash.load_one_signature(inp, ksize=21, select_moltype="DNA") - roundtrip = sourmash.load_one_signature(c.last_result.out) + original = load_one_signature_from_json(inp, ksize=21, select_moltype="DNA") + roundtrip = load_one_signature_from_json(c.last_result.out) assert 
original.minhash == roundtrip.minhash @@ -3921,8 +3947,8 @@ def test_import_export_1_by_md5(c): c.run_sourmash("sig", "export", inp, "-o", outp, "--md5", "1437d8eae6") c.run_sourmash("sig", "import", outp) - original = sourmash.load_one_signature(inp, ksize=21, select_moltype="DNA") - roundtrip = sourmash.load_one_signature(c.last_result.out) + original = load_one_signature_from_json(inp, ksize=21, select_moltype="DNA") + roundtrip = load_one_signature_from_json(c.last_result.out) assert original.minhash == roundtrip.minhash @@ -3938,8 +3964,8 @@ def test_import_export_2(c): msh_sig = utils.get_test_data("genome-s11.fa.gz.msh.json_dump") c.run_sourmash("sig", "import", msh_sig) - imported = sourmash.load_one_signature(c.last_result.out) - compare = sourmash.load_one_signature(sig1, ksize=21, select_moltype="DNA") + imported = load_one_signature_from_json(c.last_result.out) + compare = load_one_signature_from_json(sig1, ksize=21, select_moltype="DNA") assert imported.minhash == compare.minhash @@ -4240,7 +4266,7 @@ def test_sig_kmers_1_dna(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "DNA" @@ -4298,7 +4324,7 @@ def test_sig_kmers_1_dna_more_in_query(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "DNA" @@ -4326,7 +4352,7 @@ def test_sig_kmers_1_dna_empty_seq(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=1") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = 
ss.minhash assert mh.moltype == "DNA" @@ -4353,7 +4379,7 @@ def test_sig_kmers_1_dna_empty_sig(runtmp): mh = sourmash.MinHash(ksize=31, n=0, scaled=1) ss = sourmash.SourmashSignature(mh, name="empty") with open(runtmp.output("empty.sig"), "w") as fp: - sourmash.save_signatures([ss], fp) + save_signatures_to_json([ss], fp) with pytest.raises(SourmashCommandFailed): runtmp.sourmash("sig", "kmers", "--sig", "empty.sig", "--seq", seqfile) @@ -4374,7 +4400,7 @@ def test_sig_kmers_1_dna_single_sig(runtmp): mh.add_hash(1070961951490202715) ss = sourmash.SourmashSignature(mh, name="small") with open(runtmp.output("small.sig"), "w") as fp: - sourmash.save_signatures([ss], fp) + save_signatures_to_json([ss], fp) runtmp.sourmash("sig", "kmers", "--sig", "small.sig", "--seq", seqfile) @@ -4392,7 +4418,7 @@ def test_sig_kmers_1_dna_lowscaled(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "dna", seqfile, "-p", "scaled=100") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "DNA" @@ -4450,7 +4476,7 @@ def test_sig_kmers_1_dna_num(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "dna", seqfile, "-p", "num=50") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "DNA" @@ -4508,7 +4534,7 @@ def test_sig_kmers_1_dna_translate_protein(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "translate", seqfile, "-p", "scaled=1") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "protein" @@ -4567,7 +4593,7 @@ def test_sig_kmers_1_dna_translate_dayhoff(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "translate", seqfile, "-p", 
"scaled=1,dayhoff") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "dayhoff" @@ -4626,7 +4652,7 @@ def test_sig_kmers_1_dna_translate_hp(runtmp): seqfile = utils.get_test_data("short.fa") runtmp.sourmash("sketch", "translate", seqfile, "-p", "scaled=1,hp") - ss = sourmash.load_one_signature(runtmp.output("short.fa.sig")) + ss = load_one_signature_from_json(runtmp.output("short.fa.sig")) mh = ss.minhash assert mh.moltype == "hp" @@ -4685,7 +4711,7 @@ def test_sig_kmers_2_protein(runtmp): seqfile = utils.get_test_data("ecoli.faa") runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1") - ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) + ss = load_one_signature_from_json(runtmp.output("ecoli.faa.sig")) mh = ss.minhash assert mh.moltype == "protein" @@ -4744,7 +4770,7 @@ def test_sig_kmers_2_dayhoff(runtmp): seqfile = utils.get_test_data("ecoli.faa") runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1,dayhoff") - ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) + ss = load_one_signature_from_json(runtmp.output("ecoli.faa.sig")) mh = ss.minhash assert mh.moltype == "dayhoff" @@ -4803,7 +4829,7 @@ def test_sig_kmers_2_hp(runtmp): seqfile = utils.get_test_data("ecoli.faa") runtmp.sourmash("sketch", "protein", seqfile, "-p", "scaled=1,hp") - ss = sourmash.load_one_signature(runtmp.output("ecoli.faa.sig")) + ss = load_one_signature_from_json(runtmp.output("ecoli.faa.sig")) mh = ss.minhash assert mh.moltype == "hp" diff --git a/tests/test_cmd_signature_grep.py b/tests/test_cmd_signature_grep.py index fa1a5b7dfb..4e5a26f289 100644 --- a/tests/test_cmd_signature_grep.py +++ b/tests/test_cmd_signature_grep.py @@ -11,7 +11,7 @@ import sourmash_tst_utils as utils import sourmash from sourmash_tst_utils import SourmashCommandFailed -from sourmash.signature import load_signatures +from sourmash.signature import 
load_signatures_from_json, save_signatures_to_json ## command line tests @@ -23,7 +23,7 @@ def test_grep_1_sig_name(runtmp): runtmp.run_sourmash("sig", "grep", "Shewanella", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -46,7 +46,7 @@ def test_grep_1_sig_name_case_insensitive(runtmp): runtmp.run_sourmash("sig", "grep", "-i", "shewanella", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -70,7 +70,7 @@ def test_grep_2_sig_md5(runtmp): runtmp.run_sourmash("sig", "grep", "ce52952152f0", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -92,7 +92,7 @@ def test_grep_2_sig_md5_case_insensitive(runtmp): runtmp.run_sourmash("sig", "grep", "-i", "CE52952152f0", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -106,7 +106,7 @@ def test_grep_3_filename(runtmp): runtmp.run_sourmash("sig", "grep", "47.fa", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -121,7 +121,7 @@ def test_grep_3_filename_regexp(runtmp): runtmp.run_sourmash("sig", "grep", "^47.fa", sig47) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -150,7 +150,7 @@ def test_grep_4_no_manifest_ok(runtmp): runtmp.run_sourmash("sig", "grep", "e60265", sbt, "--no-require-manifest") - ss = load_signatures(runtmp.last_result.out) + ss = load_signatures_from_json(runtmp.last_result.out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -164,7 +164,7 @@ def test_grep_5_zip_include(runtmp): runtmp.run_sourmash("sig", "grep", "--dna", 
"OS223", allzip) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -193,7 +193,7 @@ def test_grep_5_zip_include_picklist(runtmp): print(err) assert "for given picklist, found 2 matches to 2 distinct values" in err - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -208,7 +208,7 @@ def test_grep_5_zip_include_case_insensitive(runtmp): runtmp.run_sourmash("sig", "grep", "--dna", "-i", "os223", allzip) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -223,7 +223,7 @@ def test_grep_5_zip_exclude(runtmp): runtmp.run_sourmash("sig", "grep", "--dna", "-v", "OS185", allzip) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -238,7 +238,7 @@ def test_grep_5_zip_exclude_case_insensitive(runtmp): runtmp.run_sourmash("sig", "grep", "--dna", "-vi", "os185", allzip) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -253,7 +253,7 @@ def test_grep_6_zip_manifest_csv(runtmp): runtmp.run_sourmash("sig", "grep", "--dna", "OS223", allzip, "--csv", "match.csv") out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -264,7 +264,7 @@ def test_grep_6_zip_manifest_csv(runtmp): runtmp.run_sourmash("sig", "cat", allzip, "--picklist", "match.csv::manifest") out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -281,7 +281,7 @@ def test_grep_6_zip_manifest_csv_gz(runtmp): ) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert 
len(ss) == 1 ss = ss[0] @@ -296,7 +296,7 @@ def test_grep_6_zip_manifest_csv_gz(runtmp): runtmp.run_sourmash("sig", "cat", allzip, "--picklist", "match.csv.gz::manifest") out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] @@ -414,19 +414,19 @@ def test_sig_grep_8_count(runtmp): def test_sig_grep_identical_md5s(runtmp): # test that we properly handle different signatures with identical md5s sig47 = utils.get_test_data("47.fa.sig") - ss = load_signatures(sig47) + ss = load_signatures_from_json(sig47) sig = list(ss)[0] new_sig = sig.to_mutable() new_sig.name = "foo" sig47foo = runtmp.output("foo.sig") # this was only a problem when the signatures are stored in the same file with open(sig47foo, "w") as fp: - sourmash.save_signatures([new_sig, sig], fp) + save_signatures_to_json([new_sig, sig], fp) runtmp.run_sourmash("sig", "grep", "-i", "foo", sig47foo) out = runtmp.last_result.out - ss = load_signatures(out) + ss = load_signatures_from_json(out) ss = list(ss) assert len(ss) == 1 ss = ss[0] diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py index 34097dd695..a28e9c0bcf 100644 --- a/tests/test_deprecated.py +++ b/tests/test_deprecated.py @@ -9,6 +9,6 @@ def test_load_textmode(track_abundance): # to make sure we still support it =/ sigfile = utils.get_test_data("genome-s10+s11.sig") with open(sigfile) as sigfp: - siglist = list(signature.load_signatures(sigfp)) + siglist = list(signature.load_signatures_from_json(sigfp)) loaded_sig = siglist[0] assert loaded_sig.name == "genome-s10+s11" diff --git a/tests/test_index.py b/tests/test_index.py index b207376443..2c1799d113 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -18,6 +18,7 @@ MultiIndex, StandaloneManifestIndex, ) +from sourmash.signature import load_one_signature_from_json, save_signatures_to_json from sourmash.index.revindex import RevIndex from sourmash.sbt import SBT, GraphFactory from sourmash 
import sourmash_args @@ -96,7 +97,7 @@ def test_simple_index(n_children): def test_linear_index_prefetch_empty(): # check that an exception is raised upon for an empty LinearIndex sig2 = utils.get_test_data("2.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) lidx = LinearIndex() @@ -119,8 +120,8 @@ def minhash(self): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) fake = FakeSignature() lidx = LinearIndex() @@ -150,8 +151,8 @@ def test_linear_index_search_subj_has_abundance(): queryfile = utils.get_test_data("47.fa.sig") subjfile = utils.get_test_data("track_abund/47.fa.sig") - qs = sourmash.load_one_signature(queryfile) - ss = sourmash.load_one_signature(subjfile) + qs = load_one_signature_from_json(queryfile) + ss = load_one_signature_from_json(subjfile) linear = LinearIndex() linear.insert(ss) @@ -167,8 +168,8 @@ def test_linear_index_gather_subj_has_abundance(): queryfile = utils.get_test_data("47.fa.sig") subjfile = utils.get_test_data("track_abund/47.fa.sig") - qs = sourmash.load_one_signature(queryfile) - ss = sourmash.load_one_signature(subjfile) + qs = load_one_signature_from_json(queryfile) + ss = load_one_signature_from_json(subjfile) linear = LinearIndex() linear.insert(ss) @@ -186,7 +187,7 @@ def test_index_search_subj_scaled_is_lower(): sigfile = utils.get_test_data( "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" ) - ss = sourmash.load_one_signature(sigfile) + ss = load_one_signature_from_json(sigfile) # double check :) assert ss.minhash.scaled == 100 @@ -209,7 +210,7 @@ def test_index_search_subj_num_is_lower(): # check that subject sketches are appropriately downsampled for num # sketches sigfile = utils.get_test_data("num/47.fa.sig") - ss = 
sourmash.load_one_signature(sigfile, ksize=31) + ss = load_one_signature_from_json(sigfile, ksize=31) # double check :) assert ss.minhash.num == 500 @@ -231,7 +232,7 @@ def test_index_search_subj_num_is_lower(): def test_index_search_query_num_is_lower(): # check that query sketches are appropriately downsampled for num. sigfile = utils.get_test_data("num/47.fa.sig") - qs = sourmash.load_one_signature(sigfile, ksize=31) + qs = load_one_signature_from_json(sigfile, ksize=31) # double check :) assert qs.minhash.num == 500 @@ -254,8 +255,8 @@ def test_linear_index_search_abund(): sig47 = utils.get_test_data("track_abund/47.fa.sig") sig63 = utils.get_test_data("track_abund/63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = LinearIndex() lidx.insert(ss47) @@ -272,8 +273,8 @@ def test_linear_index_search_abund_downsample_query(): sig47 = utils.get_test_data("track_abund/47.fa.sig") sig63 = utils.get_test_data("track_abund/63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) # forcibly downsample ss47 for the purpose of this test :) ss47 = ss47.to_mutable() @@ -295,8 +296,8 @@ def test_linear_index_search_abund_downsample_subj(): sig47 = utils.get_test_data("track_abund/47.fa.sig") sig63 = utils.get_test_data("track_abund/63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) # forcibly downsample ss63 for the purpose of this test :) ss63 = ss63.to_mutable() @@ -318,8 +319,8 @@ def test_linear_index_search_abund_requires_threshold(): sig47 = utils.get_test_data("track_abund/47.fa.sig") sig63 = utils.get_test_data("track_abund/63.fa.sig") - ss47 = 
sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = LinearIndex() lidx.insert(ss47) @@ -336,8 +337,8 @@ def test_linear_index_search_abund_query_flat(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("track_abund/63.fa.sig") - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63) lidx = LinearIndex() lidx.insert(ss47) @@ -356,8 +357,8 @@ def test_linear_index_search_abund_subj_flat(): sig47 = utils.get_test_data("track_abund/47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = LinearIndex() lidx.insert(ss47) @@ -378,9 +379,9 @@ def test_linear_index_save(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) linear = LinearIndex() linear.insert(ss2) @@ -409,13 +410,13 @@ def test_linear_index_load(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) filename = runtmp.output("foo") with open(filename, "w") as fp: - sourmash.save_signatures([ss2, ss47, ss63], fp) + 
save_signatures_to_json([ss2, ss47, ss63], fp) linear = LinearIndex.load(filename) @@ -430,9 +431,9 @@ def test_linear_index_save_load(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) linear = LinearIndex() linear.insert(ss2) @@ -1081,9 +1082,9 @@ def test_multi_index_search(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig2) lidx2 = LinearIndex.load(sig47) @@ -1135,9 +1136,9 @@ def test_multi_index_gather(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig2) lidx2 = LinearIndex.load(sig47) @@ -1165,9 +1166,9 @@ def test_multi_index_signatures(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig2) lidx2 = LinearIndex.load(sig47) @@ -1478,9 
+1479,9 @@ def test_linear_index_gather_ignore(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) # construct an index... lidx = LinearIndex([ss2, ss47, ss63]) @@ -1511,9 +1512,9 @@ def test_lca_index_gather_ignore(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) # construct an index... db = LCA_Database(ksize=31, scaled=1000) @@ -1545,9 +1546,9 @@ def test_sbt_index_gather_ignore(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) # construct an index... 
factory = GraphFactory(5, 100, 3) @@ -1715,9 +1716,9 @@ def test_lazy_index_1(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = LinearIndex() lidx.insert(ss2) @@ -1776,7 +1777,7 @@ def minhash(self): def test_lazy_index_4_bool(): # test some basic features of LazyLinearIndex sig2 = utils.get_test_data("2.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) # test bool false/true lidx = LinearIndex() @@ -1816,9 +1817,9 @@ def test_revindex_index_search(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = RevIndex(template=ss2.minhash) lidx.insert(ss2) @@ -1861,9 +1862,9 @@ def test_revindex_gather(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx = RevIndex(template=ss2.minhash) lidx.insert(ss2) @@ -1887,9 +1888,9 @@ def test_revindex_gather_ignore(): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss2 = sourmash.load_one_signature(sig2, ksize=31) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = 
sourmash.load_one_signature(sig63, ksize=31) + ss2 = load_one_signature_from_json(sig2, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) # construct an index... lidx = RevIndex(template=ss2.minhash, signatures=[ss2, ss47, ss63]) @@ -1919,8 +1920,8 @@ def test_standalone_manifest_signatures(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss63 = sourmash.load_one_signature(sig63) + ss47 = load_one_signature_from_json(sig47) + ss63 = load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig47) lidx2 = LinearIndex.load(sig63) @@ -1943,8 +1944,8 @@ def test_standalone_manifest_signatures_prefix(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - sourmash.load_one_signature(sig47) - sourmash.load_one_signature(sig63) + load_one_signature_from_json(sig47) + load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig47) lidx2 = LinearIndex.load(sig63) @@ -1967,8 +1968,8 @@ def test_standalone_manifest_signatures_prefix_fail(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - sourmash.load_one_signature(sig47) - sourmash.load_one_signature(sig63) + load_one_signature_from_json(sig47) + load_one_signature_from_json(sig63) lidx1 = LinearIndex.load(sig47) lidx2 = LinearIndex.load(sig63) @@ -2147,7 +2148,7 @@ def test_standalone_manifest_prefetch_lazy(runtmp): # ok! now test prefetch... should get one match legit, to 47, # and then no matches to 2, and then error. 
- ss47 = sourmash.load_one_signature(sig47) + ss47 = load_one_signature_from_json(sig47) idx = idx.select(ksize=31) g = idx.prefetch(ss47, threshold_bp=0) diff --git a/tests/test_jaccard.py b/tests/test_jaccard.py index 87093ee194..6fd9b7a1b1 100644 --- a/tests/test_jaccard.py +++ b/tests/test_jaccard.py @@ -5,6 +5,7 @@ import pytest from sourmash import MinHash +from sourmash.signature import load_signatures_from_json import sourmash_tst_utils as utils @@ -172,16 +173,14 @@ def test_abund_similarity_zero(): def test_jaccard_on_real_data(): - from sourmash.signature import load_signatures - afile = "n10000/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) - sig1 = list(load_signatures(a))[0] + sig1 = list(load_signatures_from_json(a))[0] mh1 = sig1.minhash bfile = "n10000/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) - sig2 = list(load_signatures(b))[0] + sig2 = list(load_signatures_from_json(b))[0] mh2 = sig2.minhash assert mh1.similarity(mh2) == 0.0183 @@ -204,16 +203,14 @@ def test_jaccard_on_real_data(): def test_scaled_on_real_data(): - from sourmash.signature import load_signatures - afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) - sig1 = list(load_signatures(a))[0] + sig1 = list(load_signatures_from_json(a))[0] mh1 = sig1.minhash bfile = "scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) - sig2 = list(load_signatures(b))[0] + sig2 = list(load_signatures_from_json(b))[0] mh2 = sig2.minhash assert round(mh1.similarity(mh2), 5) == 0.01644 @@ -237,16 +234,14 @@ def test_scaled_on_real_data(): def test_scaled_on_real_data_2(): - from sourmash.signature import load_signatures - afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) - sig1 = list(load_signatures(a))[0] + sig1 = list(load_signatures_from_json(a))[0] mh1 = sig1.minhash bfile = 
"scaled100/GCF_000006945.1_ASM694v1_genomic.fna.gz.sig.gz" b = utils.get_test_data(bfile) - sig2 = list(load_signatures(b))[0] + sig2 = list(load_signatures_from_json(b))[0] mh2 = sig2.minhash assert round(mh1.similarity(mh2), 5) == 0.01644 @@ -270,11 +265,9 @@ def test_scaled_on_real_data_2(): def test_downsample_scaled_with_num(): - from sourmash.signature import load_signatures - afile = "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz" a = utils.get_test_data(afile) - sig1 = list(load_signatures(a))[0] + sig1 = list(load_signatures_from_json(a))[0] mh1 = sig1.minhash with pytest.raises(ValueError) as exc: diff --git a/tests/test_lca.py b/tests/test_lca.py index 7db105628e..f844422ef3 100644 --- a/tests/test_lca.py +++ b/tests/test_lca.py @@ -10,7 +10,8 @@ import sourmash_tst_utils as utils import sourmash -from sourmash import load_one_signature, SourmashSignature, sourmash_args +from sourmash import SourmashSignature, sourmash_args +from sourmash.signature import save_signatures_to_json, load_one_signature_from_json from sourmash.search import make_jaccard_search_query from sourmash.lca import lca_utils @@ -21,7 +22,7 @@ def test_api_create_search(): # create a database and then search for result. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) assert len(lca_db) == 0 @@ -43,8 +44,8 @@ def test_api_create_search(): def test_api_find_picklist_select(): # does 'find' respect picklists? 
- sig47 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - sig63 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + sig47 = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(sig47) @@ -75,8 +76,8 @@ def test_api_find_picklist_select(): def test_api_find_picklist_select_exclude(): # does 'find' respect picklists? - sig47 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - sig63 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + sig47 = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + sig63 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(sig47) @@ -106,7 +107,7 @@ def test_api_find_picklist_select_exclude(): def test_api_create_insert(): # test some internal implementation stuff: create & then insert a sig. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -133,7 +134,7 @@ def test_api_create_insert(): def test_api_create_insert_bad_ksize(): # can we insert a ksize=21 signature into a ksize=31 DB? hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=21, scaled=1000) with pytest.raises(ValueError): @@ -142,8 +143,8 @@ def test_api_create_insert_bad_ksize(): def test_api_create_insert_bad_ident(): # can we insert a signature with no/empty ident? 
- ss1 = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss1 = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) ss1 = ss1.to_mutable() ss2 = ss2.to_mutable() @@ -162,7 +163,7 @@ def test_api_create_insert_bad_ident(): def test_api_create_insert_bad_scaled(): # can we insert a scaled=1000 signature into a scaled=500 DB? # hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) assert ss.minhash.scaled == 1000 lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=500) @@ -173,7 +174,7 @@ def test_api_create_insert_bad_scaled(): def test_api_create_insert_bad_moltype(): # can we insert a DNAsignature into a protein DB? # hopefully not. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) assert ss.minhash.moltype == "DNA" lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=500, moltype="protein") @@ -184,7 +185,7 @@ def test_api_create_insert_bad_moltype(): def test_api_create_insert_ident(): # test some internal implementation stuff: signature inserted with # different ident than name. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss, ident="foo") @@ -213,8 +214,8 @@ def test_api_create_insert_ident(): def test_api_create_insert_two(): # check internal details if multiple signatures are inserted. 
- ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss, ident="foo") @@ -254,7 +255,7 @@ def test_api_create_insert_two(): def test_api_create_insert_w_lineage(): # test some internal implementation stuff - insert signature w/lineage - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lineage = (LineagePair("rank1", "name1"), LineagePair("rank2", "name2")) @@ -292,7 +293,7 @@ def test_api_create_insert_w_lineage(): def test_api_create_insert_w_bad_lineage(): # test some internal implementation stuff - insert signature w/bad lineage - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lineage = ([LineagePair("rank1", "name1"), LineagePair("rank2", "name2")],) @@ -303,7 +304,7 @@ def test_api_create_insert_w_bad_lineage(): def test_api_create_insert_w_bad_lineage_2(): # test some internal implementation stuff - insert signature w/bad lineage - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lineage = 1 # something non-iterable... @@ -314,7 +315,7 @@ def test_api_create_insert_w_bad_lineage_2(): def test_api_create_gather(): # create a database, and then run gather on it. 
- ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -328,7 +329,7 @@ def test_api_create_gather(): def test_api_add_genome_lineage(): # LCA_Databases can store/retrieve arbitrary lineages/taxonomies. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lineage = (LineagePair("rank1", "name1"), (LineagePair("rank2", "name2"))) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) @@ -344,8 +345,8 @@ def test_api_add_genome_lineage(): def test_api_insert_update(): # check that cached parts of LCA_Database are updated when a new # signature is inserted. - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -369,7 +370,7 @@ def test_api_insert_update(): def test_api_insert_retrieve_check_name(): # check that signatures retrieved from LCA_Database objects have the # right name. 
- ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -383,8 +384,8 @@ def test_api_insert_retrieve_check_name(): def test_api_create_insert_two_then_scale(): # construct database, THEN downsample - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -404,8 +405,8 @@ def test_api_create_insert_two_then_scale(): def test_api_create_insert_two_then_scale_then_add(): # construct database, THEN downsample, then add another - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) lca_db.insert(ss) @@ -431,8 +432,8 @@ def test_api_create_insert_two_then_scale_then_add(): def test_api_create_insert_scale_two(): # downsample while constructing database - ss = sourmash.load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31) - ss2 = sourmash.load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31) + ss = load_one_signature_from_json(utils.get_test_data("47.fa.sig"), ksize=31) + ss2 = load_one_signature_from_json(utils.get_test_data("63.fa.sig"), ksize=31) # downsample to 5000 while inserting: lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=5000) @@ -584,7 +585,7 @@ def test_lca_index_find_picklist_check_overlap(): # (bug 
#1638) query_fn = utils.get_test_data("47.fa.sig") - query_sig = sourmash.load_one_signature(query_fn, ksize=31) + query_sig = load_one_signature_from_json(query_fn, ksize=31) db_fn = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(db_fn) @@ -641,7 +642,7 @@ def test_lca_index_select_picklist_twice(): def test_search_db_scaled_gt_sig_scaled(): dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) + sig = load_one_signature_from_json(utils.get_test_data("47.fa.sig")) results = db.search(sig, threshold=0.01, ignore_abundance=True) match_sig = results[0][1] @@ -653,7 +654,7 @@ def test_search_db_scaled_gt_sig_scaled(): def test_search_db_scaled_lt_sig_scaled(): dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) + sig = load_one_signature_from_json(utils.get_test_data("47.fa.sig")) sig = sig.to_mutable() sig.minhash = sig.minhash.downsample(scaled=100000) @@ -663,14 +664,14 @@ def test_search_db_scaled_lt_sig_scaled(): assert results[0].score == 1.0 match = results[0].signature - orig_sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) + orig_sig = load_one_signature_from_json(utils.get_test_data("47.fa.sig")) assert orig_sig.minhash.jaccard(match.minhash, downsample=True) == 1.0 def test_gather_db_scaled_gt_sig_scaled(): dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) + sig = load_one_signature_from_json(utils.get_test_data("47.fa.sig")) result = db.best_containment(sig, threshold=0.01, ignore_abundance=True) match_sig = result[1] @@ -682,7 +683,7 @@ def test_gather_db_scaled_gt_sig_scaled(): def 
test_gather_db_scaled_lt_sig_scaled(): dbfile = utils.get_test_data("lca/47+63.lca.json") db, ksize, scaled = lca_utils.load_single_database(dbfile) - sig = sourmash.load_one_signature(utils.get_test_data("47.fa.sig")) + sig = load_one_signature_from_json(utils.get_test_data("47.fa.sig")) sig_minhash = sig.minhash.downsample(scaled=100000) result = db.best_containment(sig, threshold=0.01, ignore_abundance=True) @@ -1383,7 +1384,7 @@ def test_single_classify_zip_query(runtmp): db1 = utils.get_test_data("lca/delmont-1.lca.json") input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - query_ss = sourmash.load_one_signature(input_sig, ksize=31) + query_ss = load_one_signature_from_json(input_sig, ksize=31) query_zipfile = runtmp.output("query.zip") with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig: save_sig.add(query_ss) @@ -1436,13 +1437,13 @@ def test_single_classify_to_output(runtmp): def test_single_classify_to_output_no_name(runtmp): db1 = utils.get_test_data("lca/delmont-1.lca.json") input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - ss = sourmash.load_one_signature(input_sig, ksize=31) + ss = load_one_signature_from_json(input_sig, ksize=31) outsig_filename = runtmp.output("q.sig") with open(outsig_filename, "w") as fp: # remove name from signature here -- new_sig = sourmash.SourmashSignature(ss.minhash, filename="xyz") - sourmash.save_signatures([new_sig], fp) + save_signatures_to_json([new_sig], fp) cmd = [ "lca", @@ -2255,7 +2256,7 @@ def test_single_summarize_scaled_zip_query(runtmp): db1 = utils.get_test_data("lca/delmont-1.lca.json") input_sig = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - query_ss = sourmash.load_one_signature(input_sig, ksize=31) + query_ss = load_one_signature_from_json(input_sig, ksize=31) query_zipfile = runtmp.output("query.zip") with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig: save_sig.add(query_ss) @@ -2399,9 +2400,9 @@ def 
test_multi_summarize_with_zip_unassigned_singleton(runtmp, lca_db_format): query_zipfile = runtmp.output("query.zip") with sourmash_args.SaveSignaturesToLocation(query_zipfile) as save_sig: input_sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig") - sig1 = sourmash.load_one_signature(input_sig1, ksize=31) + sig1 = load_one_signature_from_json(input_sig1, ksize=31) input_sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig") - sig2 = sourmash.load_one_signature(input_sig2, ksize=31) + sig2 = load_one_signature_from_json(input_sig2, ksize=31) save_sig.add(sig1) save_sig.add(sig2) @@ -2914,7 +2915,7 @@ def test_lca_index_empty(runtmp, lca_db_format): sig47file = utils.get_test_data("47.fa.sig") sig63file = utils.get_test_data("63.fa.sig") - sig63 = load_one_signature(sig63file, ksize=31) + sig63 = load_one_signature_from_json(sig63file, ksize=31) # create an empty spreadsheet with open(c.output("empty.csv"), "w") as fp: @@ -2955,9 +2956,9 @@ def test_lca_gather_threshold_1(): sig47file = utils.get_test_data("47.fa.sig") sig63file = utils.get_test_data("63.fa.sig") - sig2 = load_one_signature(sig2file, ksize=31) - sig47 = load_one_signature(sig47file, ksize=31) - sig63 = load_one_signature(sig63file, ksize=31) + sig2 = load_one_signature_from_json(sig2file, ksize=31) + sig47 = load_one_signature_from_json(sig47file, ksize=31) + sig63 = load_one_signature_from_json(sig63file, ksize=31) # construct LCA Database db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) @@ -3015,9 +3016,9 @@ def test_lca_gather_threshold_5(): sig47file = utils.get_test_data("47.fa.sig") sig63file = utils.get_test_data("63.fa.sig") - sig2 = load_one_signature(sig2file, ksize=31) - sig47 = load_one_signature(sig47file, ksize=31) - sig63 = load_one_signature(sig63file, ksize=31) + sig2 = load_one_signature_from_json(sig2file, ksize=31) + sig47 = load_one_signature_from_json(sig47file, ksize=31) + sig63 = load_one_signature_from_json(sig63file, ksize=31) # construct LCA Database db = 
sourmash.lca.LCA_Database(ksize=31, scaled=1000) @@ -3061,9 +3062,9 @@ def test_gather_multiple_return(): sig47file = utils.get_test_data("47.fa.sig") sig63file = utils.get_test_data("63.fa.sig") - sig2 = load_one_signature(sig2file, ksize=31) - sig47 = load_one_signature(sig47file, ksize=31) - sig63 = load_one_signature(sig63file, ksize=31) + sig2 = load_one_signature_from_json(sig2file, ksize=31) + sig47 = load_one_signature_from_json(sig47file, ksize=31) + sig63 = load_one_signature_from_json(sig63file, ksize=31) # construct LCA Database db = sourmash.lca.LCA_Database(ksize=31, scaled=1000) @@ -3088,8 +3089,8 @@ def test_lca_db_protein_build(): "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) @@ -3119,8 +3120,8 @@ def test_lca_db_protein_save_load(c): "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) @@ -3187,8 +3188,8 @@ def test_lca_db_protein_command_index(runtmp, lca_db_format): db2 = x[0] assert db2.moltype == "protein" - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) # check reconstruction -- mh_list = [x.minhash for x in db2.signatures()] @@ -3230,8 +3231,8 @@ def test_lca_db_hp_build(): "prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = 
sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="hp") assert db.insert(sig1) @@ -3261,8 +3262,8 @@ def test_lca_db_hp_save_load(c): "prot/hp/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="hp") assert db.insert(sig1) @@ -3327,8 +3328,8 @@ def test_lca_db_hp_command_index(runtmp, lca_db_format): db2 = x[0] assert db2.moltype == "hp" - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) # check reconstruction -- mh_list = [x.minhash for x in db2.signatures()] @@ -3370,8 +3371,8 @@ def test_lca_db_dayhoff_build(): "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="dayhoff") assert db.insert(sig1) @@ -3401,8 +3402,8 @@ def test_lca_db_dayhoff_save_load(c): "prot/dayhoff/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="dayhoff") assert db.insert(sig1) @@ -3467,8 +3468,8 @@ def test_lca_db_dayhoff_command_index(runtmp, lca_db_format): db2 = x[0] assert db2.moltype == "dayhoff" - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = 
sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) # check reconstruction -- mh_list = [x.minhash for x in db2.signatures()] @@ -3701,8 +3702,8 @@ def test_lca_db_protein_save_twice(runtmp, lca_db_format): "prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig" ) - sig1 = sourmash.load_one_signature(sigfile1) - sig2 = sourmash.load_one_signature(sigfile2) + sig1 = load_one_signature_from_json(sigfile1) + sig2 = load_one_signature_from_json(sigfile2) db = sourmash.lca.LCA_Database(ksize=19, scaled=100, moltype="protein") assert db.insert(sig1) diff --git a/tests/test_minhash.py b/tests/test_minhash.py index 474f1e231a..f181381485 100644 --- a/tests/test_minhash.py +++ b/tests/test_minhash.py @@ -1723,7 +1723,8 @@ def test_distance_matrix(track_abundance): import numpy siglist = [ - next(signature.load_signatures(utils.get_test_data(f))) for f in utils.SIG_FILES + next(signature.load_signatures_from_json(utils.get_test_data(f))) + for f in utils.SIG_FILES ] D1 = numpy.zeros([len(siglist), len(siglist)]) diff --git a/tests/test_np_utils.py b/tests/test_np_utils.py index e23ca361a0..d9edcb171f 100644 --- a/tests/test_np_utils.py +++ b/tests/test_np_utils.py @@ -1,5 +1,5 @@ import numpy as np -from sourmash.signature import SourmashSignature +from sourmash import SourmashSignature import sourmash from sourmash.np_utils import to_memmap diff --git a/tests/test_prefetch.py b/tests/test_prefetch.py index 44c6b4aac5..fc73a2d724 100644 --- a/tests/test_prefetch.py +++ b/tests/test_prefetch.py @@ -12,7 +12,12 @@ import sourmash_tst_utils as utils import sourmash from sourmash_tst_utils import SourmashCommandFailed -from sourmash import signature, sourmash_args +from sourmash import SourmashSignature, sourmash_args +from sourmash.signature import ( + save_signatures_to_json, + load_signatures_from_json, + load_one_signature_from_json, +) def approx_eq(val1, val2): @@ -332,7 +337,7 @@ def 
test_prefetch_matches(runtmp, linear_gather): expected_matches = [sig63, sig47] for match, expected in zip(sigs.signatures(), expected_matches): - ss = sourmash.load_one_signature(expected, ksize=31) + ss = load_one_signature_from_json(expected, ksize=31) assert match == ss @@ -343,8 +348,8 @@ def test_prefetch_matches_to_dir(runtmp, linear_gather): sig2 = utils.get_test_data("2.fa.sig") sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss63 = sourmash.load_one_signature(sig63) - ss47 = sourmash.load_one_signature(sig47) + ss63 = load_one_signature_from_json(sig63) + ss47 = load_one_signature_from_json(sig47) matches_out = c.output("matches_dir/") @@ -385,8 +390,8 @@ def test_prefetch_matches_to_sig_gz(runtmp, linear_gather): sig2 = utils.get_test_data("2.fa.sig") sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss63 = sourmash.load_one_signature(sig63) - ss47 = sourmash.load_one_signature(sig47) + ss63 = load_one_signature_from_json(sig63) + ss47 = load_one_signature_from_json(sig47) matches_out = c.output("matches.sig.gz") @@ -431,8 +436,8 @@ def test_prefetch_matches_to_zip(runtmp, linear_gather): sig2 = utils.get_test_data("2.fa.sig") sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") - ss63 = sourmash.load_one_signature(sig63) - ss47 = sourmash.load_one_signature(sig47) + ss63 = load_one_signature_from_json(sig63) + ss47 = load_one_signature_from_json(sig47) matches_out = c.output("matches.zip") @@ -496,14 +501,14 @@ def test_prefetch_matching_hashes(runtmp, linear_gather): assert c.last_result.status == 0 assert os.path.exists(matches_out) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) matches = set(ss47.minhash.hashes) & set(ss63.minhash.hashes) intersect = ss47.minhash.copy_and_clear() 
intersect.add_many(matches) - ss = sourmash.load_one_signature(matches_out) + ss = load_one_signature_from_json(matches_out) assert ss.name.endswith("-known") assert ss.minhash == intersect @@ -536,13 +541,13 @@ def test_prefetch_nomatch_hashes(runtmp, linear_gather): assert c.last_result.status == 0 assert os.path.exists(nomatch_out) - ss47 = sourmash.load_one_signature(sig47, ksize=31) - ss63 = sourmash.load_one_signature(sig63, ksize=31) + ss47 = load_one_signature_from_json(sig47, ksize=31) + ss63 = load_one_signature_from_json(sig63, ksize=31) remain = ss47.minhash.to_mutable() remain.remove_many(ss63.minhash.hashes) - ss = sourmash.load_one_signature(nomatch_out) + ss = load_one_signature_from_json(nomatch_out) assert ss.name.endswith("-unknown") assert ss.minhash == remain @@ -756,7 +761,7 @@ def test_prefetch_downsample_multiple(runtmp, linear_gather): query_sig = utils.get_test_data("GCF_000006945.2-s500.sig") # load in the hashes and do split them into four bins, randomly. - ss = sourmash.load_one_signature(query_sig) + ss = load_one_signature_from_json(query_sig) hashes = list(ss.minhash.hashes) random.seed(a=1) # fix seed so test is reproducible @@ -775,10 +780,10 @@ def test_prefetch_downsample_multiple(runtmp, linear_gather): gathersigs = [] for i in range(4): - binsig = signature.SourmashSignature(mh_bins[i], name=f"bin{i}") + binsig = SourmashSignature(mh_bins[i], name=f"bin{i}") with open(runtmp.output(f"bin{i}.sig"), "wb") as fp: - sourmash.save_signatures([binsig], fp) + save_signatures_to_json([binsig], fp) gathersigs.append(f"bin{i}.sig") diff --git a/tests/test_signature.py b/tests/test_signature.py index b82a02364e..6365b58856 100644 --- a/tests/test_signature.py +++ b/tests/test_signature.py @@ -5,11 +5,13 @@ import sourmash from sourmash.signature import ( SourmashSignature, - save_signatures, - load_signatures, - load_one_signature, FrozenSourmashSignature, ) +from sourmash.signature import ( + save_signatures_to_json, + 
load_signatures_from_json, + load_one_signature_from_json, +) import sourmash_tst_utils as utils from sourmash.minhash import MinHash, FrozenMinHash from sourmash_tst_utils import SourmashCommandFailed @@ -137,8 +139,8 @@ def test_roundtrip(track_abundance): e = MinHash(n=1, ksize=20, track_abundance=track_abundance) e.add_kmer("AT" * 10) sig = SourmashSignature(e) - s = save_signatures([sig]) - siglist = list(load_signatures(s)) + s = save_signatures_to_json([sig]) + siglist = list(load_signatures_from_json(s)) sig2 = siglist[0] assert sig.similarity(sig2) == 1.0 @@ -167,8 +169,8 @@ def test_load_signature_ksize_nonint(track_abundance): e = MinHash(n=1, ksize=20, track_abundance=track_abundance) e.add_kmer("AT" * 10) sig = SourmashSignature(e) - s = save_signatures([sig]) - siglist = list(load_signatures(s, ksize="20")) + s = save_signatures_to_json([sig]) + siglist = list(load_signatures_from_json(s, ksize="20")) sig2 = siglist[0] assert sig.similarity(sig2) == 1.0 @@ -180,8 +182,8 @@ def test_roundtrip_empty(track_abundance): e = MinHash(n=1, ksize=20, track_abundance=track_abundance) sig = SourmashSignature(e) - s = save_signatures([sig]) - siglist = list(load_signatures(s)) + s = save_signatures_to_json([sig]) + siglist = list(load_signatures_from_json(s)) sig2 = siglist[0] assert sig.similarity(sig2) == 0 @@ -192,8 +194,8 @@ def test_roundtrip_scaled(track_abundance): e = MinHash(n=0, ksize=20, track_abundance=track_abundance, max_hash=10) e.add_hash(5) sig = SourmashSignature(e) - s = save_signatures([sig]) - siglist = list(load_signatures(s)) + s = save_signatures_to_json([sig]) + siglist = list(load_signatures_from_json(s)) sig2 = siglist[0] e2 = sig2.minhash @@ -207,8 +209,8 @@ def test_roundtrip_seed(track_abundance): e = MinHash(n=1, ksize=20, track_abundance=track_abundance, seed=10) e.add_hash(5) sig = SourmashSignature(e) - s = save_signatures([sig]) - siglist = list(load_signatures(s)) + s = save_signatures_to_json([sig]) + siglist = 
list(load_signatures_from_json(s)) sig2 = siglist[0] e2 = sig2.minhash @@ -291,8 +293,8 @@ def test_save_load_multisig(track_abundance): e2 = MinHash(n=1, ksize=25, track_abundance=track_abundance) sig2 = SourmashSignature(e2) - x = save_signatures([sig1, sig2]) - y = list(load_signatures(x)) + x = save_signatures_to_json([sig1, sig2]) + y = list(load_signatures_from_json(x)) print(x) @@ -303,19 +305,19 @@ def test_save_load_multisig(track_abundance): def test_load_one_fail_nosig(track_abundance): - x = save_signatures([]) + x = save_signatures_to_json([]) print((x,)) with pytest.raises(ValueError): - load_one_signature(x) + load_one_signature_from_json(x) def test_load_one_succeed(track_abundance): e1 = MinHash(n=1, ksize=20, track_abundance=track_abundance) sig1 = SourmashSignature(e1) - x = save_signatures([sig1]) + x = save_signatures_to_json([sig1]) - y = load_one_signature(x) + y = load_one_signature_from_json(x) assert sig1 == y @@ -326,10 +328,10 @@ def test_load_one_fail_multisig(track_abundance): e2 = MinHash(n=1, ksize=20, track_abundance=track_abundance) sig2 = SourmashSignature(e2) - x = save_signatures([sig1, sig2]) + x = save_signatures_to_json([sig1, sig2]) with pytest.raises(ValueError): - load_one_signature(x) + load_one_signature_from_json(x) def test_save_minified(track_abundance): @@ -339,11 +341,11 @@ def test_save_minified(track_abundance): e2 = MinHash(n=1, ksize=25, track_abundance=track_abundance) sig2 = SourmashSignature(e2, name="bar baz") - x = save_signatures([sig1, sig2]) + x = save_signatures_to_json([sig1, sig2]) assert b"\n" not in x assert len(x.split(b"\n")) == 1 - y = list(load_signatures(x)) + y = list(load_signatures_from_json(x)) assert len(y) == 2 assert any(sig.name == "foo" for sig in y) assert any(sig.name == "bar baz" for sig in y) @@ -351,9 +353,9 @@ def test_save_minified(track_abundance): def test_load_minified(track_abundance): sigfile = utils.get_test_data("genome-s10+s11.sig") - sigs = load_signatures(sigfile) + 
sigs = load_signatures_from_json(sigfile) - minified = save_signatures(sigs) + minified = save_signatures_to_json(sigs) with open(sigfile) as f: orig_file = f.read() assert len(minified) < len(orig_file) @@ -364,13 +366,13 @@ def test_load_compressed(track_abundance): e1 = MinHash(n=1, ksize=20, track_abundance=track_abundance) sig1 = SourmashSignature(e1) - x = save_signatures([sig1], compression=5) + x = save_signatures_to_json([sig1], compression=5) - y = load_one_signature(x) + y = load_one_signature_from_json(x) assert sig1 == y sigfile = utils.get_test_data("genome-s10+s11.sig.gz") - load_signatures(sigfile) + load_signatures_from_json(sigfile) def test_binary_fp(tmpdir, track_abundance): @@ -380,19 +382,19 @@ def test_binary_fp(tmpdir, track_abundance): path = tmpdir.join("1.sig") with open(str(path), "wb") as fp: sig = SourmashSignature(e) - save_signatures([sig], fp) + save_signatures_to_json([sig], fp) -def test_load_signatures_no_file_do_raise(tmpdir): +def test_load_signatures_from_json_no_file_do_raise(tmpdir): path = tmpdir.join("dne.sig") - siglist = load_signatures(path, do_raise=True) + siglist = load_signatures_from_json(path, do_raise=True) with pytest.raises(Exception): list(siglist) -def test_load_signatures_no_file_do_not_raise(tmpdir): +def test_load_signatures_from_json_no_file_do_not_raise(tmpdir): path = tmpdir.join("dne.sig") - siglist = load_signatures(path) + siglist = load_signatures_from_json(path) siglist = list(siglist) assert not siglist @@ -446,8 +448,8 @@ def test_max_containment_equal(): def test_containment_ANI(): f1 = utils.get_test_data("2.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2, ksize=31) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2, ksize=31) s1_cont_s2 = ss1.containment_ani(ss2, estimate_ci=True) s2_cont_s1 = ss2.containment_ani(ss1, estimate_ci=True) @@ -482,8 
+484,8 @@ def test_containment_ANI(): def test_containment_ANI_precalc_containment(): f1 = utils.get_test_data("47+63.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2, ksize=31) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2, ksize=31) # precalc containments and assert same results s1c = ss1.contained_by(ss2) s2c = ss2.contained_by(ss1) @@ -505,8 +507,8 @@ def test_containment_ANI_precalc_containment(): def test_avg_containment(): f1 = utils.get_test_data("47+63.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2, ksize=31) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2, ksize=31) # check average_containment_ani ac_s1 = ss1.avg_containment(ss2) ac_s2 = ss2.avg_containment(ss1) @@ -521,8 +523,8 @@ def test_avg_containment(): def test_avg_containment_ani(): f1 = utils.get_test_data("47+63.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2, ksize=31) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2, ksize=31) # check average_containment_ani ac_s1 = ss1.avg_containment_ani(ss2) ac_s2 = ss2.avg_containment_ani(ss1) @@ -536,8 +538,8 @@ def test_avg_containment_ani(): def test_containment_ANI_downsample(): f2 = utils.get_test_data("2+63.fa.sig") f3 = utils.get_test_data("47+63.fa.sig") - ss2 = sourmash.load_one_signature(f2, ksize=31) - ss3 = sourmash.load_one_signature(f3, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2, ksize=31) + ss3 = sourmash.load_one_signature_from_json(f3, ksize=31) # check that downsampling works properly print(ss2.minhash.scaled) @@ -571,8 +573,8 @@ def test_containment_ANI_downsample(): def 
test_jaccard_ANI(): f1 = utils.get_test_data("2.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2) print("\nJACCARD_ANI", ss1.jaccard_ani(ss2)) @@ -590,8 +592,8 @@ def test_jaccard_ANI(): def test_jaccard_ANI_untrustworthy(): f1 = utils.get_test_data("2.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2) print("\nJACCARD_ANI", ss1.jaccard_ani(ss2)) @@ -608,8 +610,8 @@ def test_jaccard_ANI_untrustworthy(): def test_jaccard_ANI_precalc_jaccard(): f1 = utils.get_test_data("47+63.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2) # precalc jaccard and assert same result jaccard = ss1.jaccard(ss2) print("\nJACCARD_ANI", ss1.jaccard_ani(ss2, jaccard=jaccard)) @@ -626,8 +628,8 @@ def test_jaccard_ANI_precalc_jaccard(): def test_jaccard_ANI_downsample(): f1 = utils.get_test_data("47+63.fa.sig") f2 = utils.get_test_data("2+63.fa.sig") - ss1 = sourmash.load_one_signature(f1, ksize=31) - ss2 = sourmash.load_one_signature(f2) + ss1 = sourmash.load_one_signature_from_json(f1, ksize=31) + ss2 = sourmash.load_one_signature_from_json(f2) print(ss1.minhash.scaled) ss1 = ss1.to_mutable() diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 23647e517b..c3f0858395 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -23,6 +23,8 @@ from sourmash.sbt import SBT, Node from sourmash.sbtmh import SigLeaf, load_sbt_index from sourmash.search import SearchResult, GatherResult +from 
sourmash.signature import load_one_signature_from_json as load_one_signature +from sourmash.signature import load_signatures_from_json try: import matplotlib @@ -161,7 +163,7 @@ def test_compare_serial(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -170,7 +172,7 @@ def test_compare_serial(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() @@ -189,7 +191,7 @@ def test_compare_serial_distance(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -198,7 +200,7 @@ def test_compare_serial_distance(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() @@ -219,7 +221,7 @@ def test_compare_parallel(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -228,7 +230,7 @@ def test_compare_parallel(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) assert (cmp_out == cmp_calc).all() @@ -252,7 +254,7 @@ def test_compare_do_serial_compare_with_from_file(runtmp): sigs = [] 
for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -261,7 +263,7 @@ def test_compare_do_serial_compare_with_from_file(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) assert numpy.array_equal(numpy.sort(cmp_out.flat), numpy.sort(cmp_calc.flat)) @@ -281,7 +283,7 @@ def test_compare_do_basic_compare_using_rna_arg(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -305,7 +307,7 @@ def test_compare_do_basic_using_nucleotide_arg(runtmp): sigs = [] for fn in testsigs: - sigs.append(sourmash.load_one_signature(fn, ksize=21, select_moltype="dna")) + sigs.append(load_one_signature(fn, ksize=21, select_moltype="dna")) cmp_calc = numpy.zeros([len(sigs), len(sigs)]) for i, si in enumerate(sigs): @@ -638,7 +640,7 @@ def _load_compare_matrix_and_sigs(compare_csv, sigfiles, *, ksize=31): # load in all the input signatures idx_to_sig = {} for idx, filename in enumerate(sigfiles): - ss = sourmash.load_one_signature(filename, ksize=ksize) + ss = load_one_signature(filename, ksize=ksize) idx_to_sig[idx] = ss return mat, idx_to_sig @@ -2198,9 +2200,9 @@ def test_search_containment_abund(runtmp): # save! 
with open(runtmp.output("a.sig"), "w") as fp: - sourmash.save_signatures([x], fp) + signature.save_signatures_to_json([x], fp) with open(runtmp.output("b.sig"), "w") as fp: - sourmash.save_signatures([y], fp) + signature.save_signatures_to_json([y], fp) # run sourmash search --containment with pytest.raises(SourmashCommandFailed) as exc: @@ -2242,9 +2244,9 @@ def test_search_containment_abund_ignore(runtmp): # save! with open(runtmp.output("a.sig"), "w") as fp: - sourmash.save_signatures([x], fp) + signature.save_signatures_to_json([x], fp) with open(runtmp.output("b.sig"), "w") as fp: - sourmash.save_signatures([y], fp) + signature.save_signatures_to_json([y], fp) # run sourmash search runtmp.sourmash( @@ -3313,7 +3315,7 @@ def test_do_sourmash_index_abund(c): testdata2 = utils.get_test_data("lca-root/TOBG_MED-875.fna.gz.sig") with open(testdata2): - ss = sourmash.load_one_signature(testdata2, ksize=31) + ss = load_one_signature(testdata2, ksize=31) assert ss.minhash.track_abundance == True sbtname = "foo" @@ -3688,7 +3690,7 @@ def test_do_sourmash_check_sbt_filenames(runtmp): sig_names = set() sig_md5s = set() for f in files: - sig = signature.load_one_signature(f) + sig = load_one_signature(f) sig_names.add(sig.name) sig_md5s.add(sig.md5sum()) @@ -3839,10 +3841,10 @@ def test_compare_with_abundance_1(runtmp): s2 = signature.SourmashSignature(E2, filename="e2", name="e2") with open(runtmp.output("e1.sig"), "w") as f: - signature.save_signatures([s1], f) + signature.save_signatures_to_json([s1], f) with open(runtmp.output("e2.sig"), "w") as f: - signature.save_signatures([s2], f) + signature.save_signatures_to_json([s2], f) runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") @@ -3863,10 +3865,10 @@ def test_compare_with_abundance_2(runtmp): s2 = signature.SourmashSignature(E2, filename="e2", name="e2") with open(runtmp.output("e1.sig"), "w") as f: - signature.save_signatures([s1], f) + signature.save_signatures_to_json([s1], f) with 
open(runtmp.output("e2.sig"), "w") as f: - signature.save_signatures([s2], f) + signature.save_signatures_to_json([s2], f) runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") @@ -3888,10 +3890,10 @@ def test_compare_with_abundance_3(runtmp): s2 = signature.SourmashSignature(E2, filename="e2", name="e2") with open(runtmp.output("e1.sig"), "w") as f: - signature.save_signatures([s1], f) + signature.save_signatures_to_json([s1], f) with open(runtmp.output("e2.sig"), "w") as f: - signature.save_signatures([s2], f) + signature.save_signatures_to_json([s2], f) runtmp.sourmash("search", "e1.sig", "e2.sig", "-k", "5") @@ -4417,7 +4419,7 @@ def test_gather_f_match_orig(runtmp, linear_gather, prefetch_gather): print(runtmp.last_result.out) print(runtmp.last_result.err) - combined_sig = sourmash.load_one_signature(testdata_combined, ksize=21) + combined_sig = load_one_signature(testdata_combined, ksize=21) remaining_mh = combined_sig.minhash.to_mutable() def approx_equal(a, b, n=5): @@ -4434,7 +4436,7 @@ def approx_equal(a, b, n=5): # double check -- should match 'search --containment'. # (this is kind of useless for a 1.0 contained_by, I guess) filename = row["filename"] - match = sourmash.load_one_signature(filename, ksize=21) + match = load_one_signature(filename, ksize=21) assert match.contained_by(combined_sig) == 1.0 # check other fields, too. 
@@ -5439,12 +5441,12 @@ def test_multigather_metagenome_output_unique(runtmp): # change 'filename' on 'combined.sig' to something else orig_query_sig = utils.get_test_data("gather/combined.sig") - sketch = sourmash.load_one_signature(orig_query_sig) + sketch = load_one_signature(orig_query_sig) ss = signature.SourmashSignature(sketch.minhash, filename="named_query") query_sig = runtmp.output("the_query.sig") with open(query_sig, "w") as f: - signature.save_signatures([ss], f) + signature.save_signatures_to_json([ss], f) cmd = f"multigather --query {query_sig} --db {testdata_sigs_arg} -k 21 --threshold-bp=0 -U" cmd = cmd.split(" ") @@ -5736,8 +5738,8 @@ def test_gather_metagenome_output_unassigned_nomatches( print(c.last_result.out) assert "No matches found for --threshold-bp at 50.0 kbp." in c.last_result.err - x = sourmash.load_one_signature(query_sig, ksize=31) - y = sourmash.load_one_signature(c.output("foo.sig")) + x = load_one_signature(query_sig, ksize=31) + y = load_one_signature(c.output("foo.sig")) assert x.minhash == y.minhash @@ -5771,8 +5773,8 @@ def test_gather_metagenome_output_unassigned_nomatches_protein( c.run_sourmash("sig", "describe", c.output("foo.sig")) print(c.last_result.out) - x = sourmash.load_one_signature(query_sig, ksize=57) - y = sourmash.load_one_signature(c.output("foo.sig")) + x = load_one_signature(query_sig, ksize=57) + y = load_one_signature(c.output("foo.sig")) assert x.minhash == y.minhash assert y.minhash.moltype == "protein" @@ -5979,7 +5981,7 @@ def test_gather_downsample_multiple(runtmp, linear_gather, prefetch_gather): query_sig = utils.get_test_data("GCF_000006945.2-s500.sig") # load in the hashes and do split them into four bins, randomly. 
- ss = sourmash.load_one_signature(query_sig) + ss = load_one_signature(query_sig) hashes = list(ss.minhash.hashes) random.seed(a=1) # fix seed so test is reproducible @@ -6001,7 +6003,7 @@ def test_gather_downsample_multiple(runtmp, linear_gather, prefetch_gather): binsig = signature.SourmashSignature(mh_bins[i], name=f"bin{i}") with open(runtmp.output(f"bin{i}.sig"), "wb") as fp: - sourmash.save_signatures([binsig], fp) + sourmash.save_signatures_to_json([binsig], fp) gathersigs.append(f"bin{i}.sig") @@ -6461,7 +6463,7 @@ def test_gather_abund_10_1(runtmp, prefetch_gather, linear_gather): for prod, f_weighted in zip(weighted_calc, f_weighted_list): assert prod / total_weighted == f_weighted, (prod, f_weighted) - query_sig = sourmash.load_one_signature(query) + query_sig = load_one_signature(query) query_mh = query_sig.minhash total_bp_analyzed = sum(unique_overlaps) + remaining_bps[-1] @@ -6569,11 +6571,11 @@ def test_gather_output_unassigned_with_abundance( assert os.path.exists(c.output("unassigned.sig")) - nomatch = sourmash.load_one_signature(c.output("unassigned.sig")) + nomatch = load_one_signature(c.output("unassigned.sig")) assert nomatch.minhash.track_abundance - query_ss = sourmash.load_one_signature(query) - against_ss = sourmash.load_one_signature(against) + query_ss = load_one_signature(query) + against_ss = load_one_signature(against) # unassigned should have nothing that is in the database nomatch_mh = nomatch.minhash @@ -6663,8 +6665,8 @@ def test_multigather_output_unassigned_with_abundance(runtmp, sig_save_extension nomatch = list(nomatch)[0] assert nomatch.minhash.track_abundance - query_ss = sourmash.load_one_signature(query) - against_ss = sourmash.load_one_signature(against) + query_ss = load_one_signature(query) + against_ss = load_one_signature(against) # unassigned should have nothing that is in the database nomatch_mh = nomatch.minhash @@ -7208,7 +7210,7 @@ def test_license_cc0(runtmp): sigfile = runtmp.output("short.fa.sig") assert 
os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") assert sig.license == "CC0" @@ -7231,7 +7233,7 @@ def test_license_load_non_cc0(): sigfile = utils.get_test_data("bad-license.sig") try: - next(signature.load_signatures(sigfile, do_raise=True)) + next(load_signatures_from_json(sigfile, do_raise=True)) except Exception as e: assert "sourmash only supports CC0-licensed signatures" in str(e) @@ -8146,8 +8148,8 @@ def test_search_jaccard_ani_downsample(runtmp): sig47 = utils.get_test_data("47.fa.sig") sig4763 = utils.get_test_data("47+63.fa.sig") - ss47 = sourmash.load_one_signature(sig47) - ss4763 = sourmash.load_one_signature(sig4763) + ss47 = load_one_signature(sig47) + ss4763 = load_one_signature(sig4763) print(f"SCALED: sig1: {ss47.minhash.scaled}, sig2: {ss4763.minhash.scaled}") c.run_sourmash("search", sig47, sig4763, "-o", "xxx.csv") @@ -8188,7 +8190,7 @@ def test_search_jaccard_ani_downsample(runtmp): assert round(float(row["ani"]), 3) == 0.993 # downsample manually and assert same ANI - ss47_ds = signature.load_one_signature(ds_sig47) + ss47_ds = load_one_signature(ds_sig47) print("SCALED:", ss47_ds.minhash.scaled, ss4763.minhash.scaled) ani_info = ss47_ds.jaccard_ani(ss4763, downsample=True) print(ani_info) @@ -8403,7 +8405,7 @@ def test_compare_containment_ani_asymmetry(runtmp): # load in all the input signatures idx_to_sig = {} for idx, filename in enumerate(testdata_sigs): - ss = sourmash.load_one_signature(filename, ksize=31) + ss = load_one_signature(filename, ksize=31) idx_to_sig[idx] = ss # check explicit containment against output of compare diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py index 8f58a0a25f..916cabb1e3 100644 --- a/tests/test_sourmash_args.py +++ b/tests/test_sourmash_args.py @@ -19,6 +19,8 @@ from sourmash.index import LinearIndex from sourmash.cli.utils import add_ksize_arg +from sourmash.signature 
import load_signatures_from_json, save_signatures_to_json + def test_save_signatures_api_none(): # save to sigfile @@ -69,7 +71,7 @@ def test_save_signatures_to_location_1_stdout(): output = output_capture.getvalue() - saved = list(sourmash.signature.load_signatures(output)) + saved = list(load_signatures_from_json(output)) assert ss2 in saved assert ss47 in saved assert len(saved) == 2 @@ -88,7 +90,7 @@ def test_save_signatures_to_location_1_sig_is_default(runtmp): save_sig.add(ss2) save_sig.add(ss47) - saved = list(sourmash.signature.load_signatures(outloc)) + saved = list(load_signatures_from_json(outloc)) assert ss2 in saved assert ss47 in saved assert len(saved) == 2 @@ -281,7 +283,7 @@ def test_save_signatures_to_location_3_zip_add_fail(runtmp): outloc = runtmp.output("foo.zip") with zipfile.ZipFile(outloc, "x") as zf: with zf.open("xyz.sig", "w") as fp: - sourmash.save_signatures([ss2], fp=fp, compression=1) + save_signatures_to_json([ss2], fp=fp, compression=1) # verify it can be loaded, yada yada saved = list(sourmash.load_file_as_signatures(outloc)) @@ -307,7 +309,7 @@ def test_save_signatures_to_location_3_zip_add_with_manifest(runtmp): outloc = runtmp.output("foo.zip") with zipfile.ZipFile(outloc, "x") as zf: with zf.open("xyz.sig", "w") as fp: - sourmash.save_signatures([ss2], fp=fp, compression=1) + save_signatures_to_json([ss2], fp=fp, compression=1) # make a manifest row... 
row = manifest.CollectionManifest.make_manifest_row( diff --git a/tests/test_sourmash_compute.py b/tests/test_sourmash_compute.py index f6f6370785..ddab496cbb 100644 --- a/tests/test_sourmash_compute.py +++ b/tests/test_sourmash_compute.py @@ -24,6 +24,8 @@ from sourmash import VERSION from sourmash_tst_utils import SourmashCommandFailed +from sourmash.signature import load_signatures_from_json + def test_do_sourmash_compute(): with utils.TempDirectory() as location: @@ -35,7 +37,7 @@ def test_do_sourmash_compute(): sigfile = os.path.join(location, "short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -127,7 +129,7 @@ def test_do_sourmash_compute_outdir(c): sigfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -269,7 +271,7 @@ def test_do_sourmash_compute_singleton(): sigfile = os.path.join(location, "short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert sig.name.endswith("shortName") @@ -285,7 +287,7 @@ def test_do_sourmash_compute_name(): sigfile = os.path.join(location, "foo.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert sig.name == "foo" status, out, err = utils.runscript( @@ -297,7 +299,7 @@ def test_do_sourmash_compute_name(): sigfile2 = os.path.join(location, "foo2.sig") assert os.path.exists(sigfile2) - sig2 = next(signature.load_signatures(sigfile)) + sig2 = next(load_signatures_from_json(sigfile)) assert sig2.name == "foo" assert sig.name == sig2.name @@ -346,7 +348,7 @@ def test_do_sourmash_compute_name_from_first(): sigfile = os.path.join(location, "short3.fa.sig") assert 
os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert sig.name == "firstname" @@ -359,7 +361,7 @@ def test_do_sourmash_compute_multik(): outfile = os.path.join(location, "short.fa.sig") assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -380,7 +382,7 @@ def test_do_sourmash_compute_multik_with_protein(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 4 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -407,7 +409,7 @@ def test_do_sourmash_compute_multik_with_dayhoff(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -429,7 +431,7 @@ def test_do_sourmash_compute_multik_with_dayhoff_and_dna(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 4 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -458,7 +460,7 @@ def test_do_sourmash_compute_multik_with_hp(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -480,7 +482,7 @@ def test_do_sourmash_compute_multik_with_hp_and_dna(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert 
len(siglist) == 4 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -503,7 +505,7 @@ def test_do_sourmash_compute_multik_with_dayhoff_dna_protein(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 6 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -529,7 +531,7 @@ def test_do_sourmash_compute_multik_with_dayhoff_hp_dna_protein(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 8 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -581,7 +583,7 @@ def test_do_sourmash_compute_multik_only_protein(c): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -622,7 +624,7 @@ def test_do_sourmash_compute_multik_only_protein_no_rna(c): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -644,7 +646,7 @@ def test_do_sourmash_compute_protein_bad_sequences(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -665,7 +667,7 @@ def test_do_sourmash_compute_multik_input_is_protein(): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x 
in siglist]) assert 7 in ksizes @@ -688,7 +690,7 @@ def test_do_sourmash_compute_multik_outfile(): ) assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -706,7 +708,7 @@ def test_do_sourmash_compute_with_scaled_1(): ) assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 scaled_vals = [x.minhash.scaled for x in siglist] @@ -725,7 +727,7 @@ def test_do_sourmash_compute_with_scaled_2(): ) assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 max_hashes = [x.minhash._max_hash for x in siglist] @@ -744,7 +746,7 @@ def test_do_sourmash_compute_with_scaled(): ) assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 max_hashes = [x.minhash._max_hash for x in siglist] @@ -797,7 +799,7 @@ def test_do_sourmash_compute_with_seed(): ) assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 seeds = [x.minhash.seed for x in siglist] @@ -826,11 +828,11 @@ def test_do_sourmash_check_protein_comparisons(): sig2 = os.path.join(location, "ecoli.genes.fna.sig") assert os.path.exists(sig2) - # I'm not sure why load_signatures is randomizing order, but ok. - x = list(signature.load_signatures(sig1)) + # I'm not sure why load_signatures_from_json is randomizing order, but ok. 
+ x = list(load_signatures_from_json(sig1)) sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name) - x = list(signature.load_signatures(sig2)) + x = list(load_signatures_from_json(sig2)) sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) name1 = sig1_aa.name.split()[0] @@ -862,13 +864,13 @@ def test_do_sourmash_check_knowngood_dna_comparisons(c): sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1, sig2 = sorted(x, key=lambda x: x.name) print(sig1.name) print(sig2.name) knowngood = utils.get_test_data("benchmark.dna.sig") - good = list(signature.load_signatures(knowngood))[0] + good = list(load_signatures_from_json(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -881,11 +883,11 @@ def test_do_sourmash_check_knowngood_dna_comparisons_use_rna(c): sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1, sig2 = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.dna.sig") - good = list(signature.load_signatures(knowngood))[0] + good = list(load_signatures_from_json(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -903,11 +905,11 @@ def test_do_sourmash_check_knowngood_input_protein_comparisons(): sig1 = os.path.join(location, "ecoli.faa.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.input_prot.sig") - good_aa = list(signature.load_signatures(knowngood))[0] + good_aa = list(load_signatures_from_json(knowngood))[0] assert sig1_aa.similarity(good_aa) == 1.0 @@ -925,11 +927,11 @@ def test_do_sourmash_check_knowngood_protein_comparisons(): sig1 = os.path.join(location, "ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + 
x = list(load_signatures_from_json(sig1)) sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.prot.sig") - good_trans = list(signature.load_signatures(knowngood))[0] + good_trans = list(load_signatures_from_json(knowngood))[0] assert sig2_trans.similarity(good_trans) == 1.0 diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py index 98448e4d6b..550b24a948 100644 --- a/tests/test_sourmash_sketch.py +++ b/tests/test_sourmash_sketch.py @@ -26,6 +26,8 @@ from sourmash.command_sketch import _signatures_for_sketch_factory from sourmash_tst_utils import SourmashCommandFailed +from sourmash.signature import load_one_signature_from_json as load_one_signature +from sourmash.signature import load_signatures_from_json def test_do_sourmash_sketch_check_scaled_bounds_negative(runtmp): @@ -601,7 +603,7 @@ def test_do_sourmash_sketchdna(runtmp): sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -612,7 +614,7 @@ def test_do_sourmash_sketchdna_check_sequence_succeed(runtmp): sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -656,7 +658,7 @@ def test_do_sourmash_sketchdna_from_file(runtmp): sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -709,7 +711,7 @@ def test_do_sourmash_sketchdna_outdir(c): sigfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -724,7 +726,7 @@ def 
test_do_sourmash_sketchdna_output_dir(c): sigfile = os.path.join(c.location, "short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("short.fa") @@ -865,7 +867,7 @@ def test_do_sourmash_sketchdna_singleton(runtmp): sigfile = runtmp.output("short.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert str(sig).endswith("shortName") @@ -876,7 +878,7 @@ def test_do_sourmash_sketchdna_name(runtmp): sigfile = runtmp.output("foo.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert sig.name == "foo" runtmp.sourmash("sketch", "dna", "--name", "foo", testdata1, "-o", "foo2.sig") @@ -884,7 +886,7 @@ def test_do_sourmash_sketchdna_name(runtmp): sigfile2 = runtmp.output("foo2.sig") assert os.path.exists(sigfile2) - sig2 = next(signature.load_signatures(sigfile)) + sig2 = next(load_signatures_from_json(sigfile)) assert sig2.name == "foo" assert sig.name == sig2.name @@ -918,7 +920,7 @@ def test_do_sourmash_sketchdna_name_from_first(runtmp): sigfile = runtmp.output("short3.fa.sig") assert os.path.exists(sigfile) - sig = next(signature.load_signatures(sigfile)) + sig = next(load_signatures_from_json(sigfile)) assert sig.name == "firstname" @@ -929,7 +931,7 @@ def test_do_sourmash_sketchdna_multik(runtmp): outfile = runtmp.output("short.fa.sig") assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -984,7 +986,7 @@ def test_do_sketch_translate_multik_with_protein(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = 
list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1007,7 +1009,7 @@ def test_do_sketch_translate_multik_with_protein_from_file(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1025,7 +1027,7 @@ def test_do_sketch_translate_multik_with_dayhoff(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1042,7 +1044,7 @@ def test_do_sketch_translate_multik_with_hp(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1060,7 +1062,7 @@ def test_do_sourmash_sketch_translate_multik_only_protein(c): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1077,7 +1079,7 @@ def test_do_sourmash_sketch_translate_bad_sequences(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1093,7 +1095,7 @@ def test_do_sketch_protein_multik_input(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) 
assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1120,7 +1122,7 @@ def test_do_sketch_protein_multik_input_from_file(runtmp): with open(outfile) as fp: sigdata = fp.read() - siglist = list(signature.load_signatures(sigdata)) + siglist = list(load_signatures_from_json(sigdata)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 7 in ksizes @@ -1139,7 +1141,7 @@ def test_do_sourmash_sketchdna_multik_outfile(runtmp): assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 ksizes = set([x.minhash.ksize for x in siglist]) assert 21 in ksizes @@ -1156,7 +1158,7 @@ def test_do_sourmash_sketchdna_with_scaled_1(runtmp): assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 scaled_vals = [x.minhash.scaled for x in siglist] @@ -1174,7 +1176,7 @@ def test_do_sourmash_sketchdna_with_scaled_2(runtmp): assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 max_hashes = [x.minhash._max_hash for x in siglist] @@ -1192,7 +1194,7 @@ def test_do_sourmash_sketchdna_with_scaled(runtmp): assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 max_hashes = [x.minhash._max_hash for x in siglist] @@ -1244,7 +1246,7 @@ def test_do_sketch_with_seed(runtmp): assert os.path.exists(outfile) - siglist = list(signature.load_signatures(outfile)) + siglist = list(load_signatures_from_json(outfile)) assert len(siglist) == 2 seeds = [x.minhash.seed for x in siglist] @@ -1269,11 +1271,11 @@ def test_do_sourmash_check_protein_comparisons(runtmp): sig2 = runtmp.output("ecoli.genes.fna.sig") assert 
os.path.exists(sig2) - # I'm not sure why load_signatures is randomizing order, but ok. - x = list(signature.load_signatures(sig1)) + # I'm not sure why load_signatures_from_json is randomizing order, but ok. + x = list(load_signatures_from_json(sig1)) sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name) - x = list(signature.load_signatures(sig2)) + x = list(load_signatures_from_json(sig2)) sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) name1 = sig1_aa.name.split()[0] @@ -1305,14 +1307,14 @@ def test_do_sourmash_check_knowngood_dna_comparisons(c): sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1, sig2 = sorted(x, key=lambda x: x.name) print(sig1.name) print(sig2.name) knowngood = utils.get_test_data("benchmark.dna.sig") - good = list(signature.load_signatures(knowngood))[0] + good = list(load_signatures_from_json(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -1325,11 +1327,11 @@ def test_do_sourmash_check_knowngood_dna_comparisons_use_rna(c): sig1 = c.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1, sig2 = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.dna.sig") - good = list(signature.load_signatures(knowngood))[0] + good = list(load_signatures_from_json(knowngood))[0] assert sig2.similarity(good) == 1.0 @@ -1344,11 +1346,11 @@ def test_do_sourmash_check_knowngood_input_protein_comparisons(runtmp): sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1_aa, sig2_aa = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.input_prot.sig") - good_aa = list(signature.load_signatures(knowngood))[0] + good_aa = list(load_signatures_from_json(knowngood))[0] assert sig1_aa.similarity(good_aa) == 
1.0 @@ -1365,11 +1367,11 @@ def test_do_sourmash_check_knowngood_protein_comparisons(runtmp): sig1 = runtmp.output("ecoli.genes.fna.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) sig1_trans, sig2_trans = sorted(x, key=lambda x: x.name) knowngood = utils.get_test_data("benchmark.prot.sig") - good_trans = list(signature.load_signatures(knowngood))[0] + good_trans = list(load_signatures_from_json(knowngood))[0] assert sig2_trans.similarity(good_trans) == 1.0 @@ -1399,11 +1401,11 @@ def test_do_sourmash_singleton_multiple_files_no_out_specified(runtmp): sig2 = runtmp.output("shewanella.faa.sig") assert os.path.exists(sig2) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) for ss in x: print(ss.name) - y = list(signature.load_signatures(sig2)) + y = list(load_signatures_from_json(sig2)) for ss in y: print(ss.name) @@ -1447,7 +1449,7 @@ def test_do_sourmash_singleton_multiple_files_output(runtmp): sig1 = runtmp.output("output.sig") assert os.path.exists(sig1) - x = list(signature.load_signatures(sig1)) + x = list(load_signatures_from_json(sig1)) for ss in x: print(ss.name) @@ -1512,7 +1514,7 @@ def test_protein_with_stop_codons(runtmp): sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - x = signature.load_one_signature(sig1) + x = load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons @@ -1525,7 +1527,7 @@ def test_protein_with_stop_codons(runtmp): sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) - x = signature.load_one_signature(sig2) + x = load_one_signature(sig2) cli_mh2 = x.minhash # now calculate sketch with MinHash... 
@@ -1578,7 +1580,7 @@ def test_hp_with_stop_codons(runtmp): sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - x = signature.load_one_signature(sig1) + x = load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons @@ -1591,7 +1593,7 @@ def test_hp_with_stop_codons(runtmp): sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) - x = signature.load_one_signature(sig2) + x = load_one_signature(sig2) cli_mh2 = x.minhash # now calculate sketch with MinHash... @@ -1644,7 +1646,7 @@ def test_dayhoff_with_stop_codons(runtmp): sig1 = runtmp.output("ecoli.faa.sig") assert os.path.exists(sig1) - x = signature.load_one_signature(sig1) + x = load_one_signature(sig1) cli_mh1 = x.minhash # second, via CLI w/stop codons @@ -1657,7 +1659,7 @@ def test_dayhoff_with_stop_codons(runtmp): sig2 = runtmp.output("ecoli.stop.faa.sig") assert os.path.exists(sig2) - x = signature.load_one_signature(sig2) + x = load_one_signature(sig2) cli_mh2 = x.minhash # now calculate sketch with MinHash...