Skip to content

Commit

Permalink
add sbt prepare test data
Browse files: browse the repository at this point in the history
  • Loading branch information
luizirber committed Oct 31, 2018
1 parent 6ad60b7 commit f97ae1f
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 13 deletions.
49 changes: 38 additions & 11 deletions sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,12 +513,15 @@ def _load_v1(jnodes, leaf_loader, dirname, storage, print_version_warning=True):
raise ValueError("Empty tree!")

sbt_nodes = {}
sbt_leaves = {}

max_node = 0

sample_bf = os.path.join(dirname, jnodes[0]['filename'])
ksize, tablesize, ntables = khmer.extract_nodegraph_info(sample_bf)[:3]
factory = GraphFactory(ksize, tablesize, ntables)

for i, jnode in enumerate(jnodes):
for k, jnode in enumerate(jnodes):
if jnode is None:
continue

Expand All @@ -527,13 +530,24 @@ def _load_v1(jnodes, leaf_loader, dirname, storage, print_version_warning=True):
if 'internal' in jnode['name']:
jnode['factory'] = factory
sbt_node = Node.load(jnode, storage)
sbt_nodes[k] = sbt_node
else:
sbt_node = leaf_loader(jnode, storage)
sbt_leaves[k] = sbt_node

sbt_nodes[i] = sbt_node
max_node = max(max_node, k)

tree = SBT(factory)
tree._nodes = sbt_nodes
tree._leaves = sbt_leaves
tree._missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes and i not in sbt_leaves}

if print_version_warning:
error("WARNING: this is an old index version, please run `sourmash migrate` to update it.")
error("WARNING: proceeding with execution, but it will take longer to finish!")

tree._fill_min_n_below()

return tree

Expand All @@ -547,6 +561,8 @@ def _load_v2(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
sbt_nodes = {}
sbt_leaves = {}

max_node = 0

sample_bf = os.path.join(dirname, nodes[0]['filename'])
k, size, ntables = khmer.extract_nodegraph_info(sample_bf)[:3]
factory = GraphFactory(k, size, ntables)
Expand All @@ -565,9 +581,19 @@ def _load_v2(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
sbt_node = leaf_loader(node, storage)
sbt_leaves[k] = sbt_node

max_node = max(max_node, k)

tree = cls(factory, d=info['d'])
tree._nodes = sbt_nodes
tree._leaves = sbt_leaves
tree._missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes and i not in sbt_leaves}

if print_version_warning:
error("WARNING: this is an old index version, please run `sourmash migrate` to update it.")
error("WARNING: proceeding with execution, but it will take longer to finish!")

tree._fill_min_n_below()

return tree

Expand Down Expand Up @@ -608,7 +634,7 @@ def _load_v3(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
tree._nodes = sbt_nodes
tree._leaves = sbt_leaves
tree._missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes and i not in sbt_leaves}
if i not in sbt_nodes and i not in sbt_leaves}

if print_version_warning:
error("WARNING: this is an old index version, please run `sourmash migrate` to update it.")
Expand All @@ -625,7 +651,8 @@ def _load_v4(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
if not nodes:
raise ValueError("Empty tree!")

sbt_nodes = defaultdict(lambda: None)
sbt_nodes = {}
sbt_leaves = {}

klass = STORAGES[info['storage']['backend']]
if info['storage']['backend'] == "FSStorage":
Expand All @@ -643,18 +670,18 @@ def _load_v4(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
if 'internal' in node['name']:
node['factory'] = factory
sbt_node = Node.load(node, storage)
sbt_nodes[k] = sbt_node
else:
sbt_node = leaf_loader(node, storage)
sbt_leaves[k] = sbt_node

sbt_nodes[k] = sbt_node
max_node = max(max_node, k)

tree = cls(factory, d=info['d'], storage=storage)
tree.nodes = sbt_nodes
tree.missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes}
# TODO: this might not be true with combine...
tree.next_node = max_node
tree._nodes = sbt_nodes
tree._leaves = sbt_leaves
tree._missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes and i not in sbt_leaves}

if print_version_warning:
error("WARNING: this is an old index version, please run `sourmash migrate` to update it.")
Expand Down Expand Up @@ -703,7 +730,7 @@ def _load_v5(cls, info, leaf_loader, dirname, storage, print_version_warning=Tru
tree._nodes = sbt_nodes
tree._leaves = sbt_leaves
tree._missing_nodes = {i for i in range(max_node)
if i not in sbt_nodes and i not in sbt_leaves}
if i not in sbt_nodes and i not in sbt_leaves}

return tree

Expand Down
4 changes: 3 additions & 1 deletion sourmash/sbt_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import abc
from io import BytesIO
import os
import urllib
import tarfile


Expand Down Expand Up @@ -108,7 +109,8 @@ def __init__(self, host='https://ipfs.io'):
# Backup host: https://cloudflare-ipfs.com

def cat(self, multihash):
    """Fetch the object addressed by ``multihash`` and return its bytes.

    The object is requested from ``<self.host>/ipfs/<multihash>`` and the
    complete response body is read into memory.

    Parameters:
        multihash: identifier appended to the ``/ipfs/`` path of the host.

    Returns:
        The raw response body as returned by ``response.read()``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the request fails
        (e.g. ``urllib.error.URLError``); errors are not swallowed here.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    url = '{}/ipfs/{}'.format(self.host, multihash)
    # Close the response deterministically instead of leaking the handle.
    with urllib.request.urlopen(url) as response:
        return response.read()

def add_bytes(self, content):
raise NotImplementedError('This is a read-only client.')
Expand Down
2 changes: 1 addition & 1 deletion tests/test-data/ipfs_leaves.sbt.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"d": 2, "version": 5, "storage": {"backend": "IPFSStorage", "args": {}}, "factory": {"class": "GraphFactory", "args": [1, 100000, 4]}, "leaves": {"6": {"filename": "QmNcYsqpwFkuDV4SxaqADRfLar5cFuD8z8TXKD5pQ3K8fr", "name": "6d6e87e1154e95b279e5e7db414bc37b", "metadata": "6d6e87e1154e95b279e5e7db414bc37b"}, "7": {"filename": "QmVhjgBv8wa1HZG4omMDTP9bXrFW78iFYD2iNgWrA9W8MB", "name": "60f7e23c24a8d94791cc7a8680c493f9", "metadata": "60f7e23c24a8d94791cc7a8680c493f9"}, "8": {"filename": "QmPnqKQyfaU7KrG3rjLhqsXxPDUNWzYS5HTmu1C4RX2sff", "name": "0107d767a345eff67ecdaed2ee5cd7ba", "metadata": "0107d767a345eff67ecdaed2ee5cd7ba"}, "9": {"filename": "QmYUL5eJtp1BkXR2JhN57919V4exzFujr1WQkAUNdFEmyh", "name": "f71e78178af9e45e6f1d87a0c53c465c", "metadata": "f71e78178af9e45e6f1d87a0c53c465c"}, "10": {"filename": "QmZP932izJhyMpe1RDmVb4jtofTQCyDxqA69nasvcGacNB", "name": "f0c834bc306651d2b9321fb21d3e8d8f", "metadata": "f0c834bc306651d2b9321fb21d3e8d8f"}, "11": {"filename": "QmPwoUHFEP1kBwcD1r5YMBkdNja9E8DCToHPDW91r7y45c", "name": "4e94e60265e04f0763142e20b52c0da1", "metadata": "4e94e60265e04f0763142e20b52c0da1"}, "12": {"filename": "QmS8y5ab4bAtjPgFjLwmf81LPdt4cN6SYLqekCC4e1u3cX", "name": "b59473c94ff2889eca5d7165936e64b3", "metadata": "b59473c94ff2889eca5d7165936e64b3"}}}
{"d": 2, "version": 5, "storage": {"backend": "IPFSStorage", "args": {}}, "factory": {"class": "GraphFactory", "args": [1, 100000, 4]}, "leaves": {"6": {"filename": "QmaW8C75VMgdrAVF8evR1FukPh3M4qCEPZK4ZLL7yJyMT8", "name": "6d6e87e1154e95b279e5e7db414bc37b", "metadata": "6d6e87e1154e95b279e5e7db414bc37b"}, "7": {"filename": "QmPArMUvrAK7spyH8WBp1gpExfbwG2Z4PMmCQVSaRNH5WE", "name": "60f7e23c24a8d94791cc7a8680c493f9", "metadata": "60f7e23c24a8d94791cc7a8680c493f9"}, "8": {"filename": "QmXd9x4L331soVHPBHMNeDbK73ugH2cXYoYCY8amFB2LZE", "name": "0107d767a345eff67ecdaed2ee5cd7ba", "metadata": "0107d767a345eff67ecdaed2ee5cd7ba"}, "9": {"filename": "QmWkR7Gj126x8u1kEf7z5WkGQzt4ib3yJyRYaGXL5Sk8t2", "name": "f71e78178af9e45e6f1d87a0c53c465c", "metadata": "f71e78178af9e45e6f1d87a0c53c465c"}, "10": {"filename": "QmesxNDT3P7bNhTLTaipFd6Gx6wx5SbbgW2jQfvUBdYYcS", "name": "f0c834bc306651d2b9321fb21d3e8d8f", "metadata": "f0c834bc306651d2b9321fb21d3e8d8f"}, "11": {"filename": "QmSKvxmECayQLwz77KwhsnFS8LevHZboHb3echuNxQYbWo", "name": "4e94e60265e04f0763142e20b52c0da1", "metadata": "4e94e60265e04f0763142e20b52c0da1"}, "12": {"filename": "QmVbeK97GQ6BYrv4wcn42RoiMKZonQLMY9THDgGujfAAo2", "name": "b59473c94ff2889eca5d7165936e64b3", "metadata": "b59473c94ff2889eca5d7165936e64b3"}}}

0 comments on commit f97ae1f

Please sign in to comment.