Skip to content

Commit

Permalink
to_query is a terrible variable name
Browse files (browse the repository at this point in the history)
  • Loading branch information
luizirber committed Nov 5, 2017
1 parent c212f97 commit c0ce378
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 27 deletions.
6 changes: 3 additions & 3 deletions sourmash_lib/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,9 +822,9 @@ def categorize(args):
search_fn = sourmash_lib.sbtmh.SearchMinHashesFindBest().search

for leaf in tree.find(search_fn, query, args.threshold):
- to_query = select_signature(leaf, query)
- if to_query.md5sum() != query.md5sum(): # ignore self.
- results.append((query.similarity(to_query), to_query))
+ node_sig = select_signature(leaf, query)
+ if node_sig.md5sum() != query.md5sum(): # ignore self.
+ results.append((query.similarity(node_sig), node_sig))

best_hit_sim = 0.0
best_hit_query_name = ""
Expand Down
30 changes: 15 additions & 15 deletions sourmash_lib/sbtmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,13 @@ def search_minhashes(node, sig, threshold, results=None, downsample=True):
mins = sig.minhash.get_mins()

if isinstance(node, SigLeaf):
- to_query = select_signature(node, sig).minhash
+ node_mh = select_signature(node, sig).minhash
  try:
- matches = to_query.count_common(sig.minhash)
+ matches = node_mh.count_common(sig.minhash)
  except Exception as e:
  if 'mismatch in max_hash' in str(e) and downsample:
- xx = sig.minhash.downsample_max_hash(to_query)
- yy = to_query.downsample_max_hash(sig.minhash)
+ xx = sig.minhash.downsample_max_hash(node_mh)
+ yy = node_mh.downsample_max_hash(sig.minhash)

matches = yy.count_common(xx)
else:
Expand All @@ -129,13 +129,13 @@ def search(self, node, sig, threshold, results=None):
score = 0

if isinstance(node, SigLeaf):
- to_query = select_signature(node, sig).minhash
+ node_mh = select_signature(node, sig).minhash
  try:
- score = to_query.similarity(sig.minhash)
+ score = node_mh.similarity(sig.minhash)
  except Exception as e:
  if 'mismatch in max_hash' in str(e) and self.downsample:
- xx = sig.minhash.downsample_max_hash(to_query.minhash)
- yy = to_query.minhash.downsample_max_hash(sig.minhash)
+ xx = sig.minhash.downsample_max_hash(node_mh)
+ yy = node_mh.downsample_max_hash(sig.minhash)

score = yy.similarity(xx)
else:
Expand Down Expand Up @@ -167,13 +167,13 @@ def search_minhashes_containment(node, sig, threshold,
mins = sig.minhash.get_mins()

if isinstance(node, SigLeaf):
- to_query = select_signature(node, sig).minhash
+ node_mh = select_signature(node, sig).minhash
  try:
- matches = to_query.count_common(sig.minhash)
+ matches = node_mh.count_common(sig.minhash)
  except Exception as e:
  if 'mismatch in max_hash' in str(e) and downsample:
- xx = sig.minhash.downsample_max_hash(to_query)
- yy = to_query.downsample_max_hash(sig.minhash)
+ xx = sig.minhash.downsample_max_hash(node_mh)
+ yy = node_mh.downsample_max_hash(sig.minhash)

matches = yy.count_common(xx)
else:
Expand All @@ -198,10 +198,10 @@ def search(self, node, sig, threshold, results=None):
mins = sig.minhash.get_mins()

if isinstance(node, SigLeaf):
- to_query = select_signature(node, sig).minhash
- max_scaled = max(to_query.scaled, sig.minhash.scaled)
+ node_mh = select_signature(node, sig).minhash
+ max_scaled = max(node_mh.scaled, sig.minhash.scaled)

- mh1 = to_query.downsample_scaled(max_scaled)
+ mh1 = node_mh.downsample_scaled(max_scaled)
mh2 = sig.minhash.downsample_scaled(max_scaled)
matches = mh1.count_common(mh2)
else: # Node or Leaf, Nodegraph by minhash comparison
Expand Down
18 changes: 9 additions & 9 deletions sourmash_lib/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def search_databases(query, databases, threshold, do_containment, best_only):

tree = sbt_or_siglist
for leaf in tree.find(search_fn, query, threshold):
- to_query = select_signature(leaf, query)
- similarity = query_match(to_query)
+ leafdata = select_signature(leaf, query)
+ similarity = query_match(leafdata)
  if similarity >= threshold and \
- to_query.md5sum() not in found_md5:
+ leafdata.md5sum() not in found_md5:
  sr = SearchResult(similarity=similarity,
- match_sig=to_query,
- md5=to_query.md5sum(),
+ match_sig=leafdata,
+ md5=leafdata.md5sum(),
  filename=filename,
- name=to_query.name())
+ name=leafdata.name())
found_md5.add(sr.md5)
results.append(sr)

Expand Down Expand Up @@ -79,11 +79,11 @@ def find_best(dblist, query):
tree = sbt_or_siglist

for leaf in tree.find(search_fn, query, 0.0):
- to_query = select_signature(leaf, query)
- leaf_e = to_query.minhash
+ leafdata = select_signature(leaf, query)
+ leaf_e = leafdata.minhash
  similarity = query.minhash.similarity_ignore_maxhash(leaf_e)
  if similarity > 0.0:
- results.append((similarity, to_query))
+ results.append((similarity, leafdata))
else:
for ss in sbt_or_siglist:
similarity = query.minhash.similarity_ignore_maxhash(ss.minhash)
Expand Down

0 comments on commit c0ce378

Please sign in to comment.