Skip to content

Commit

Permalink
fix nodegraph containment
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Apr 18, 2021
1 parent 556b101 commit 9531a23
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/core/src/sketch/nodegraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ impl Comparable<Nodegraph> for Nodegraph {
.zip(&other.bs)
.map(|(bs, bs_other)| bs.intersection(bs_other).count())
.sum();
let size: usize = self.bs.iter().map(|bs| bs.len()).sum();
let size: usize = self.bs.iter().map(|bs| bs.ones().count()).sum();
result as f64 / size as f64
}
}
Expand Down
13 changes: 3 additions & 10 deletions src/sourmash/sbtmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,6 @@ def _max_jaccard_underneath_internal_node(node, query):
# J(A, B) = |A intersection B| / |A union B|
# Using only |A| as the denominator gives the containment of A in B:
# C(A, B) = |A intersection B| / |A|
# Because |A| <= |A union B|, C(A, B) >= J(A, B), so the containment is
# also an upper bound on the Jaccard similarity.

# count the maximum number of hash matches beneath this node
#matches = node.data.matches(mh)
#max_score = float(matches) / len(mh)

max_score = query_bf.containment(node.data)

return max_score
Expand Down Expand Up @@ -151,7 +146,6 @@ def search_minhashes_containment(node, sig, threshold, results=None, downsample=
else: # Node or Leaf, Nodegraph by minhash comparison
bf = _get_bf(node, sig)
matches = bf.containment(node.data) * len(mh)
#matches = node.data.matches(mh)

if len(mh) and float(matches) / len(mh) >= threshold:
return 1
Expand All @@ -166,14 +160,14 @@ def search_minhashes_max_containment(node, sig, threshold, results=None,

if isinstance(node, SigLeaf):
node_mh = node.data.minhash

matches = node_mh.count_common(mh, downsample)
node_size = len(node_mh)
else: # Node or Leaf, Nodegraph by minhash comparison
bf = _get_bf(node, sig)
matches = bf.containment(node.data) * len(mh)
node_size = len(mh) # FIXME

#matches = node.data.matches(mh)
bf = _get_bf(node, sig)
matches = bf.containment(node.data) * len(mh)

denom = min((len(mh), node_size))

Expand All @@ -199,7 +193,6 @@ def search(self, node, query, threshold, results=None):
else: # Nodegraph by minhash comparison
bf = _get_bf(node, query)
matches = bf.containment(node.data) * len(mh)
#matches = node.data.matches(mh)

if not matches:
return 0
Expand Down

0 comments on commit 9531a23

Please sign in to comment.