Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keep track of strategy index #275

Merged
merged 2 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions blocklib/pprlpsig.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def build_reversed_index(self, data: Sequence[Sequence], header: Optional[List[s

signatures = generate_signatures(self.signature_strategies, dtuple, self.null_sentinel, feature_to_index)

- for i, signature in enumerate(signatures):
- reversed_index_per_strategy[i][signature].append(rec_id)
+ for strategy_index, signature in signatures:
+ reversed_index_per_strategy[strategy_index][signature].append(rec_id)

reversed_index_per_strategy = [self.filter_reversed_index(data, reversed_index) for reversed_index in
reversed_index_per_strategy]
Expand Down
4 changes: 2 additions & 2 deletions blocklib/signature_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def generate_signatures(
:param feature_to_index:
Mapping from feature name to feature index

- :return signatures: set of str
+ :return signatures: list of tuples (strategy_index, signature)
"""
# signatures to return
signatures = []
Expand Down Expand Up @@ -152,7 +152,7 @@ def generate_signatures(
sig.append(s)
if len(sig) > 0:
signatures.append(
- "{}_{}".format(i, "_".join([x for x in sig if x is not None]))
+ (i, "{}_{}".format(i, "_".join([x for x in sig if x is not None])))
)

return signatures
10 changes: 5 additions & 5 deletions tests/test_signature_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_feature_value(self):
)
]
signatures = generate_signatures(signatures, dtuple, "")
- assert signatures == ["0_Joyce_Wang"]
+ assert signatures == [(0, "0_Joyce_Wang")]

def test_char_at(self):
"""Test signatures generated by characters-at."""
Expand All @@ -40,7 +40,7 @@ def test_char_at(self):
print(signatures)

signatures = generate_signatures(signatures, dtuple, "")
- assert signatures == ["0_oyc_ang"]
+ assert signatures == [(0, "0_oyc_ang")]

# test :end_ind
strategy = [
Expand All @@ -49,7 +49,7 @@ def test_char_at(self):
]
]
signature = generate_signatures(strategy, dtuple, "")
- assert signature == ["0_Joceyy"]
+ assert signature == [(0, "0_Joceyy")]

res = generate_signatures(strategy, ('', ''), "")
assert res == []
Expand All @@ -72,7 +72,7 @@ def test_metaphone(self):
]
]
signatures = generate_signatures(signature_strategies, dtuple, "")
- assert signatures == ["0_SM0XMT"]
+ assert signatures == [(0, "0_SM0XMT")]

def test_generate_signatures(self):
"""Test a multi-strategy signatures."""
Expand All @@ -91,7 +91,7 @@ def test_generate_signatures(self):
]
)
signatures = generate_signatures(signatures, dtuple, "")
- assert signatures == ["0_Joyce_Wang", "1_JSAS_ANKFNK"]
+ assert signatures == [(0, "0_Joyce_Wang"), (1, "1_JSAS_ANKFNK")]

def test_generate_signatures_with_null(self):
signature_strats = parse_obj_as(
Expand Down