Skip to content

Commit

Permalink
detector: add startswith matching for StringDetector base class (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jmartin-tech committed Jan 17, 2025
2 parents 57dd0d8 + 31db52d commit 81bf441
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 23 deletions.
3 changes: 3 additions & 0 deletions garak/detectors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def detect(
elif self.matchtype == "word":
if re.search(r"\b" + s + r"\b", output):
match = True
elif self.matchtype == "startswith":
if output.startswith(s):
match = True
else:
raise ValueError(
f"Don't know how to process matchtype: {self.matchtype}"
Expand Down
68 changes: 45 additions & 23 deletions tests/detectors/test_detectors_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,32 +110,39 @@ def test_none_outputs(string_detector):
assert results == [1.0], "Failed to handle None outputs correctly"


@pytest.mark.parametrize("matchtype", ["str", "word"])
# Maps each matchtype to (output text, expected result under case-sensitive
# matching) pairs; consumed by test_matchtype_with_case_sensitivity.
MATCHTYPE_AND_CASING_CASES = {
    "str": [
        # should match
        (f"begin {TEST_STRINGS[0]}ing", True),
        # shouldn't match case sensitive
        (f"begin {TEST_STRINGS[0].upper()}ING", False),
    ],
    "word": [
        # should match
        (f"{TEST_STRINGS[0]} word", True),
        # shouldn't match case sensitive
        (f"{TEST_STRINGS[0].upper()} word", False),
    ],
    "startswith": [
        # should match
        (f"{TEST_STRINGS[0]} start", True),
        # shouldn't match case sensitive
        (f"{TEST_STRINGS[0].upper()} start", False),
    ],
}


@pytest.mark.parametrize("matchtype", MATCHTYPE_AND_CASING_CASES.keys())
def test_matchtype_with_case_sensitivity(matchtype):
"""Test case sensitivity with different matchtypes"""

lowercase_test_word = "test"
detector = garak.detectors.base.StringDetector([lowercase_test_word])
detector = garak.detectors.base.StringDetector([TEST_STRINGS[0]])
detector.matchtype = matchtype

test_cases = {
"str": [
(f"begin {lowercase_test_word}ing", True), # should match
(
f"begin {lowercase_test_word.upper()}ING",
False,
), # shouldn't match case sensitive
],
"word": [
(f"{lowercase_test_word} word", True), # should match
(
f"{lowercase_test_word.upper()} word",
False,
), # shouldn't match case sensitive
],
}

for text, should_match_sensitive in test_cases[matchtype]:
for text, should_match_sensitive in MATCHTYPE_AND_CASING_CASES[matchtype]:
attempt = Attempt(prompt="Hello")
attempt.outputs = [text]

Expand All @@ -152,7 +159,7 @@ def test_matchtype_with_case_sensitivity(matchtype):
], f"Case insensitive {matchtype} match failed for '{text}'"


def test_word_boundary_matching():
def test_stringdetector_word_boundary_matching():
detector = garak.detectors.base.StringDetector(TEST_STRINGS)
detector.matchtype = "word"

Expand All @@ -173,4 +180,19 @@ def test_word_boundary_matching():
], f"Word boundary match failed for '{text}', expected {match}"


TEST_STRINGS
def test_stringdetector_startswith():
    """Run a "startswith" StringDetector over one non-matching and one matching output.

    The detector is built from TEST_STRINGS and queried with
    case_sensitive=True; each output must score 1.0 when a match is
    expected and 0.0 otherwise.
    """
    detector = garak.detectors.base.StringDetector(TEST_STRINGS)
    detector.matchtype = "startswith"

    # output text -> whether the detector is expected to flag it
    expectations = {
        "this is a test": False,
        "tests always succeed": True,
    }

    for text, match in expectations.items():
        attempt = Attempt(prompt="")
        attempt.outputs = [text]
        expected_scores = [1.0 if match else 0.0]
        results = detector.detect(attempt, case_sensitive=True)
        assert (
            results == expected_scores
        ), f"startswith match failed for '{text}', expected {match}"

0 comments on commit 81bf441

Please sign in to comment.