def bag_of_letters(text: str) -> Dict:
    """Creates a bag-of-letters representation of a given string.

    The text is lowercased before counting, and every non-alphabetic
    character (digits, punctuation, whitespace) is ignored.

    Args:
        text: The string whose letters are counted.

    Returns:
        dict: A dictionary where keys are lowercase letters and values are
        their counts. Empty when the text has no alphabetic characters.
    """
    bag = {}
    for char in text.lower():
        if char.isalpha():
            bag[char] = bag.get(char, 0) + 1
    return bag


# TODO(gmechali): Look into a better typo algo e.g Levenshtein distance.
def off_by_one_letter(str1_word: str, name_word: str) -> bool:
    """Function to do off by one check.

    The comparison is made on bags of letters, so letter order is ignored
    (a reordering of the same letters counts as a match) and a substituted
    letter counts twice (one letter missing plus one letter extra).

    Args:
        str1_word: The word to check, e.g. a word typed by the user.
        name_word: The word to compare against.

    Returns:
        Whether the letter counts of the two words differ by at most one in
        total. Always False when either word contains no letters.
    """
    str1_bag = bag_of_letters(str1_word)
    name_bag = bag_of_letters(name_word)

    # Without letters on both sides there is nothing to compare: two empty
    # bags would otherwise look identical, making unrelated tokens such as
    # "123" and "456" count as a typo match.
    if not str1_bag or not name_bag:
        return False

    # Summing the per-letter count differences over the union of letters also
    # covers letters that appear in only one of the two words.
    offby = sum(
        abs(str1_bag.get(letter, 0) - name_bag.get(letter, 0))
        for letter in str1_bag.keys() | name_bag.keys())

    return offby <= 1
@@ -86,6 +120,7 @@ def get_match_score(match_string: str, name: str) -> float: start_index = 0 for str1_word in words_in_str1: str1_word = str1_word.lower() + found_match = False for idx, name_word in enumerate(words_in_name): if idx < start_index: continue @@ -94,13 +129,20 @@ def get_match_score(match_string: str, name: str) -> float: if str1_word == name_word: start_index = idx + 1 score -= 1 + found_match = True break elif str1_word in name_word: start_index = idx + 1 score -= 0.5 + found_match = True break - else: - score += 1 + elif off_by_one_letter(str1_word, name_word): + start_index = idx + 1 + found_match = True + score -= 0.25 + + if not found_match: + score += 1 return score diff --git a/server/tests/routes/api/autocomplete_test.py b/server/tests/routes/api/autocomplete_test.py index 72c9c48a31..ee19c7f9ca 100644 --- a/server/tests/routes/api/autocomplete_test.py +++ b/server/tests/routes/api/autocomplete_test.py @@ -15,6 +15,7 @@ import unittest from unittest.mock import patch +from server.routes.shared_api.autocomplete import helpers import server.tests.routes.api.mock_data as mock_data from web_app import app @@ -64,4 +65,23 @@ def mock_predict_effect(query, lang): self.assertEqual(response.status_code, 200) response_dict = json.loads(response.data.decode("utf-8")) - self.assertEqual(len(response_dict["predictions"]), 5) \ No newline at end of file + self.assertEqual(len(response_dict["predictions"]), 5) + + # Tests for helpers within autocomplete. 
def test_bag_of_words_same(self):
    """Tests that the off-by-one check passes for the same letters reordered."""
    text = "San"
    reordered_text = "Sna"
    self.assertTrue(helpers.off_by_one_letter(text, reordered_text))


def test_bag_of_words_off_by_one(self):
    """Tests that the off-by-one check passes when one letter is missing."""
    text = "Diego"
    off_by_one_text = "Digo"
    self.assertTrue(helpers.off_by_one_letter(text, off_by_one_text))


def test_bag_of_words_off_by_two(self):
    """Tests that the off-by-one check fails when off by more than one."""
    text = "Diego"
    # Two letters of "Diego" are replaced here, so the check must reject it.
    off_by_two_text = "Diaga"
    self.assertFalse(helpers.off_by_one_letter(text, off_by_two_text))