Skip to content

Commit

Permalink
Improves Typo recognition for autocomplete (#4690)
Browse files Browse the repository at this point in the history
This PR modifies the scoring algorithm for place autocomplete to count a
small score for non-exact matches, to account for one typo.
With these changes, we will favor "San Diego" over "Dieppe" for the
query "Sna Die".
Prod: https://screenshot.googleplex.com/Bsx2BbyLZArbQuX
Local with this change:
https://screenshot.googleplex.com/9jHqKb2uHJLz37k

Note that "Sne Die" will still fall back to "Dieppe" because that counts as 2
typos, so San Diego is ruled out even if it was returned by the Google Maps
predictions: https://screenshot.googleplex.com/9LViJoVFni3Lui6

The typo check compares bags of letters and tolerates a difference of at most
one letter. We run this check on top of the Google Maps Predictions, which
already take typo correction into account; this part only chooses the best
prediction from Google Maps.

Doing this as part of gaps identified in place autocomplete:
https://docs.google.com/document/d/15RVckX9ck5eyyhBHW8Nb9lmxPBDPMIeLbax14HbN-GI/edit?tab=t.0
  • Loading branch information
gmechali authored Oct 28, 2024
1 parent 499b981 commit 204ee26
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 3 deletions.
46 changes: 44 additions & 2 deletions server/routes/shared_api/autocomplete/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,40 @@ def execute_maps_request(query: str, language: str) -> Dict:
return json.loads(response.text)


def bag_of_letters(text: str) -> Dict[str, int]:
  """Creates a bag-of-letters representation of a given string.

  The text is lowercased first, and every character that is not alphabetic
  (digits, whitespace, punctuation) is ignored.

  Args:
    text: The string to summarize.

  Returns:
    dict: A dictionary where keys are lowercase letters and values are their
      counts, in order of first appearance. Empty when the text contains no
      letters.
  """
  # Imported locally so this helper does not depend on the module's import
  # block.
  from collections import Counter

  # Convert back to a plain dict so callers keep getting exactly the type
  # they received before (a Counter silently returns 0 for missing keys).
  return dict(Counter(char for char in text.lower() if char.isalpha()))


# TODO(gmechali): Look into a better typo algo e.g Levenshtein distance.
def off_by_one_letter(str1_word: str, name_word: str) -> bool:
  """Returns whether two words are off by at most one letter.

  Both words are reduced to bags of letters (see bag_of_letters) and the
  absolute differences of the per-letter counts are summed. Letter order is
  not considered, so a transposition such as "Sna" vs "San" has distance 0,
  a missing or extra letter has distance 1, and a substituted letter has
  distance 2 (one letter missing plus one letter extra).

  Words that contain no letters at all (e.g. numbers or postal codes) have an
  empty bag and cannot be compared this way; they only match when the two
  words are equal ignoring case.

  Args:
    str1_word: A word from the user's query.
    name_word: A word from the candidate name.

  Returns:
    True if the words are within one letter of each other, False otherwise.
  """
  str1_bag = bag_of_letters(str1_word)
  str2_bag = bag_of_letters(name_word)

  # An empty bag would otherwise be "within one letter" of every other empty
  # bag and of every single-letter word, so "94043" would match "10001".
  if not str1_bag or not str2_bag:
    return str1_word.lower() == name_word.lower()

  # Walk the union of letters so letters present in only one of the words are
  # counted too.
  offby = sum(
      abs(str1_bag.get(letter, 0) - str2_bag.get(letter, 0))
      for letter in str1_bag.keys() | str2_bag.keys())

  return offby <= 1


def get_match_score(match_string: str, name: str) -> float:
"""Computes a 'score' based on the matching words in two strings. Lowest
score is best match.
Expand All @@ -86,6 +120,7 @@ def get_match_score(match_string: str, name: str) -> float:
start_index = 0
for str1_word in words_in_str1:
str1_word = str1_word.lower()
found_match = False
for idx, name_word in enumerate(words_in_name):
if idx < start_index:
continue
Expand All @@ -94,13 +129,20 @@ def get_match_score(match_string: str, name: str) -> float:
if str1_word == name_word:
start_index = idx + 1
score -= 1
found_match = True
break
elif str1_word in name_word:
start_index = idx + 1
score -= 0.5
found_match = True
break
else:
score += 1
elif off_by_one_letter(str1_word, name_word):
start_index = idx + 1
found_match = True
score -= 0.25

if not found_match:
score += 1

return score

Expand Down
22 changes: 21 additions & 1 deletion server/tests/routes/api/autocomplete_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import unittest
from unittest.mock import patch

from server.routes.shared_api.autocomplete import helpers
import server.tests.routes.api.mock_data as mock_data
from web_app import app

Expand Down Expand Up @@ -64,4 +65,23 @@ def mock_predict_effect(query, lang):
self.assertEqual(response.status_code, 200)

response_dict = json.loads(response.data.decode("utf-8"))
self.assertEqual(len(response_dict["predictions"]), 5)
self.assertEqual(len(response_dict["predictions"]), 5)

# Tests for helpers within autocomplete.
def test_bag_of_words_same(self):
  """Words made of the same letters in a different order must match."""
  self.assertTrue(helpers.off_by_one_letter("San", "Sna"))

def test_bag_of_words_off_by_one(self):
  """A word with a single letter dropped must still match."""
  self.assertTrue(helpers.off_by_one_letter("Diego", "Digo"))

def test_bag_of_words_off_by_two(self):
  """Words that differ by more than one letter must not match."""
  self.assertFalse(helpers.off_by_one_letter("Diego", "Diaga"))

0 comments on commit 204ee26

Please sign in to comment.