From 2fa8f2a74e507cecffe227caa9723e5184595c99 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali <gabriel.mechali@gmail.com>
Date: Wed, 6 Nov 2024 11:43:07 -0500
Subject: [PATCH 1/3] Replace accents, etc. with ASCII equivalents so that
 they match better in the scoring algo.

---
 server/routes/shared_api/autocomplete/helpers.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 53a667f0c9..cface98fb2 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -16,6 +16,7 @@
 import logging
 import re
 from typing import Dict, List
+import unicodedata
 from urllib.parse import urlencode
 
 from flask import current_app
@@ -135,11 +136,21 @@ def off_by_one_letter(str1_word: str, name_word: str) -> bool:
   return offby <= 1
 
 
+def sanitize_and_replace_non_ascii(str: str) -> str:
+  """Sanitize and replace non ascii.
+  Returns:
+    String sanitized and without accents, cedillas, or enye."""
+  nfkd_form = unicodedata.normalize('NFKD', str)
+  return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])
+
+
 def get_match_score(match_string: str, name: str) -> float:
   """Computes a 'score' based on the matching words in two strings. Lowest
   score is best match.
   Returns:
     Float score."""
+  name = sanitize_and_replace_non_ascii(name)
+  match_string = sanitize_and_replace_non_ascii(match_string)
 
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)

From 2ec66d6d420cde0c8ffae29f89f298ca1cc5f160 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali <gabriel.mechali@gmail.com>
Date: Wed, 6 Nov 2024 12:04:17 -0500
Subject: [PATCH 2/3] Rename param to make code analysis happy.

---
 server/routes/shared_api/autocomplete/helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index cface98fb2..0f1706f30c 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -136,11 +136,11 @@ def off_by_one_letter(str1_word: str, name_word: str) -> bool:
   return offby <= 1
 
 
-def sanitize_and_replace_non_ascii(str: str) -> str:
+def sanitize_and_replace_non_ascii(input: str) -> str:
   """Sanitize and replace non ascii.
   Returns:
     String sanitized and without accents, cedillas, or enye."""
-  nfkd_form = unicodedata.normalize('NFKD', str)
+  nfkd_form = unicodedata.normalize('NFKD', input)
   return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])
 
 

From 2f407a1ec42e1c799bbddfa8bd94763e6e01c9eb Mon Sep 17 00:00:00 2001
From: Gabriel Mechali <gabriel.mechali@gmail.com>
Date: Wed, 6 Nov 2024 12:09:47 -0500
Subject: [PATCH 3/3] Rename param again to make code analysis happy; `input`
 was still bad.

---
 server/routes/shared_api/autocomplete/helpers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 0f1706f30c..4a87ff5372 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -136,12 +136,12 @@ def off_by_one_letter(str1_word: str, name_word: str) -> bool:
   return offby <= 1
 
 
-def sanitize_and_replace_non_ascii(input: str) -> str:
+def sanitize_and_replace_non_ascii(string: str) -> str:
   """Sanitize and replace non ascii.
   Returns:
     String sanitized and without accents, cedillas, or enye."""
-  nfkd_form = unicodedata.normalize('NFKD', input)
-  return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])
+  nfkd_form = unicodedata.normalize('NFKD', string)
+  return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
 
 
 def get_match_score(match_string: str, name: str) -> float: