From d5dd931e18e6396fe56f6cbd39e5126824a9578b Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:06:10 -0400
Subject: [PATCH 1/4] Add continents for autocomplete hack.

---
 .../shared_api/autocomplete/autocomplete.py   | 16 ++--
 .../routes/shared_api/autocomplete/helpers.py | 73 ++++++++++++++++++-
 2 files changed, 76 insertions(+), 13 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/autocomplete.py b/server/routes/shared_api/autocomplete/autocomplete.py
index cb29b539c3..36b7bb0c52 100644
--- a/server/routes/shared_api/autocomplete/autocomplete.py
+++ b/server/routes/shared_api/autocomplete/autocomplete.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 
 from flask import Blueprint
@@ -22,7 +21,6 @@
 from server.routes.shared_api.autocomplete import helpers
 from server.routes.shared_api.autocomplete.types import AutoCompleteApiResponse
 from server.routes.shared_api.autocomplete.types import AutoCompleteResult
-from server.routes.shared_api.place import findplacedcid
 
 # TODO(gmechali): Add Stat Var search.
 
@@ -45,15 +43,7 @@ def autocomplete():
   # Send requests to the Google Maps Predictions API.
   prediction_responses = helpers.predict(queries, lang)
 
-  place_ids = []
-  for prediction in prediction_responses:
-    place_ids.append(prediction.place_id)
-
-  place_id_to_dcid = []
-  if place_ids:
-    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
-               len(place_id_to_dcid))
+  place_id_to_dcid = helpers.fetch_place_id_to_dcid(prediction_responses)
 
   final_predictions = []
   for prediction in prediction_responses:
@@ -64,6 +54,10 @@ def autocomplete():
           matched_query=prediction.matched_query,
           dcid=place_id_to_dcid[prediction.place_id])
       final_predictions.append(current_prediction)
+
+    if len(final_predictions) == helpers.DISPLAYED_RESPONSE_COUNT_LIMIT:
+      break
+
   logging.info(
       "[Place_Autocomplete] Returning a total of %d place predictions.",
       len(final_predictions))
diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 08e96617d7..412a61ef16 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -22,6 +22,7 @@
 import requests
 
 from server.routes.shared_api.autocomplete.types import ScoredPrediction
+from server.routes.shared_api.place import findplacedcid
 
 MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
 MIN_CHARACTERS_PER_QUERY = 3
@@ -112,13 +113,15 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
+
+  # TODO(gmechali): Replace weird characters in both input and like i with accent, o with two dots etc.
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
 
   score = 0
   start_index = 0
-  for str1_word in words_in_str1:
+  for str1_idx, str1_word in enumerate(words_in_str1):
     str1_word = str1_word.lower()
     found_match = False
     for idx, name_word in enumerate(words_in_name):
@@ -126,6 +129,10 @@ def get_match_score(match_string: str, name: str) -> float:
         continue
 
       name_word = name_word.lower()
+      if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
+        # boost score for start of query.
+        score -= 0.5
+
       if str1_word == name_word:
         start_index = idx + 1
         score -= 1
@@ -147,6 +154,42 @@ def get_match_score(match_string: str, name: str) -> float:
   return score
 
 
+def score_below_zero(pred: ScoredPrediction) -> bool:
+  """Returns whether the score is below 0."""
+  return pred.score < 0
+
+
+def prepend_continent_hack(responses: List[ScoredPrediction], queries: List[str]) -> List[ScoredPrediction]:
+  queries.reverse()
+  continent_responses = []
+  single_continents = [{'description': 'Europe', 'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'},
+                       {'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
+                       {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'},
+                       {'description': 'Oceania', 'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'},
+                       {'description': 'Africa', 'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'},
+                       {'description': 'Asia', 'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'}]
+  for continent in single_continents:
+    scored_prediction = ScoredPrediction(description=continent['description'],
+                                         place_id=continent['place_id'],
+                                         matched_query=queries[0],
+                                         score=get_match_score(queries[0], continent['description']))
+    continent_responses.append(scored_prediction)
+
+  if len(queries) > 1:
+    # double word continent hack
+    double_continents = [{'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
+                         {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'}]
+    for continent in double_continents:
+      scored_prediction = ScoredPrediction(description=continent['description'],
+                                           place_id=continent['place_id'],
+                                           matched_query=queries[1],
+                                           score=get_match_score(queries[1], continent['description']))
+      continent_responses.append(scored_prediction)
+
+  continent_responses = list(filter(score_below_zero, continent_responses))
+  return continent_responses + responses
+
+
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns:
@@ -164,14 +207,19 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
                                          query, pred['description']))
       all_responses.append(scored_prediction)
 
+  # single word continent hack
+  all_responses = prepend_continent_hack(all_responses, queries)
+
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
 
+  # all_responses = list(filter(score_below_zero, all_responses))
+
   responses = []
   place_ids = set()
   index = 0
-  while len(responses) < DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
+  while len(responses) < 2 * DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
       all_responses):
     if all_responses[index].place_id not in place_ids:
       responses.append(all_responses[index])
@@ -180,6 +228,27 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
   return responses
 
+def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
+  place_ids = []
+  for prediction in prediction_responses:
+    place_ids.append(prediction.place_id)
+
+  place_id_to_dcid = dict()
+  if place_ids:
+    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
+
+  place_id_to_dcid['ChIJhdqtz4aI7UYRefD8s-aZ73I'] = 'europe'
+  place_id_to_dcid['ChIJtTRdNRw0CZQRK-PGyc8M1Gk'] = 'southamerica'
+  place_id_to_dcid['ChIJnXKOaXELs1IRgqNhl4MoExM'] = 'northamerica'
+  place_id_to_dcid['ChIJV-jLJIrxYzYRWfSg0_xrQak'] = 'asia'
+  place_id_to_dcid['ChIJS3WQM3uWuaQRdSAPdB--Um4'] = 'antarctica'
+  place_id_to_dcid['ChIJQbA4_Cu8QW4RbuvrxISzaks'] = 'oceania'
+  place_id_to_dcid['ChIJ1fWMlApsoBARs_CQnslwghA'] = 'africa'
+
+  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
+               len(place_id_to_dcid))
+
+  return place_id_to_dcid
 
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""

From 894d1ed13930a8909d718521d3592cbea579765e Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:24:16 -0400
Subject: [PATCH 2/4] Clean up the continent hack

---
 .../routes/shared_api/autocomplete/helpers.py | 99 ++++++++++++-------
 1 file changed, 65 insertions(+), 34 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 412a61ef16..6dbb8b5d30 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -28,6 +28,35 @@
 MIN_CHARACTERS_PER_QUERY = 3
 MAX_NUM_OF_QUERIES = 4
 DISPLAYED_RESPONSE_COUNT_LIMIT = 5
+TWO_WORD_CONTINENTS = [{
+    'description': 'North America',
+    'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'
+}, {
+    'description': 'South America',
+    'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'
+}]
+CONTINENTS = [{
+    'description': 'Europe',
+    'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'
+}, {
+    'description': 'Oceania',
+    'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'
+}, {
+    'description': 'Africa',
+    'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'
+}, {
+    'description': 'Asia',
+    'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
+}] + TWO_WORD_CONTINENTS
+CONTINENT_PLACE_ID_TO_DCID = {
+'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+  'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+  'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+  'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+  'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+  'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+  'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+  }
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -113,8 +142,7 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
-
-  # TODO(gmechali): Replace weird characters in both input and like i with accent, o with two dots etc.
+
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
@@ -159,37 +187,40 @@ def score_below_zero(pred: ScoredPrediction) -> bool:
   return pred.score < 0
 
 
-def prepend_continent_hack(responses: List[ScoredPrediction], queries: List[str]) -> List[ScoredPrediction]:
-  queries.reverse()
+def prepend_continent_hack(responses: List[ScoredPrediction],
+                           queries: List[str]) -> List[ScoredPrediction]:
+  """Prepend continents as responses in order to hack it in autocomplete.
+  Returns:
+    List of scored predictions."""
+
   continent_responses = []
-  single_continents = [{'description': 'Europe', 'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'},
-                       {'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
-                       {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'},
-                       {'description': 'Oceania', 'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'},
-                       {'description': 'Africa', 'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'},
-                       {'description': 'Asia', 'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'}]
-  for continent in single_continents:
+  single_word_query = queries[-1]
+  for continent in CONTINENTS:
     scored_prediction = ScoredPrediction(description=continent['description'],
-                                         place_id=continent['place_id'],
-                                         matched_query=queries[0],
-                                         score=get_match_score(queries[0], continent['description']))
+                                         place_id=continent['place_id'],
+                                         matched_query=single_word_query,
+                                         score=get_match_score(
+                                             single_word_query,
+                                             continent['description']))
     continent_responses.append(scored_prediction)
 
   if len(queries) > 1:
-    # double word continent hack
-    double_continents = [{'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
-                         {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'}]
-    for continent in double_continents:
+    two_word_query = queries[-2]
+    # If we have a 2 two word query, also place the two word continents as responses.
+    for continent in TWO_WORD_CONTINENTS:
       scored_prediction = ScoredPrediction(description=continent['description'],
-                                           place_id=continent['place_id'],
-                                           matched_query=queries[1],
-                                           score=get_match_score(queries[1], continent['description']))
+                                           place_id=continent['place_id'],
+                                           matched_query=two_word_query,
+                                           score=get_match_score(
+                                               two_word_query,
+                                               continent['description']))
       continent_responses.append(scored_prediction)
 
+  # Only keep continents with a score below 0 as it implies it's close to the query.
   continent_responses = list(filter(score_below_zero, continent_responses))
   return continent_responses + responses
-
+
 
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns:
@@ -207,14 +238,13 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
                                          query, pred['description']))
       all_responses.append(scored_prediction)
 
-  # single word continent hack
+  # Continent hack - Continents not supported by Google Maps Predictions API.
+  # This hack will always evaluate continents for each response. They will get filtered in/out based on the match_score we compute.
   all_responses = prepend_continent_hack(all_responses, queries)
 
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
 
-  # all_responses = list(filter(score_below_zero, all_responses))
-
   responses = []
   place_ids = set()
@@ -228,7 +258,13 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
   return responses
 
-def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
+
+def fetch_place_id_to_dcid(
+    prediction_responses: List[ScoredPrediction]) -> Dict:
+  """Fetches the associated DCID for each place ID returned by Google.
+  Returns:
+    Mapping of Place ID to DCID."""
+
   place_ids = []
   for prediction in prediction_responses:
     place_ids.append(prediction.place_id)
@@ -236,14 +272,9 @@ def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
   place_id_to_dcid = dict()
   if place_ids:
     place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-
-  place_id_to_dcid['ChIJhdqtz4aI7UYRefD8s-aZ73I'] = 'europe'
-  place_id_to_dcid['ChIJtTRdNRw0CZQRK-PGyc8M1Gk'] = 'southamerica'
-  place_id_to_dcid['ChIJnXKOaXELs1IRgqNhl4MoExM'] = 'northamerica'
-  place_id_to_dcid['ChIJV-jLJIrxYzYRWfSg0_xrQak'] = 'asia'
-  place_id_to_dcid['ChIJS3WQM3uWuaQRdSAPdB--Um4'] = 'antarctica'
-  place_id_to_dcid['ChIJQbA4_Cu8QW4RbuvrxISzaks'] = 'oceania'
-  place_id_to_dcid['ChIJ1fWMlApsoBARs_CQnslwghA'] = 'africa'
+
+  # Add hardcoded continent Place IDs to DCIDs.
+  place_id_to_dcid.update(CONTINENT_PLACE_ID_TO_DCID)
 
   logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
                len(place_id_to_dcid))

From e913b79f732ddbeccc38871ee1a3e9c68a402507 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:41:59 -0400
Subject: [PATCH 3/4] Reduce boost to 0.25

---
 .../routes/shared_api/autocomplete/helpers.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 6dbb8b5d30..b33b053a5e 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -49,14 +49,14 @@
     'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
 }] + TWO_WORD_CONTINENTS
 CONTINENT_PLACE_ID_TO_DCID = {
-'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
-  'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
-  'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
-  'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
-  'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
-  'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
-  'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
-  }
+    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+    'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+    'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+    'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+    'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+    'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+}
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -159,7 +159,7 @@ def get_match_score(match_string: str, name: str) -> float:
       name_word = name_word.lower()
       if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
         # boost score for start of query.
-        score -= 0.5
+        score -= 0.25
 
       if str1_word == name_word:
         start_index = idx + 1
@@ -281,6 +281,7 @@ def fetch_place_id_to_dcid(
 
   return place_id_to_dcid
 
+
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""
   return p.score

From c04e6b7ee17aca32240092527ad2a959453efc45 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 18:32:01 -0400
Subject: [PATCH 4/4] Adds type on response dict.

---
 server/routes/shared_api/autocomplete/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index b33b053a5e..f0d2e325af 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -260,7 +260,7 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
 
 def fetch_place_id_to_dcid(
-    prediction_responses: List[ScoredPrediction]) -> Dict:
+    prediction_responses: List[ScoredPrediction]) -> Dict[str, str]:
   """Fetches the associated DCID for each place ID returned by Google.
   Returns:
     Mapping of Place ID to DCID."""
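Note: the sketch below is a minimal, self-contained illustration of the idea behind these patches — score each hardcoded continent against the user's query, keep only candidates whose match score is negative, and map the surviving place IDs to hardcoded DCIDs. It uses a simplified stand-in for get_match_score, a local ScoredPrediction dataclass, and a trimmed-down continent table, so the names and scoring details here are illustrative assumptions rather than the module's actual implementation.

# Illustrative sketch only: simplified stand-ins for the helpers touched in this
# patch series, not the real server.routes.shared_api.autocomplete code.
from dataclasses import dataclass
from typing import List


@dataclass
class ScoredPrediction:
  description: str
  place_id: str
  matched_query: str
  score: float


# Subset of the hardcoded continents and their DCIDs, for the demo below.
CONTINENTS = [('Europe', 'ChIJhdqtz4aI7UYRefD8s-aZ73I'),
              ('North America', 'ChIJnXKOaXELs1IRgqNhl4MoExM')]
CONTINENT_PLACE_ID_TO_DCID = {
    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica'
}


def toy_match_score(query: str, name: str) -> float:
  """Simplified stand-in for get_match_score: lower (more negative) is better."""
  name_words = [w.lower() for w in name.split()]
  score = 0.0
  for word in query.lower().split():
    if word in name_words:
      score -= 1  # exact word match
    elif any(w.startswith(word) for w in name_words):
      score -= 0.25  # prefix match, analogous to the start-of-query boost
    else:
      score += 1  # unmatched words count against the candidate
  return score


def continents_for_query(query: str) -> List[ScoredPrediction]:
  """Scores each continent and keeps only negative scores, mirroring the
  score_below_zero filter used by prepend_continent_hack."""
  scored = [
      ScoredPrediction(name, place_id, query, toy_match_score(query, name))
      for name, place_id in CONTINENTS
  ]
  return [p for p in scored if p.score < 0]


if __name__ == '__main__':
  for query in ('north a', 'tokyo'):
    for pred in continents_for_query(query):
      dcid = CONTINENT_PLACE_ID_TO_DCID[pred.place_id]
      print(f'{query!r} -> {pred.description} (score={pred.score}, dcid={dcid})')

Run as-is, 'north a' keeps only North America while 'tokyo' keeps nothing, which is the filter-by-negative-score behaviour the patches rely on before fetch_place_id_to_dcid maps the surviving place IDs to their hardcoded continent DCIDs.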