datacommonsorg · gmechali · Oct 31, 2024 · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
diff --git a/server/routes/shared_api/autocomplete/autocomplete.py b/server/routes/shared_api/autocomplete/autocomplete.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 
 from flask import Blueprint
@@ -22,7 +21,6 @@
 from server.routes.shared_api.autocomplete import helpers
 from server.routes.shared_api.autocomplete.types import AutoCompleteApiResponse
 from server.routes.shared_api.autocomplete.types import AutoCompleteResult
-from server.routes.shared_api.place import findplacedcid
 
 # TODO(gmechali): Add Stat Var search.
 
@@ -45,15 +43,7 @@ def autocomplete():
   # Send requests to the Google Maps Predictions API.
   prediction_responses = helpers.predict(queries, lang)
 
-  place_ids = []
-  for prediction in prediction_responses:
-    place_ids.append(prediction.place_id)
-
-  place_id_to_dcid = []
-  if place_ids:
-    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
-               len(place_id_to_dcid))
+  place_id_to_dcid = helpers.fetch_place_id_to_dcid(prediction_responses)
 
   final_predictions = []
   for prediction in prediction_responses:
@@ -64,6 +54,10 @@ def autocomplete():
           matched_query=prediction.matched_query,
           dcid=place_id_to_dcid[prediction.place_id])
       final_predictions.append(current_prediction)
+
+      if len(final_predictions) == helpers.DISPLAYED_RESPONSE_COUNT_LIMIT:
+        break
+
   logging.info(
       "[Place_Autocomplete] Returning a total of %d place predictions.",
       len(final_predictions))

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
@@ -22,11 +22,41 @@
 import requests
 
 from server.routes.shared_api.autocomplete.types import ScoredPrediction
+from server.routes.shared_api.place import findplacedcid
 
 MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
 MIN_CHARACTERS_PER_QUERY = 3
 MAX_NUM_OF_QUERIES = 4
 DISPLAYED_RESPONSE_COUNT_LIMIT = 5
+TWO_WORD_CONTINENTS = [{
+    'description': 'North America',
+    'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'
+}, {
+    'description': 'South America',
+    'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'
+}]
+CONTINENTS = [{
+    'description': 'Europe',
+    'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'
+}, {
+    'description': 'Oceania',
+    'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'
+}, {
+    'description': 'Africa',
+    'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'
+}, {
+    'description': 'Asia',
+    'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
+}] + TWO_WORD_CONTINENTS
+CONTINENT_PLACE_ID_TO_DCID = {
+    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+    'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+    'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+    'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+    'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+    'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+}
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -112,20 +142,25 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
+
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
 
   score = 0
   start_index = 0
-  for str1_word in words_in_str1:
+  for str1_idx, str1_word in enumerate(words_in_str1):
     str1_word = str1_word.lower()
     found_match = False
     for idx, name_word in enumerate(words_in_name):
       if idx < start_index:
         continue
 
       name_word = name_word.lower()
+      if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
+        # boost score for start of query.
+        score -= 0.25
+
       if str1_word == name_word:
         start_index = idx + 1
         score -= 1
@@ -147,6 +182,45 @@ def get_match_score(match_string: str, name: str) -> float:
   return score
 
 
+def score_below_zero(pred: ScoredPrediction) -> bool:
+  """Returns whether the score is below 0."""
+  return pred.score < 0
+
+
+def prepend_continent_hack(responses: List[ScoredPrediction],
+                           queries: List[str]) -> List[ScoredPrediction]:
+  """Prepend continents as responses in order to hack it in autocomplete.
+  Returns:
+    List of scored predictions."""
+
+  continent_responses = []
+  single_word_query = queries[-1]
+  for continent in CONTINENTS:
+    scored_prediction = ScoredPrediction(description=continent['description'],
+                                         place_id=continent['place_id'],
+                                         matched_query=single_word_query,
+                                         score=get_match_score(
+                                             single_word_query,
+                                             continent['description']))
+    continent_responses.append(scored_prediction)
+
+  if len(queries) > 1:
+    two_word_query = queries[-2]
+    # If we have a 2 two word query, also place the two word continents as responses.
+    for continent in TWO_WORD_CONTINENTS:
+      scored_prediction = ScoredPrediction(description=continent['description'],
+                                           place_id=continent['place_id'],
+                                           matched_query=two_word_query,
+                                           score=get_match_score(
+                                               two_word_query,
+                                               continent['description']))
+      continent_responses.append(scored_prediction)
+
+  # Only keep continents with a score below 0 as it implies it's close to the query.
+  continent_responses = list(filter(score_below_zero, continent_responses))
+  return continent_responses + responses
+
+
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns:
@@ -164,14 +238,18 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
                                                query, pred['description']))
       all_responses.append(scored_prediction)
 
+  # Continent hack - Continents not supported by Google Maps Predictions API.
+  # This hack will always evaluate continents for each response. They will get filtered in/out based on the match_score we compute.
+  all_responses = prepend_continent_hack(all_responses, queries)
+
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
 
   responses = []
   place_ids = set()
   index = 0
-  while len(responses) < DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
+  while len(responses) < 2 * DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
       all_responses):
     if all_responses[index].place_id not in place_ids:
       responses.append(all_responses[index])
@@ -181,6 +259,29 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   return responses
 
 
+def fetch_place_id_to_dcid(
+    prediction_responses: List[ScoredPrediction]) -> Dict[str, str]:
+  """Fetches the associated DCID for each place ID returned by Google.
+  Returns:
+    Mapping of Place ID to DCID."""
+
+  place_ids = []
+  for prediction in prediction_responses:
+    place_ids.append(prediction.place_id)
+
+  place_id_to_dcid = dict()
+  if place_ids:
+    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
+
+  # Add hardcoded continent Place IDs to DCIDs.
+  place_id_to_dcid.update(CONTINENT_PLACE_ID_TO_DCID)
+
+  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
+               len(place_id_to_dcid))
+
+  return place_id_to_dcid
+
+
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""
   return p.score