Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds Continents to AutoComplete #4693

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions server/routes/shared_api/autocomplete/autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging

from flask import Blueprint
Expand All @@ -22,7 +21,6 @@
from server.routes.shared_api.autocomplete import helpers
from server.routes.shared_api.autocomplete.types import AutoCompleteApiResponse
from server.routes.shared_api.autocomplete.types import AutoCompleteResult
from server.routes.shared_api.place import findplacedcid

# TODO(gmechali): Add Stat Var search.

Expand All @@ -45,15 +43,7 @@ def autocomplete():
# Send requests to the Google Maps Predictions API.
prediction_responses = helpers.predict(queries, lang)

place_ids = []
for prediction in prediction_responses:
place_ids.append(prediction.place_id)

place_id_to_dcid = []
if place_ids:
place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
len(place_id_to_dcid))
place_id_to_dcid = helpers.fetch_place_id_to_dcid(prediction_responses)

final_predictions = []
for prediction in prediction_responses:
Expand All @@ -64,6 +54,10 @@ def autocomplete():
matched_query=prediction.matched_query,
dcid=place_id_to_dcid[prediction.place_id])
final_predictions.append(current_prediction)

if len(final_predictions) == helpers.DISPLAYED_RESPONSE_COUNT_LIMIT:
break

logging.info(
"[Place_Autocomplete] Returning a total of %d place predictions.",
len(final_predictions))
Expand Down
105 changes: 103 additions & 2 deletions server/routes/shared_api/autocomplete/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,41 @@
import requests

from server.routes.shared_api.autocomplete.types import ScoredPrediction
from server.routes.shared_api.place import findplacedcid

# Base URL of the Google Maps Places Autocomplete endpoint (JSON output).
# NOTE(review): the trailing "?" suggests query parameters are appended by the
# caller; the request-building code is outside this view - confirm.
MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
# NOTE(review): presumably the minimum length of a sub-query worth sending to
# the predictions API; its use is outside this view - confirm.
MIN_CHARACTERS_PER_QUERY = 3
# NOTE(review): presumably the cap on sub-queries derived from one user query;
# its use is outside this view - confirm.
MAX_NUM_OF_QUERIES = 4
# Number of predictions the autocomplete route returns at most. predict()
# collects up to twice this many de-duplicated candidates.
DISPLAYED_RESPONSE_COUNT_LIMIT = 5
TWO_WORD_CONTINENTS = [{
gmechali marked this conversation as resolved.
Show resolved Hide resolved
gmechali marked this conversation as resolved.
Show resolved Hide resolved
'description': 'North America',
'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'
gmechali marked this conversation as resolved.
Show resolved Hide resolved
}, {
'description': 'South America',
'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'
}]
# Hard-coded continent predictions (display name + Google place ID) that
# prepend_continent_hack() scores against the user's query. The two-word
# continents are appended so they are evaluated here as well.
# NOTE(review): CONTINENT_PLACE_ID_TO_DCID has an 'antarctica' entry but no
# Antarctica prediction is listed here - confirm whether that is intentional.
CONTINENTS = [{
'description': 'Europe',
'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'
}, {
'description': 'Oceania',
'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'
}, {
'description': 'Africa',
'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'
}, {
'description': 'Asia',
'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
}] + TWO_WORD_CONTINENTS
CONTINENT_PLACE_ID_TO_DCID = {
gmechali marked this conversation as resolved.
Show resolved Hide resolved
'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
}


def find_queries(user_query: str) -> List[str]:
Expand Down Expand Up @@ -112,20 +142,25 @@ def get_match_score(match_string: str, name: str) -> float:
score is best match.
Returns:
Float score."""

rgx = re.compile(r'[\s|,]+')
words_in_name = re.split(rgx, name)
words_in_str1 = re.split(rgx, match_string)

score = 0
start_index = 0
for str1_word in words_in_str1:
for str1_idx, str1_word in enumerate(words_in_str1):
str1_word = str1_word.lower()
found_match = False
for idx, name_word in enumerate(words_in_name):
if idx < start_index:
continue

name_word = name_word.lower()
if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
# boost score for start of query.
score -= 0.25

if str1_word == name_word:
start_index = idx + 1
score -= 1
Expand All @@ -147,6 +182,45 @@ def get_match_score(match_string: str, name: str) -> float:
return score


def score_below_zero(pred: ScoredPrediction) -> bool:
  """Tell whether a prediction carries a strictly negative match score.

  Args:
    pred: Prediction whose `score` attribute is inspected.
  Returns:
    True when `pred.score` is less than 0, False otherwise."""
  is_negative = pred.score < 0
  return is_negative


def prepend_continent_hack(responses: List[ScoredPrediction],
                           queries: List[str]) -> List[ScoredPrediction]:
  """Prepend matching continents to the predictions, as an autocomplete hack.

  Every entry of CONTINENTS is scored against the last query in `queries`.
  When more than one query is given, every entry of TWO_WORD_CONTINENTS is
  additionally scored against the second-to-last query. Only continents whose
  match score is below 0 are kept.

  Args:
    responses: Predictions collected so far. The list is not mutated.
    queries: Query strings to match continents against. May be empty, in
      which case no continent is evaluated.
  Returns:
    A new list holding the retained continent predictions followed by
    `responses`."""

  if not queries:
    # Nothing to score continents against; indexing queries[-1] would raise.
    return list(responses)

  # Pairs of (query, continents to score against that query), in the order the
  # resulting predictions are emitted.
  scoring_plan = [(queries[-1], CONTINENTS)]
  if len(queries) > 1:
    # With at least two queries, also score the two-word continents against
    # the second-to-last query.
    scoring_plan.append((queries[-2], TWO_WORD_CONTINENTS))

  continent_responses = []
  for query, continents in scoring_plan:
    for continent in continents:
      description = continent['description']
      continent_responses.append(
          ScoredPrediction(description=description,
                           place_id=continent['place_id'],
                           matched_query=query,
                           score=get_match_score(query, description)))

  # Only keep continents with a score below 0, as that implies the continent
  # is close to the query.
  matching = [pred for pred in continent_responses if pred.score < 0]
  return matching + list(responses)


def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
"""Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
Returns:
Expand All @@ -164,14 +238,18 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
query, pred['description']))
all_responses.append(scored_prediction)

# Continent hack - Continents not supported by Google Maps Predictions API.
# This hack will always evaluate continents for each response. They will get filtered in/out based on the match_score we compute.
all_responses = prepend_continent_hack(all_responses, queries)

all_responses.sort(key=get_score)
logging.info("[Place_Autocomplete] Received %d total place predictions.",
len(all_responses))

responses = []
place_ids = set()
index = 0
while len(responses) < DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
while len(responses) < 2 * DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
all_responses):
if all_responses[index].place_id not in place_ids:
responses.append(all_responses[index])
Expand All @@ -181,6 +259,29 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
return responses


def fetch_place_id_to_dcid(
    prediction_responses: List[ScoredPrediction]) -> Dict[str, str]:
  """Build the place ID to DCID mapping for a list of predictions.

  The place IDs of all predictions are resolved through `findplacedcid`, whose
  response payload is JSON-decoded. The lookup is skipped when there are no
  predictions. The hard-coded continent mappings are then merged in and
  override any looked-up value for the same place ID.

  Args:
    prediction_responses: Predictions whose `place_id` values are resolved.
  Returns:
    Mapping of Place ID to DCID."""

  requested_ids = [response.place_id for response in prediction_responses]

  # Only hit the lookup when there is something to resolve.
  mapping = json.loads(
      findplacedcid(requested_ids).data) if requested_ids else {}

  # Add hardcoded continent Place IDs to DCIDs.
  mapping.update(CONTINENT_PLACE_ID_TO_DCID)

  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
               len(mapping))
  return mapping


def get_score(p: ScoredPrediction) -> float:
  """Sort-key accessor: hand back the match score stored on a prediction.

  Args:
    p: Prediction to read.
  Returns:
    The value of `p.score`."""
  score = p.score
  return score
Loading