diff --git a/server/routes/shared_api/autocomplete/autocomplete.py b/server/routes/shared_api/autocomplete/autocomplete.py
index cb29b539c3..36b7bb0c52 100644
--- a/server/routes/shared_api/autocomplete/autocomplete.py
+++ b/server/routes/shared_api/autocomplete/autocomplete.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 
 from flask import Blueprint
@@ -22,7 +21,6 @@
 from server.routes.shared_api.autocomplete import helpers
 from server.routes.shared_api.autocomplete.types import AutoCompleteApiResponse
 from server.routes.shared_api.autocomplete.types import AutoCompleteResult
-from server.routes.shared_api.place import findplacedcid
 
 # TODO(gmechali): Add Stat Var search.
 
@@ -45,15 +43,7 @@ def autocomplete():
   # Send requests to the Google Maps Predictions API.
   prediction_responses = helpers.predict(queries, lang)
 
-  place_ids = []
-  for prediction in prediction_responses:
-    place_ids.append(prediction.place_id)
-
-  place_id_to_dcid = []
-  if place_ids:
-    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
-               len(place_id_to_dcid))
+  place_id_to_dcid = helpers.fetch_place_id_to_dcid(prediction_responses)
 
   final_predictions = []
   for prediction in prediction_responses:
@@ -64,6 +54,10 @@ def autocomplete():
           matched_query=prediction.matched_query,
           dcid=place_id_to_dcid[prediction.place_id])
       final_predictions.append(current_prediction)
+
+      if len(final_predictions) == helpers.DISPLAYED_RESPONSE_COUNT_LIMIT:
+        break
+
   logging.info(
       "[Place_Autocomplete] Returning a total of %d place predictions.",
       len(final_predictions))
diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 08e96617d7..f0d2e325af 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -22,11 +22,41 @@
 import requests
 
 from server.routes.shared_api.autocomplete.types import ScoredPrediction
+from server.routes.shared_api.place import findplacedcid
 
 MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
 MIN_CHARACTERS_PER_QUERY = 3
 MAX_NUM_OF_QUERIES = 4
 DISPLAYED_RESPONSE_COUNT_LIMIT = 5
+TWO_WORD_CONTINENTS = [{
+    'description': 'North America',
+    'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'
+}, {
+    'description': 'South America',
+    'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'
+}]
+CONTINENTS = [{
+    'description': 'Europe',
+    'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'
+}, {
+    'description': 'Oceania',
+    'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'
+}, {
+    'description': 'Africa',
+    'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'
+}, {
+    'description': 'Asia',
+    'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
+}] + TWO_WORD_CONTINENTS
+CONTINENT_PLACE_ID_TO_DCID = {
+    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+    'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+    'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+    'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+    'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+    'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+}
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -112,13 +142,14 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
+
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
 
   score = 0
   start_index = 0
-  for str1_word in words_in_str1:
+  for str1_idx, str1_word in enumerate(words_in_str1):
     str1_word = str1_word.lower()
     found_match = False
     for idx, name_word in enumerate(words_in_name):
@@ -126,6 +157,10 @@ def get_match_score(match_string: str, name: str) -> float:
         continue
 
       name_word = name_word.lower()
+      if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
+        # Boost the score when the name starts with the first query word.
+        score -= 0.25
+
       if str1_word == name_word:
         start_index = idx + 1
         score -= 1
@@ -147,6 +182,45 @@ def get_match_score(match_string: str, name: str) -> float:
   return score
 
 
+def score_below_zero(pred: ScoredPrediction) -> bool:
+  """Returns whether the score is below 0."""
+  return pred.score < 0
+
+
+def prepend_continent_hack(responses: List[ScoredPrediction],
+                           queries: List[str]) -> List[ScoredPrediction]:
+  """Prepends continents as responses since the Maps API does not return them.
+  Returns:
+    List of scored predictions."""
+
+  continent_responses = []
+  single_word_query = queries[-1]
+  for continent in CONTINENTS:
+    scored_prediction = ScoredPrediction(description=continent['description'],
+                                         place_id=continent['place_id'],
+                                         matched_query=single_word_query,
+                                         score=get_match_score(
+                                             single_word_query,
+                                             continent['description']))
+    continent_responses.append(scored_prediction)
+
+  if len(queries) > 1:
+    two_word_query = queries[-2]
+    # If we have a two-word query, also add the two-word continents.
+    for continent in TWO_WORD_CONTINENTS:
+      scored_prediction = ScoredPrediction(description=continent['description'],
+                                           place_id=continent['place_id'],
+                                           matched_query=two_word_query,
+                                           score=get_match_score(
+                                               two_word_query,
+                                               continent['description']))
+      continent_responses.append(scored_prediction)
+
+  # Only keep continents scoring below 0, i.e. those close to the query.
+  continent_responses = list(filter(score_below_zero, continent_responses))
+  return continent_responses + responses
+
+
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns: List of scored predictions."""
@@ -164,6 +238,10 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
         query, pred['description']))
     all_responses.append(scored_prediction)
 
+  # Continent hack: the Maps Predictions API does not return continents, so we
+  # always score them against the queries and filter them by match score.
+  all_responses = prepend_continent_hack(all_responses, queries)
+
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
@@ -171,7 +249,7 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   responses = []
   place_ids = set()
   index = 0
-  while len(responses) < DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
+  while len(responses) < 2 * DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
       all_responses):
     if all_responses[index].place_id not in place_ids:
       responses.append(all_responses[index])
@@ -181,6 +259,29 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   return responses
 
 
+def fetch_place_id_to_dcid(
+    prediction_responses: List[ScoredPrediction]) -> Dict[str, str]:
+  """Fetches the associated DCID for each place ID returned by Google.
+  Returns:
+    Mapping of Place ID to DCID."""
+
+  place_ids = []
+  for prediction in prediction_responses:
+    place_ids.append(prediction.place_id)
+
+  place_id_to_dcid = dict()
+  if place_ids:
+    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
+
+  # Add hardcoded continent Place IDs to DCIDs.
+  place_id_to_dcid.update(CONTINENT_PLACE_ID_TO_DCID)
+
+  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
+               len(place_id_to_dcid))
+
+  return place_id_to_dcid
+
+
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""
   return p.score
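
Illustrative note (not part of the patch): a minimal sketch of how the continent hack in prepend_continent_hack() is expected to behave, assuming server.routes.shared_api.autocomplete.helpers imports cleanly outside a Flask request context. The query strings are hypothetical; in production they come from find_queries().

  # Hypothetical usage sketch; the queries below are made up for illustration.
  from server.routes.shared_api.autocomplete import helpers

  # prepend_continent_hack() expects the single-word query last and the
  # two-word query second-to-last, matching how the diff indexes queries[-1]
  # and queries[-2].
  queries = ['north amer', 'amer']

  # With no Maps API responses at all, only continents whose match score is
  # below zero (a close match to the query) should be kept; for these queries
  # that should be 'North America'.
  predictions = helpers.prepend_continent_hack([], queries)
  for pred in predictions:
    print(pred.description, pred.matched_query, pred.score)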