From d5dd931e18e6396fe56f6cbd39e5126824a9578b Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:06:10 -0400
Subject: [PATCH 1/4] Add continents for autocomplete hack.

---
 .../shared_api/autocomplete/autocomplete.py   | 16 ++--
 .../routes/shared_api/autocomplete/helpers.py | 73 ++++++++++++++++++-
 2 files changed, 76 insertions(+), 13 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/autocomplete.py b/server/routes/shared_api/autocomplete/autocomplete.py
index cb29b539c3..36b7bb0c52 100644
--- a/server/routes/shared_api/autocomplete/autocomplete.py
+++ b/server/routes/shared_api/autocomplete/autocomplete.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 
 from flask import Blueprint
@@ -22,7 +21,6 @@
 from server.routes.shared_api.autocomplete import helpers
 from server.routes.shared_api.autocomplete.types import AutoCompleteApiResponse
 from server.routes.shared_api.autocomplete.types import AutoCompleteResult
-from server.routes.shared_api.place import findplacedcid
 
 # TODO(gmechali): Add Stat Var search.
 
@@ -45,15 +43,7 @@ def autocomplete():
   # Send requests to the Google Maps Predictions API.
   prediction_responses = helpers.predict(queries, lang)
 
-  place_ids = []
-  for prediction in prediction_responses:
-    place_ids.append(prediction.place_id)
-
-  place_id_to_dcid = []
-  if place_ids:
-    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
-               len(place_id_to_dcid))
+  place_id_to_dcid = helpers.fetch_place_id_to_dcid(prediction_responses)
 
   final_predictions = []
   for prediction in prediction_responses:
@@ -64,6 +54,10 @@ def autocomplete():
           matched_query=prediction.matched_query,
           dcid=place_id_to_dcid[prediction.place_id])
       final_predictions.append(current_prediction)
+
+    if len(final_predictions) == helpers.DISPLAYED_RESPONSE_COUNT_LIMIT:
+      break
+
   logging.info(
       "[Place_Autocomplete] Returning a total of %d place predictions.",
       len(final_predictions))
diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 08e96617d7..412a61ef16 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -22,6 +22,7 @@
 import requests
 
 from server.routes.shared_api.autocomplete.types import ScoredPrediction
+from server.routes.shared_api.place import findplacedcid
 
 MAPS_API_URL = "https://maps.googleapis.com/maps/api/place/autocomplete/json?"
 MIN_CHARACTERS_PER_QUERY = 3
@@ -112,13 +113,15 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
+
+  # TODO(gmechali): Replace weird characters in both input and like i with accent, o with two dots etc.
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
 
   score = 0
   start_index = 0
-  for str1_word in words_in_str1:
+  for str1_idx, str1_word in enumerate(words_in_str1):
     str1_word = str1_word.lower()
     found_match = False
     for idx, name_word in enumerate(words_in_name):
@@ -126,6 +129,10 @@ def get_match_score(match_string: str, name: str) -> float:
         continue
 
       name_word = name_word.lower()
+      if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
+        # boost score for start of query.
+        score -= 0.5
+
       if str1_word == name_word:
         start_index = idx + 1
         score -= 1
@@ -147,6 +154,42 @@ def get_match_score(match_string: str, name: str) -> float:
   return score
 
 
+def score_below_zero(pred: ScoredPrediction) -> bool:
+  """Returns whether the score is below 0."""
+  return pred.score < 0
+
+
+def prepend_continent_hack(responses: List[ScoredPrediction], queries: List[str]) -> List[ScoredPrediction]:
+  queries.reverse()
+  continent_responses = []
+  single_continents = [{'description': 'Europe', 'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'},
+                       {'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
+                       {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'},
+                       {'description': 'Oceania', 'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'},
+                       {'description': 'Africa', 'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'},
+                       {'description': 'Asia', 'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'}]
+  for continent in single_continents:
+    scored_prediction = ScoredPrediction(description=continent['description'],
+                                         place_id=continent['place_id'],
+                                         matched_query=queries[0],
+                                         score=get_match_score(queries[0], continent['description']))
+    continent_responses.append(scored_prediction)
+
+  if len(queries) > 1:
+    # double word continent hack
+    double_continents = [{'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
+                         {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'}]
+    for continent in double_continents:
+      scored_prediction = ScoredPrediction(description=continent['description'],
+                                           place_id=continent['place_id'],
+                                           matched_query=queries[1],
+                                           score=get_match_score(queries[1], continent['description']))
+      continent_responses.append(scored_prediction)
+
+  continent_responses = list(filter(score_below_zero, continent_responses))
+  return continent_responses + responses
+
+
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns:
@@ -164,14 +207,19 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
                                          query, pred['description']))
       all_responses.append(scored_prediction)
 
+  # single word continent hack
+  all_responses = prepend_continent_hack(all_responses, queries)
+
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
 
+  # all_responses = list(filter(score_below_zero, all_responses))
+
   responses = []
   place_ids = set()
   index = 0
-  while len(responses) < DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
+  while len(responses) < 2 * DISPLAYED_RESPONSE_COUNT_LIMIT and index < len(
       all_responses):
     if all_responses[index].place_id not in place_ids:
       responses.append(all_responses[index])
@@ -180,6 +228,27 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
   return responses
 
+def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
+  place_ids = []
+  for prediction in prediction_responses:
+    place_ids.append(prediction.place_id)
+
+  place_id_to_dcid = dict()
+  if place_ids:
+    place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
+
+  place_id_to_dcid['ChIJhdqtz4aI7UYRefD8s-aZ73I'] = 'europe'
+  place_id_to_dcid['ChIJtTRdNRw0CZQRK-PGyc8M1Gk'] = 'southamerica'
+  place_id_to_dcid['ChIJnXKOaXELs1IRgqNhl4MoExM'] = 'northamerica'
+  place_id_to_dcid['ChIJV-jLJIrxYzYRWfSg0_xrQak'] = 'asia'
+  place_id_to_dcid['ChIJS3WQM3uWuaQRdSAPdB--Um4'] = 'antarctica'
+  place_id_to_dcid['ChIJQbA4_Cu8QW4RbuvrxISzaks'] = 'oceania'
+  place_id_to_dcid['ChIJ1fWMlApsoBARs_CQnslwghA'] = 'africa'
+
+  logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
+               len(place_id_to_dcid))
+
+  return place_id_to_dcid
 
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""

From 894d1ed13930a8909d718521d3592cbea579765e Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:24:16 -0400
Subject: [PATCH 2/4] Clean up the continent hack

---
 .../routes/shared_api/autocomplete/helpers.py | 99 ++++++++++++-------
 1 file changed, 65 insertions(+), 34 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 412a61ef16..6dbb8b5d30 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -28,6 +28,35 @@
 MIN_CHARACTERS_PER_QUERY = 3
 MAX_NUM_OF_QUERIES = 4
 DISPLAYED_RESPONSE_COUNT_LIMIT = 5
+TWO_WORD_CONTINENTS = [{
+    'description': 'North America',
+    'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'
+}, {
+    'description': 'South America',
+    'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'
+}]
+CONTINENTS = [{
+    'description': 'Europe',
+    'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'
+}, {
+    'description': 'Oceania',
+    'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'
+}, {
+    'description': 'Africa',
+    'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'
+}, {
+    'description': 'Asia',
+    'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
+}] + TWO_WORD_CONTINENTS
+CONTINENT_PLACE_ID_TO_DCID = {
+'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+  'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+  'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+  'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+  'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+  'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+  'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+  }
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -113,8 +142,7 @@ def get_match_score(match_string: str, name: str) -> float:
   score is best match.
   Returns:
     Float score."""
-
-  # TODO(gmechali): Replace weird characters in both input and like i with accent, o with two dots etc.
+
   rgx = re.compile(r'[\s|,]+')
   words_in_name = re.split(rgx, name)
   words_in_str1 = re.split(rgx, match_string)
@@ -159,37 +187,40 @@ def score_below_zero(pred: ScoredPrediction) -> bool:
   return pred.score < 0
 
 
-def prepend_continent_hack(responses: List[ScoredPrediction], queries: List[str]) -> List[ScoredPrediction]:
-  queries.reverse()
+def prepend_continent_hack(responses: List[ScoredPrediction],
+                           queries: List[str]) -> List[ScoredPrediction]:
+  """Prepend continents as responses in order to hack it in autocomplete.
+  Returns:
+    List of scored predictions."""
+
   continent_responses = []
-  single_continents = [{'description': 'Europe', 'place_id': 'ChIJhdqtz4aI7UYRefD8s-aZ73I'},
-                       {'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
-                       {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'},
-                       {'description': 'Oceania', 'place_id': 'ChIJQbA4_Cu8QW4RbuvrxISzaks'},
-                       {'description': 'Africa', 'place_id': 'ChIJ1fWMlApsoBARs_CQnslwghA'},
-                       {'description': 'Asia', 'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'}]
-  for continent in single_continents:
+  single_word_query = queries[-1]
+  for continent in CONTINENTS:
     scored_prediction = ScoredPrediction(description=continent['description'],
-                                         place_id=continent['place_id'],
-                                         matched_query=queries[0],
-                                         score=get_match_score(queries[0], continent['description']))
+                                         place_id=continent['place_id'],
+                                         matched_query=single_word_query,
+                                         score=get_match_score(
+                                             single_word_query,
+                                             continent['description']))
     continent_responses.append(scored_prediction)
 
   if len(queries) > 1:
-    # double word continent hack
-    double_continents = [{'description': 'North America', 'place_id': 'ChIJnXKOaXELs1IRgqNhl4MoExM'},
-                         {'description': 'South America', 'place_id': 'ChIJtTRdNRw0CZQRK-PGyc8M1Gk'}]
-    for continent in double_continents:
+    two_word_query = queries[-2]
+    # If we have a 2 two word query, also place the two word continents as responses.
+    for continent in TWO_WORD_CONTINENTS:
       scored_prediction = ScoredPrediction(description=continent['description'],
-                                           place_id=continent['place_id'],
-                                           matched_query=queries[1],
-                                           score=get_match_score(queries[1], continent['description']))
+                                           place_id=continent['place_id'],
+                                           matched_query=two_word_query,
+                                           score=get_match_score(
+                                               two_word_query,
+                                               continent['description']))
       continent_responses.append(scored_prediction)
 
+  # Only keep continents with a score below 0 as it implies it's close to the query.
   continent_responses = list(filter(score_below_zero, continent_responses))
   return continent_responses + responses
-
+
 
 def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
   """Trigger maps prediction api requests and parse the output. Remove duplication responses and limit the number of results.
   Returns:
@@ -207,14 +238,13 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
                                          query, pred['description']))
       all_responses.append(scored_prediction)
 
-  # single word continent hack
+  # Continent hack - Continents not supported by Google Maps Predictions API.
+  # This hack will always evaluate continents for each response. They will get filtered in/out based on the match_score we compute.
   all_responses = prepend_continent_hack(all_responses, queries)
 
   all_responses.sort(key=get_score)
   logging.info("[Place_Autocomplete] Received %d total place predictions.",
                len(all_responses))
 
-  # all_responses = list(filter(score_below_zero, all_responses))
-
   responses = []
   place_ids = set()
@@ -228,7 +258,13 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
   return responses
 
-def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
+
+def fetch_place_id_to_dcid(
+    prediction_responses: List[ScoredPrediction]) -> Dict:
+  """Fetches the associated DCID for each place ID returned by Google.
+  Returns:
+    Mapping of Place ID to DCID."""
+
   place_ids = []
   for prediction in prediction_responses:
     place_ids.append(prediction.place_id)
@@ -236,14 +272,9 @@ def fetch_place_id_to_dcid(prediction_responses: List[ScoredPrediction]) -> Dict:
   place_id_to_dcid = dict()
   if place_ids:
     place_id_to_dcid = json.loads(findplacedcid(place_ids).data)
-
-  place_id_to_dcid['ChIJhdqtz4aI7UYRefD8s-aZ73I'] = 'europe'
-  place_id_to_dcid['ChIJtTRdNRw0CZQRK-PGyc8M1Gk'] = 'southamerica'
-  place_id_to_dcid['ChIJnXKOaXELs1IRgqNhl4MoExM'] = 'northamerica'
-  place_id_to_dcid['ChIJV-jLJIrxYzYRWfSg0_xrQak'] = 'asia'
-  place_id_to_dcid['ChIJS3WQM3uWuaQRdSAPdB--Um4'] = 'antarctica'
-  place_id_to_dcid['ChIJQbA4_Cu8QW4RbuvrxISzaks'] = 'oceania'
-  place_id_to_dcid['ChIJ1fWMlApsoBARs_CQnslwghA'] = 'africa'
+
+  # Add hardcoded continent Place IDs to DCIDs.
+  place_id_to_dcid.update(CONTINENT_PLACE_ID_TO_DCID)
 
   logging.info("[Place_Autocomplete] Found %d place ID to DCID mappings.",
                len(place_id_to_dcid))

From e913b79f732ddbeccc38871ee1a3e9c68a402507 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 14:41:59 -0400
Subject: [PATCH 3/4] Reduce boost to 0.25

---
 .../routes/shared_api/autocomplete/helpers.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index 6dbb8b5d30..b33b053a5e 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -49,14 +49,14 @@
     'place_id': 'ChIJV-jLJIrxYzYRWfSg0_xrQak'
 }] + TWO_WORD_CONTINENTS
 CONTINENT_PLACE_ID_TO_DCID = {
-'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
-  'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
-  'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
-  'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
-  'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
-  'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
-  'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
-  }
+    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
+    'ChIJtTRdNRw0CZQRK-PGyc8M1Gk': 'southamerica',
+    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica',
+    'ChIJV-jLJIrxYzYRWfSg0_xrQak': 'asia',
+    'ChIJS3WQM3uWuaQRdSAPdB--Um4': 'antarctica',
+    'ChIJQbA4_Cu8QW4RbuvrxISzaks': 'oceania',
+    'ChIJ1fWMlApsoBARs_CQnslwghA': 'africa'
+}
 
 
 def find_queries(user_query: str) -> List[str]:
@@ -159,7 +159,7 @@ def get_match_score(match_string: str, name: str) -> float:
       name_word = name_word.lower()
       if idx == 0 and str1_idx == 0 and name_word.startswith(str1_word):
         # boost score for start of query.
-        score -= 0.5
+        score -= 0.25
 
       if str1_word == name_word:
         start_index = idx + 1
@@ -281,6 +281,7 @@ def fetch_place_id_to_dcid(
 
   return place_id_to_dcid
 
+
 def get_score(p: ScoredPrediction) -> float:
   """Returns the score."""
   return p.score

From c04e6b7ee17aca32240092527ad2a959453efc45 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali
Date: Wed, 30 Oct 2024 18:32:01 -0400
Subject: [PATCH 4/4] Adds type on response dict.

---
 server/routes/shared_api/autocomplete/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/routes/shared_api/autocomplete/helpers.py b/server/routes/shared_api/autocomplete/helpers.py
index b33b053a5e..f0d2e325af 100644
--- a/server/routes/shared_api/autocomplete/helpers.py
+++ b/server/routes/shared_api/autocomplete/helpers.py
@@ -260,7 +260,7 @@ def predict(queries: List[str], lang: str) -> List[ScoredPrediction]:
 
 
 def fetch_place_id_to_dcid(
-    prediction_responses: List[ScoredPrediction]) -> Dict:
+    prediction_responses: List[ScoredPrediction]) -> Dict[str, str]:
   """Fetches the associated DCID for each place ID returned by Google.
   Returns:
     Mapping of Place ID to DCID."""
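Note: the sketch below is a minimal, self-contained illustration of the idea behind these patches — score each hardcoded continent against the user's query, keep only candidates whose match score is negative, and map the surviving place IDs to hardcoded DCIDs. It uses a simplified stand-in for get_match_score, a local ScoredPrediction dataclass, and a trimmed-down continent table, so the names and scoring details here are illustrative assumptions rather than the module's actual implementation.

# Illustrative sketch only: simplified stand-ins for the helpers touched in this
# patch series, not the real server.routes.shared_api.autocomplete code.
from dataclasses import dataclass
from typing import List


@dataclass
class ScoredPrediction:
  description: str
  place_id: str
  matched_query: str
  score: float


# Subset of the hardcoded continents and their DCIDs, for the demo below.
CONTINENTS = [('Europe', 'ChIJhdqtz4aI7UYRefD8s-aZ73I'),
              ('North America', 'ChIJnXKOaXELs1IRgqNhl4MoExM')]
CONTINENT_PLACE_ID_TO_DCID = {
    'ChIJhdqtz4aI7UYRefD8s-aZ73I': 'europe',
    'ChIJnXKOaXELs1IRgqNhl4MoExM': 'northamerica'
}


def toy_match_score(query: str, name: str) -> float:
  """Simplified stand-in for get_match_score: lower (more negative) is better."""
  name_words = [w.lower() for w in name.split()]
  score = 0.0
  for word in query.lower().split():
    if word in name_words:
      score -= 1  # exact word match
    elif any(w.startswith(word) for w in name_words):
      score -= 0.25  # prefix match, analogous to the start-of-query boost
    else:
      score += 1  # unmatched words count against the candidate
  return score


def continents_for_query(query: str) -> List[ScoredPrediction]:
  """Scores each continent and keeps only negative scores, mirroring the
  score_below_zero filter used by prepend_continent_hack."""
  scored = [
      ScoredPrediction(name, place_id, query, toy_match_score(query, name))
      for name, place_id in CONTINENTS
  ]
  return [p for p in scored if p.score < 0]


if __name__ == '__main__':
  for query in ('north a', 'tokyo'):
    for pred in continents_for_query(query):
      dcid = CONTINENT_PLACE_ID_TO_DCID[pred.place_id]
      print(f'{query!r} -> {pred.description} (score={pred.score}, dcid={dcid})')

Run as-is, 'north a' keeps only North America while 'tokyo' keeps nothing, which is the filter-by-negative-score behaviour the patches rely on before fetch_place_id_to_dcid maps the surviving place IDs to their hardcoded continent DCIDs.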