diff --git a/server/integration_tests/nlnext_test.py b/server/integration_tests/nlnext_test.py
index 05e669cdaf..2c9c50c808 100644
--- a/server/integration_tests/nlnext_test.py
+++ b/server/integration_tests/nlnext_test.py
@@ -16,7 +16,6 @@
import multiprocessing
import os
import sys
-import time
from flask_testing import LiveServerTestCase
import requests
@@ -74,7 +73,6 @@ def run_sequence(self,
check_debug_info=True):
ctx = {}
for i, q in enumerate(queries):
- time.sleep(5)
print('Issuing ', test_dir, f'query[{i}]', q)
resp = requests.post(self.get_server_url() + f'/nl/data?q={q}',
json={
diff --git a/server/routes/nl.py b/server/routes/nl.py
index fd6f95dd1a..80623e9e26 100644
--- a/server/routes/nl.py
+++ b/server/routes/nl.py
@@ -108,11 +108,13 @@ def _remove_places(query, places_str_found: List[str]):
return ' '.join(query.split())
-def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
+def _get_place_from_dcids(place_dcids: List[str],
+ debug_logs: Dict) -> List[Place]:
places = []
place_types_dict = dc.property_values(place_dcids, 'typeOf')
place_names_dict = dc.property_values(place_dcids, 'name')
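+ # Track any DCIDs whose name/type lookup fails so they can be surfaced in debug_logs.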
+ dc_resolve_failures = []
# Iterate in the same order as place_dcids.
for p_dcid in place_dcids:
@@ -126,16 +128,23 @@ def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
logging.info(
f"Place DCID ({p_dcid}) did not correspond to a place_type and/or place name."
)
+ dc_resolve_failures.append(p_dcid)
+
+ debug_logs.update({
+ "dc_resolution_failure": dc_resolve_failures,
+ "dc_resolved_places": places,
+ })
return places
-def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
- # TODO: propagate several of the logging errors in this function to place detection
- # state displayed in debugInfo.
+def _infer_place_dcids(places_str_found: List[str],
+ debug_logs: Dict) -> List[str]:
if not places_str_found:
logging.info("places_found is empty. Nothing to retrieve from Maps API.")
- return []
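+ # Buckets for debug logging: DCID override hits, Maps API failures, and
+ # Maps place_ids with no DCID match.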
+ override_places = []
+ maps_api_failures = []
+ no_dcids_found = []
place_dcids = []
# Iterate over all the places until a valid place DCID is found.
for p_str in places_str_found:
@@ -147,6 +156,7 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
f"{p_str} was found in OVERRIDE_PLACE_TO_DCID_FOR_MAPS_API. Recording its DCID {place_dcid} without querying Maps API."
)
place_dcids.append(place_dcid)
+ override_places.append((p_str.lower(), place_dcid))
continue
logging.info(f"Searching Maps API with: {p_str}")
@@ -168,12 +178,22 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
logging.info(
f"Maps API found a place {place_id} but no DCID match found for place string: {p_str}."
)
+ no_dcids_found.append(place_id)
else:
- logging.info("Maps API did not find a place for place string: {p_str}.")
+ logging.info(f"Maps API did not find a place for place string: {p_str}.")
+ maps_api_failures.append(p_str)
if not place_dcids:
logging.info(
- f"No place DCIDs were found. Using places_found = {places_str_found}")
+ f"No place DCIDs were found. Using places_found = {places_str_found}.")
+
+ # Update the debug_logs dict.
+ debug_logs.update({
+ "dcids_resolved": place_dcids,
+ "dcid_overrides_found": override_places,
+ "maps_api_failures": maps_api_failures,
+ "dcid_not_found_for_place_ids": no_dcids_found
+ })
return place_dcids
@@ -183,8 +203,8 @@ def _empty_svs_score_dict():
def _result_with_debug_info(data_dict: Dict, status: str,
query_detection: Detection,
- uttr_history: List[Dict],
- debug_counters: Dict) -> Dict:
+ uttr_history: List[Dict], debug_counters: Dict,
+ query_detection_debug_logs: Dict) -> Dict:
"""Using data_dict and query_detection, format the dictionary response."""
svs_dict = {
'SV': query_detection.svs_detected.sv_dcids,
@@ -262,6 +282,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
places_found_formatted,
'query_with_places_removed':
query_detection.places_detected.query_without_place_substr,
+ 'query_detection_debug_logs':
+ query_detection_debug_logs,
})
if query_detection.places_detected.main_place:
@@ -278,7 +300,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
return data_dict
-def _detection(orig_query, cleaned_query) -> Detection:
+def _detection(orig_query: str, cleaned_query: str,
+ query_detection_debug_logs: Dict) -> Detection:
model = current_app.config['NL_MODEL']
# Step 1: find all relevant places and the name/type of the main place found.
@@ -294,19 +317,36 @@ def _detection(orig_query, cleaned_query) -> Detection:
main_place = None
resolved_places = []
+ # Start updating query_detection_debug_logs. Create entries for place dcid
+ # inference and place resolution; if they remain empty, the functions below
+ # were never triggered.
+ query_detection_debug_logs["place_dcid_inference"] = {}
+ query_detection_debug_logs["place_resolution"] = {}
# Look to find place DCIDs.
if places_str_found:
- place_dcids = _infer_place_dcids(places_str_found)
+ place_dcids = _infer_place_dcids(
+ places_str_found, query_detection_debug_logs["place_dcid_inference"])
logging.info(f"Found {len(place_dcids)} place dcids: {place_dcids}.")
- # Step 2: replace the places in the query sentence with "".
- query = _remove_places(cleaned_query.lower(), places_str_found)
-
if place_dcids:
- resolved_places = _get_place_from_dcids(place_dcids)
+ resolved_places = _get_place_from_dcids(
+ place_dcids, query_detection_debug_logs["place_resolution"])
logging.info(
f"Resolved {len(resolved_places)} place dcids: {resolved_places}.")
+ # Step 2: replace the place strings with "" if place_dcids were found.
+ # This could also be done under the check for resolved_places, but resolution
+ # from place dcids is not expected to fail. Even if it does fail, finding a
+ # place dcid is considered good enough to remove the place strings.
+ # TODO: investigate whether it is better to only remove place strings for which
+ # a DCID is found and leave the others in the query string. This is now relevant
+ # because we support multiple place detection+resolution. Previously, even if
+ # just one place was used (resolved), it made sense to remove all place strings.
+ # But now that multiple place strings are resolved, a place string that fails
+ # to resolve should perhaps remain in the query. That would be a behavior
+ # change and would need to be validated first.
+ query = _remove_places(cleaned_query.lower(), places_str_found)
+
if resolved_places:
main_place = resolved_places[0]
logging.info(f"Using main_place as: {main_place}")
@@ -318,10 +358,26 @@ def _detection(orig_query, cleaned_query) -> Detection:
places_found=resolved_places,
main_place=main_place)
+ # Update the various place detection and query transformation debug logs dict.
+ query_detection_debug_logs["places_found_str"] = places_str_found
+ query_detection_debug_logs["main_place_inferred"] = main_place
+ query_detection_debug_logs["query_transformations"] = {
+ "place_detection_input": cleaned_query.lower(),
+ "place_detection_with_places_removed": query,
+ }
+ if not query_detection_debug_logs["place_dcid_inference"]:
+ query_detection_debug_logs[
+ "place_dcid_inference"] = "Place DCID Inference did not trigger (no place strings found)."
+ if not query_detection_debug_logs["place_resolution"]:
+ query_detection_debug_logs[
+ "place_resolution"] = "Place resolution did not trigger (no place dcids found)."
+
# Step 3: Identify the SV matched based on the query.
svs_scores_dict = _empty_svs_score_dict()
try:
- svs_scores_dict = model.detect_svs(query)
+ svs_scores_dict = model.detect_svs(
+ query, query_detection_debug_logs["query_transformations"])
except ValueError as e:
logging.info(e)
logging.info("Using an empty svs_scores_dict")
@@ -443,9 +499,12 @@ def data():
_detection("", ""), escaped_context_history,
{})
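+ # Collect query detection debug info; this dict is filled in by _detection()
+ # and attached to the response in _result_with_debug_info().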
+ query_detection_debug_logs = {}
+ query_detection_debug_logs["original_query"] = query
# Query detection routine:
# Returns detection for Place, SVs and Query Classifications.
- query_detection = _detection(str(escape(original_query)), query)
+ query_detection = _detection(str(escape(original_query)), query,
+ query_detection_debug_logs)
# Generate new utterance.
prev_utterance = nl_utterance.load_utterance(context_history)
@@ -500,7 +559,8 @@ def data():
loop.run_until_complete(bt.write_row(session_info))
data_dict = _result_with_debug_info(data_dict, status_str, query_detection,
- context_history, dbg_counters)
+ context_history, dbg_counters,
+ query_detection_debug_logs)
return data_dict
diff --git a/server/services/nl.py b/server/services/nl.py
index d2b036e50f..0b22f38cab 100644
--- a/server/services/nl.py
+++ b/server/services/nl.py
@@ -398,14 +398,17 @@ def heuristic_correlation_classification(
return NLClassifier(type=ClassificationType.CORRELATION,
attributes=attributes)
- def detect_svs(self, query) -> Dict[str, Union[Dict, List]]:
+ def detect_svs(self, query: str,
+ debug_logs: Dict) -> Dict[str, Union[Dict, List]]:
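+ # debug_logs is updated in place with the SV detection query before and
+ # after stop-word removal.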
# Remove stop words.
# Check comment at the top of this file above `ALL_STOP_WORDS` to understand
# the potential areas for improvement. For now, this removal blanket removes
# any words in ALL_STOP_WORDS which includes contained_in places and their
# plurals and any other query attribution/classification trigger words.
logging.info(f"SV Detection: Query provided to SV Detection: {query}")
+ debug_logs["sv_detection_query_input"] = query
query = utils.remove_stop_words(query, ALL_STOP_WORDS)
+ debug_logs["sv_detection_query_stop_words_removal"] = query
logging.info(f"SV Detection: Query used after removing stop words: {query}")
# Make API call to the NL models/embeddings server.
diff --git a/static/js/apps/nl_interface/debug_info.tsx b/static/js/apps/nl_interface/debug_info.tsx
index 7de5819dbe..3e5ccb61fc 100644
--- a/static/js/apps/nl_interface/debug_info.tsx
+++ b/static/js/apps/nl_interface/debug_info.tsx
@@ -106,6 +106,7 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
mainPlaceDCID: props.debugData["main_place_dcid"],
mainPlaceName: props.debugData["main_place_name"],
queryWithoutPlaces: props.debugData["query_with_places_removed"],
+ queryDetectionDebugLogs: props.debugData["query_detection_debug_logs"],
svScores: props.debugData["sv_matching"],
svSentences: props.debugData["svs_to_sentences"],
rankingClassification: props.debugData["ranking_classification"],
@@ -220,6 +221,16 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
{svToSentences(debugInfo.svScores, debugInfo.svSentences)}
+ <Row>
+ <b>Query Detection Debug Logs:</b>
+ </Row>
+ <Row>
+ <pre>{JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}</pre>
+ </Row>