Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NL Interface] Adding some detection logging info to the Debug Info on frontend #2377

Merged
merged 5 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions server/integration_tests/nlnext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import multiprocessing
import os
import sys
import time

from flask_testing import LiveServerTestCase
import requests
Expand Down Expand Up @@ -74,7 +73,6 @@ def run_sequence(self,
check_debug_info=True):
ctx = {}
for i, q in enumerate(queries):
time.sleep(5)
jehangiramjad marked this conversation as resolved.
Show resolved Hide resolved
print('Issuing ', test_dir, f'query[{i}]', q)
resp = requests.post(self.get_server_url() + f'/nl/data?q={q}',
json={
Expand Down
96 changes: 78 additions & 18 deletions server/routes/nl.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,13 @@ def _remove_places(query, places_str_found: List[str]):
return ' '.join(query.split())


def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
def _get_place_from_dcids(place_dcids: List[str],
debug_logs: Dict) -> List[Place]:
places = []
place_types_dict = dc.property_values(place_dcids, 'typeOf')
place_names_dict = dc.property_values(place_dcids, 'name')

dc_resolve_failures = []
# Iterate in the same order as place_dcids.
for p_dcid in place_dcids:

Expand All @@ -126,16 +128,23 @@ def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
logging.info(
f"Place DCID ({p_dcid}) did not correspond to a place_type and/or place name."
)
dc_resolve_failures.append(p_dcid)

debug_logs.update({
"dc_resolution_failure": dc_resolve_failures,
"dc_resolved_places": places,
})
return places


def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
# TODO: propagate several of the logging errors in this function to place detection
# state displayed in debugInfo.
def _infer_place_dcids(places_str_found: List[str],
debug_logs: Dict) -> List[str]:
if not places_str_found:
logging.info("places_found is empty. Nothing to retrieve from Maps API.")
return []

override_places = []
maps_api_failures = []
no_dcids_found = []
place_dcids = []
# Iterate over all the places until a valid place DCID is found.
for p_str in places_str_found:
Expand All @@ -147,6 +156,7 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
f"{p_str} was found in OVERRIDE_PLACE_TO_DCID_FOR_MAPS_API. Recording its DCID {place_dcid} without querying Maps API."
)
place_dcids.append(place_dcid)
override_places.append((p_str.lower(), place_dcid))
continue

logging.info(f"Searching Maps API with: {p_str}")
Expand All @@ -168,12 +178,22 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
logging.info(
f"Maps API found a place {place_id} but no DCID match found for place string: {p_str}."
)
no_dcids_found.append(place_id)
else:
logging.info("Maps API did not find a place for place string: {p_str}.")
logging.info(f"Maps API did not find a place for place string: {p_str}.")
maps_api_failures.append(p_str)

if not place_dcids:
logging.info(
f"No place DCIDs were found. Using places_found = {places_str_found}")
f"No place DCIDs were found. Using places_found = {places_str_found}.")

# Update the debug_logs dict.
debug_logs.update({
"dcids_resolved": place_dcids,
"dcid_overrides_found": override_places,
"maps_api_failures": maps_api_failures,
"dcid_not_found_for_place_ids": no_dcids_found
})
return place_dcids


Expand All @@ -183,8 +203,8 @@ def _empty_svs_score_dict():

def _result_with_debug_info(data_dict: Dict, status: str,
query_detection: Detection,
uttr_history: List[Dict],
debug_counters: Dict) -> Dict:
uttr_history: List[Dict], debug_counters: Dict,
query_detection_debug_logs: str) -> Dict:
"""Using data_dict and query_detection, format the dictionary response."""
svs_dict = {
'SV': query_detection.svs_detected.sv_dcids,
Expand Down Expand Up @@ -262,6 +282,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
places_found_formatted,
'query_with_places_removed':
query_detection.places_detected.query_without_place_substr,
'query_detection_debug_logs':
query_detection_debug_logs,
})

if query_detection.places_detected.main_place:
Expand All @@ -278,7 +300,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
return data_dict


def _detection(orig_query, cleaned_query) -> Detection:
def _detection(orig_query: str, cleaned_query: str,
query_detection_debug_logs: Dict) -> Detection:
model = current_app.config['NL_MODEL']

# Step 1: find all relevant places and the name/type of the main place found.
Expand All @@ -294,19 +317,36 @@ def _detection(orig_query, cleaned_query) -> Detection:
main_place = None
resolved_places = []

# Start updating the query_detection_debug_logs. Create space for place dcid inference
# and place resolution. If they remain empty, the function belows were never triggered.
query_detection_debug_logs["place_dcid_inference"] = {}
query_detection_debug_logs["place_resolution"] = {}
# Look to find place DCIDs.
if places_str_found:
place_dcids = _infer_place_dcids(places_str_found)
place_dcids = _infer_place_dcids(
places_str_found, query_detection_debug_logs["place_dcid_inference"])
logging.info(f"Found {len(place_dcids)} place dcids: {place_dcids}.")

# Step 2: replace the places in the query sentence with "".
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pradh this is the fix for the bug we discussed. This needed to go under the following conditional (if place_dcids:).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you elaborate more on what the problem was? And maybe add some comments on why this should go under resolved_places, rather than say if place_dcids?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if it should be under if place_dcids or under if resolved_places because I have not seen resolution failures very often, if at all? But I have put this back under if place_dcids to have parity with the previous behavior and also added a detailed comment with a possible TODO (to investigate if only those place strings should be removed which get a DCID--this would be another change from existing/previous behavior so should first be tested).

query = _remove_places(cleaned_query.lower(), places_str_found)

if place_dcids:
resolved_places = _get_place_from_dcids(place_dcids)
resolved_places = _get_place_from_dcids(
place_dcids, query_detection_debug_logs["place_resolution"])
logging.info(
f"Resolved {len(resolved_places)} place dcids: {resolved_places}.")

# Step 2: replace the place strings with "" if place_dcids were found.
# Typically, this could also be done under the check for resolved_places
# but we don't expected the resolution from place dcids to fail (typically).
# Also, even if the resolution fails, if there is a place dcid found, it should
# be considered good enough to remove the place strings.
# TODO: investigate whether it is better to only remove place strings for which
# a DCID is found and leave the others in the query string. This is now relevant
# because we support multiple place detection+resolution. Previously, even if
# just one place was used (resolved), it made sense to remove all place strings.
# But now that multiple place strings are being resolved, if there is a failure
# in resolving a place, perhaps it should not be removed? This would be a change
# and would need to be validated first.
query = _remove_places(cleaned_query.lower(), places_str_found)

if resolved_places:
main_place = resolved_places[0]
logging.info(f"Using main_place as: {main_place}")
Expand All @@ -318,10 +358,26 @@ def _detection(orig_query, cleaned_query) -> Detection:
places_found=resolved_places,
main_place=main_place)

# Update the various place detection and query transformation debug logs dict.
query_detection_debug_logs["places_found_str"] = places_str_found
query_detection_debug_logs["main_place_inferred"] = main_place
query_detection_debug_logs["query_transformations"] = {
"place_detection_input": cleaned_query.lower(),
"place_detection_with_places_removed": query,
}
if not query_detection_debug_logs["place_dcid_inference"]:
query_detection_debug_logs[
"place_dcid_inference"] = "Place DCID Inference did not trigger (no place strings found)."
if not query_detection_debug_logs["place_resolution"]:
query_detection_debug_logs[
"place_resolution"] = "Place resolution did not trigger (no place dcids found)."

# Step 3: Identify the SV matched based on the query.
sv_debug_logs = {}
svs_scores_dict = _empty_svs_score_dict()
try:
svs_scores_dict = model.detect_svs(query)
svs_scores_dict = model.detect_svs(
query, query_detection_debug_logs["query_transformations"])
except ValueError as e:
logging.info(e)
logging.info("Using an empty svs_scores_dict")
Expand Down Expand Up @@ -443,9 +499,12 @@ def data():
_detection("", ""), escaped_context_history,
{})

query_detection_debug_logs = {}
query_detection_debug_logs["original_query"] = query
# Query detection routine:
# Returns detection for Place, SVs and Query Classifications.
query_detection = _detection(str(escape(original_query)), query)
query_detection = _detection(str(escape(original_query)), query,
query_detection_debug_logs)

# Generate new utterance.
prev_utterance = nl_utterance.load_utterance(context_history)
Expand Down Expand Up @@ -500,7 +559,8 @@ def data():
loop.run_until_complete(bt.write_row(session_info))

data_dict = _result_with_debug_info(data_dict, status_str, query_detection,
context_history, dbg_counters)
context_history, dbg_counters,
query_detection_debug_logs)

return data_dict

Expand Down
5 changes: 4 additions & 1 deletion server/services/nl.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,14 +398,17 @@ def heuristic_correlation_classification(
return NLClassifier(type=ClassificationType.CORRELATION,
attributes=attributes)

def detect_svs(self, query) -> Dict[str, Union[Dict, List]]:
def detect_svs(self, query: str,
debug_logs: Dict) -> Dict[str, Union[Dict, List]]:
# Remove stop words.
# Check comment at the top of this file above `ALL_STOP_WORDS` to understand
# the potential areas for improvement. For now, this removal blanket removes
# any words in ALL_STOP_WORDS which includes contained_in places and their
# plurals and any other query attribution/classification trigger words.
logging.info(f"SV Detection: Query provided to SV Detection: {query}")
debug_logs["sv_detection_query_input"] = query
query = utils.remove_stop_words(query, ALL_STOP_WORDS)
debug_logs["sv_detection_query_stop_words_removal"] = query
logging.info(f"SV Detection: Query used after removing stop words: {query}")

# Make API call to the NL models/embeddings server.
Expand Down
11 changes: 11 additions & 0 deletions static/js/apps/nl_interface/debug_info.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
mainPlaceDCID: props.debugData["main_place_dcid"],
mainPlaceName: props.debugData["main_place_name"],
queryWithoutPlaces: props.debugData["query_with_places_removed"],
queryDetectionDebugLogs: props.debugData["query_detection_debug_logs"],
svScores: props.debugData["sv_matching"],
svSentences: props.debugData["svs_to_sentences"],
rankingClassification: props.debugData["ranking_classification"],
Expand Down Expand Up @@ -220,6 +221,16 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
{svToSentences(debugInfo.svScores, debugInfo.svSentences)}
</Col>
</Row>
<Row>
<b>Query Detection Debug Logs:</b>
</Row>
<Row>
<Col>
<pre>
{JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}
</pre>
</Col>
</Row>
<Row>
<b>Debug Counters</b>
</Row>
Expand Down