Skip to content

Commit

Permalink
Success markers telemetry (#10065)
Browse files Browse the repository at this point in the history
* Markers telemetry

* Everything without tests

* Specified events.json

* Test added

* Changelog entry

* Naming fixes

* Fix lint, fix CLI bug

No way to actually change config path, now fixed with nargs

* Add markers parsed telemetry

* Document telemetry functions

* always add ANY_MARKER; add test

* Add complexity telemetry

* Skip root marker to avoid double reporting total marker count

Co-authored-by: Matthew Summers <[email protected]>
Co-authored-by: ka-bu <[email protected]>
  • Loading branch information
3 people authored Nov 5, 2021
1 parent b773f71 commit 8e922dd
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 7 deletions.
1 change: 1 addition & 0 deletions changelog/10065.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add telemetry to markers extraction.
86 changes: 86 additions & 0 deletions docs/docs/telemetry/events.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"Model Training",
"Model Testing",
"Model Serving",
"Markers Extraction",
"Data Handling",
"Miscellaneous"
],
Expand Down Expand Up @@ -422,6 +423,91 @@
"num_synonyms",
"num_regexes"
]
},
"Markers Extraction Initiated": {
"description": "Triggered when marker extraction has been initiated.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"strategy": {
"type": "string",
"description": "Strategy to use when selecting trackers to extract from."
},
"only_extract": {
"type": "boolean",
"description": "Indicates if path to write out statistics hasn't been specified."
},
"seed": {
"type": "boolean",
"description": "The seed to initialise the random number generator for use with the 'sample' strategy."
},
"count": {
"type": ["integer", "null"],
"description": "Number of trackers to extract from (for any strategy except 'all')."
}
},
"additionalProperties": false,
"required": [
"strategy",
"only_extract",
"seed",
"count"
]
},
"Markers Extracted": {
"description": "Triggered when markers have been extracted.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"trackers_count": {
"type": "integer",
"description": "Number of processed trackers."
}
},
"additionalProperties": false,
"required": [
"trackers_count"
]
},
"Markers Parsed": {
"description": "Triggered when markers have been successfully parsed.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"marker_count": {
"type": "integer",
"description": "Number of parsed markers."
},
"max_depth": {
"type": "integer",
"description": "Maximum depth of the parsed markers."
},
"branching_factor": {
"type": "integer",
"description": "Maximum number of children of any of the parsed markers."
}
},
"additionalProperties": false,
"required": [
"marker_count",
"max_depth",
"branching_factor"
]
},
"Markers Statistics Computed": {
"description": "Triggered when marker statistics have been computed.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"trackers_count": {
"type": "integer",
"description": "Number of processed trackers."
}
},
"additionalProperties": false,
"required": [
"trackers_count"
]
}
}
}
19 changes: 19 additions & 0 deletions rasa/cli/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import List, Text, Optional
from pathlib import Path

from rasa import telemetry
from rasa.core.utils import AvailableEndpoints
from rasa.core.tracker_store import TrackerStore
from rasa.core.evaluation.marker_tracker_loader import MarkerTrackerLoader
Expand Down Expand Up @@ -131,6 +132,13 @@ def _run_markers(
computed per session will be stored in
'<path-to-stats-folder>/statistics-per-session.csv'.
"""
telemetry.track_markers_extraction_initiated(
strategy=strategy,
only_extract=stats_file_prefix is not None,
seed=seed is not None,
count=count,
)

domain = Domain.load(domain_path) if domain_path else None
markers = Marker.from_path(config)
if domain and not markers.validate_against_domain(domain):
Expand All @@ -139,6 +147,17 @@ def _run_markers(
"Please see errors listed above and fix before running again."
)

# Calculate telemetry
# Subtract one to remove the virtual OR over all markers
num_markers = len(markers) - 1
max_depth = markers.max_depth() - 1
# Find maximum branching of marker
branching_factor = max(
len(sub_marker) - 1 for marker in markers.sub_markers for sub_marker in marker
)

telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

tracker_loader = _create_tracker_loader(endpoint_config, strategy, count, seed)

def _append_suffix(path: Optional[Path], suffix: Text) -> Optional[Path]:
Expand Down
27 changes: 21 additions & 6 deletions rasa/core/evaluation/marker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from rasa.shared.data import is_likely_yaml_file
from rasa.shared.exceptions import InvalidConfigException, RasaException
from rasa.shared.core.events import ActionExecuted, UserUttered, Event
from rasa import telemetry
from rasa.shared.core.domain import Domain
from rasa.shared.core.trackers import DialogueStateTracker
from rasa.utils.io import WriteRow
Expand Down Expand Up @@ -271,6 +272,11 @@ def validate_against_domain(self, domain: Domain) -> bool:
"""
...

@abstractmethod
def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
...

def evaluate_events(
self, events: List[Event], recursive: bool = False
) -> List[SessionEvaluation]:
Expand Down Expand Up @@ -447,12 +453,8 @@ def from_path(cls, path: Union[Path, Text]) -> Marker:
loaded_markers.append(marker)

# combine the markers
if len(loaded_markers) > 1:
marker = OrMarker(markers=loaded_markers)
marker.name = Marker.ANY_MARKER # cannot be set via name parameter
else:
marker = loaded_markers[0]

marker = OrMarker(markers=loaded_markers)
marker.name = Marker.ANY_MARKER # cannot be set via name parameter
return marker

@staticmethod
Expand Down Expand Up @@ -608,6 +610,9 @@ def evaluate_trackers(
if tracker:
tracker_result = self.evaluate_events(tracker.events)
processed_trackers[tracker.sender_id] = tracker_result

processed_trackers_count = len(processed_trackers)
telemetry.track_markers_extracted(processed_trackers_count)
Marker._save_results(output_file, processed_trackers)

# Compute and write statistics if requested.
Expand All @@ -622,6 +627,8 @@ def evaluate_trackers(
session_idx=session_idx,
meta_data_on_relevant_events_per_marker=session_result,
)

telemetry.track_markers_stats_computed(processed_trackers_count)
if overall_stats_file:
stats.overall_statistic_to_csv(path=overall_stats_file)
if session_stats_file:
Expand Down Expand Up @@ -756,6 +763,10 @@ def validate_against_domain(self, domain: Domain) -> bool:
marker.validate_against_domain(domain) for marker in self.sub_markers
)

def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
return 1 + max(child.max_depth() for child in self.sub_markers)

@staticmethod
def from_tag_and_sub_config(
tag: Text, sub_config: Any, name: Optional[Text] = None,
Expand Down Expand Up @@ -837,6 +848,10 @@ def __len__(self) -> int:
"""Returns the count of all markers that are part of this marker."""
return 1

def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
return 1

@staticmethod
def from_tag_and_sub_config(
tag: Text, sub_config: Any, name: Optional[Text] = None
Expand Down
73 changes: 73 additions & 0 deletions rasa/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@
TELEMETRY_VISUALIZATION_STARTED_EVENT = "Story Visualization Started"
TELEMETRY_TEST_CORE_EVENT = "Model Core Tested"
TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested"
TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT = "Markers Extraction Initiated"
TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted"
TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Statistics Computed"
TELEMETRY_MARKERS_PARSED_COUNT = "Markers Parsed"

# used to calculate the context on the first call and cache it afterwards
TELEMETRY_CONTEXT = None
Expand Down Expand Up @@ -1005,3 +1009,72 @@ def track_nlu_model_test(test_data: "TrainingData") -> None:
"num_regexes": len(test_data.regex_features),
},
)


@ensure_telemetry_enabled
def track_markers_extraction_initiated(
strategy: Text, only_extract: bool, seed: bool, count: Optional[int],
) -> None:
"""Track when a user tries to extract success markers.
Args:
strategy: The strategy the user is using for tracker selection
only_extract: Indicates if the user is only extracting markers or also
producing stats
seed: Indicates if the user used a seed for this attempt
count: (Optional) The number of trackers the user is trying to select.
"""
_track(
TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT,
{
"strategy": strategy,
"only_extract": only_extract,
"seed": seed,
"count": count,
},
)


@ensure_telemetry_enabled
def track_markers_extracted(trackers_count: int) -> None:
"""Track when markers have been extracted by a user.
Args:
trackers_count: The actual number of trackers processed
"""
_track(
TELEMETRY_MARKERS_EXTRACTED_EVENT, {"trackers_count": trackers_count},
)


@ensure_telemetry_enabled
def track_markers_stats_computed(trackers_count: int) -> None:
"""Track when stats over markers have been computed by a user.
Args:
trackers_count: The actual number of trackers processed
"""
_track(
TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, {"trackers_count": trackers_count},
)


@ensure_telemetry_enabled
def track_markers_parsed_count(
marker_count: int, max_depth: int, branching_factor: int
) -> None:
"""Track when markers have been successfully parsed from config.
Args:
marker_count: The number of markers found in the config
max_depth: The maximum depth of any marker in the config
branching_factor: The maximum number of children of any marker in the config.
"""
_track(
TELEMETRY_MARKERS_PARSED_COUNT,
{
"marker_count": marker_count,
"max_depth": max_depth,
"branching_factor": branching_factor,
},
)
46 changes: 46 additions & 0 deletions tests/core/evaluation/test_marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,32 @@ def test_marker_from_path_only_reads_yamls(tmp_path: Path,):
)


@pytest.mark.parametrize(
"configs",
[
{"my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"}},
{
"my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"},
"my_marker2": {IntentDetectedMarker.positive_tag(): "this-action"},
},
],
)
def test_marker_from_path_adds_special_or_marker(tmp_path: Path, configs: Any):

yaml_file = tmp_path / "config.yml"
rasa.shared.utils.io.write_yaml(
data=configs, target=yaml_file,
)
loaded = Marker.from_path(tmp_path)
assert isinstance(loaded, OrMarker)
assert loaded.name == Marker.ANY_MARKER
assert len(loaded.sub_markers) == len(configs)
assert all(
isinstance(sub_marker, IntentDetectedMarker)
for sub_marker in loaded.sub_markers
)


@pytest.mark.parametrize(
"path_config_tuples",
[
Expand Down Expand Up @@ -656,3 +682,23 @@ def test_marker_from_path_raises(
rasa.shared.utils.io.write_yaml(data=config, target=full_path)
with pytest.raises(InvalidMarkerConfig):
Marker.from_path(tmp_path)


@pytest.mark.parametrize(
"marker,expected_depth",
[
(
AndMarker(
markers=[
SlotSetMarker("s1"),
OrMarker([IntentDetectedMarker("4"), IntentDetectedMarker("6"),]),
],
),
3,
),
(SlotSetMarker("s1"), 1),
(AndMarker(markers=[SlotSetMarker("s1"), IntentDetectedMarker("6"),],), 2),
],
)
def test_marker_depth(marker: Marker, expected_depth: int):
assert marker.max_depth() == expected_depth
10 changes: 9 additions & 1 deletion tests/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,18 @@ async def test_events_schema(

telemetry.track_nlu_model_test(TrainingData())

telemetry.track_markers_extraction_initiated("all", False, False, None)

telemetry.track_markers_extracted(1)

telemetry.track_markers_stats_computed(1)

telemetry.track_markers_parsed_count(1, 1, 1)

pending = asyncio.all_tasks() - initial
await asyncio.gather(*pending)

assert mock.call_count == 15
assert mock.call_count == 19

for args, _ in mock.call_args_list:
event = args[0]
Expand Down

0 comments on commit 8e922dd

Please sign in to comment.