Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Success markers telemetry #10065

Merged
merged 20 commits into from
Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/10065.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add telemetry to markers extraction.
86 changes: 86 additions & 0 deletions docs/docs/telemetry/events.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"Model Training",
"Model Testing",
"Model Serving",
"Markers Extraction",
"Data Handling",
"Miscellaneous"
],
Expand Down Expand Up @@ -422,6 +423,91 @@
"num_synonyms",
"num_regexes"
]
},
"Markers Extraction Initiated": {
"description": "Triggered when marker extraction has been initiated.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"strategy": {
"type": "string",
"description": "Strategy to use when selecting trackers to extract from."
},
"only_extract": {
"type": "boolean",
"description": "Indicates if path to write out statistics hasn't been specified."
},
"seed": {
"type": "boolean",
"description": "The seed to initialise the random number generator for use with the 'sample' strategy."
},
"count": {
"type": ["integer", "null"],
"description": "Number of trackers to extract from (for any strategy except 'all')."
}
},
"additionalProperties": false,
"required": [
"strategy",
"only_extract",
"seed",
"count"
Comment on lines +450 to +454
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@usc-m are all of these required? 🤔 or just the top two?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the perspective of telemetry I think it makes sense - we'd want to know if it's being used or not. The types of those are string or null so if it's not present we'd see an explicit null. Would be good to check with someone who understands the telemetry schema here better (though I think this is only used in the docs to explain what data we send back to users and isn't used to actually validate anything internally)

]
},
"Markers Extracted": {
"description": "Triggered when markers have been extracted.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"trackers_count": {
"type": "integer",
"description": "Number of processed trackers."
}
},
"additionalProperties": false,
"required": [
"trackers_count"
]
},
"Markers Parsed": {
"description": "Triggered when markers have been successfully parsed.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"marker_count": {
"type": "integer",
"description": "Number of parsed markers."
},
"max_depth": {
"type": "integer",
"description": "Maximum depth of the parsed markers."
},
"branching_factor": {
"type": "integer",
"description": "Maximum number of children of any of the parsed markers."
}
},
"additionalProperties": false,
"required": [
"marker_count",
"max_depth",
"branching_factor"
]
},
"Markers Statistics Computed": {
"description": "Triggered when marker statistics have been computed.",
"type": "object",
"section": "Markers Extraction",
"properties": {
"trackers_count": {
"type": "integer",
"description": "Number of processed trackers."
}
},
"additionalProperties": false,
"required": [
"trackers_count"
]
}
}
}
19 changes: 19 additions & 0 deletions rasa/cli/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import List, Text, Optional
from pathlib import Path

from rasa import telemetry
from rasa.core.utils import AvailableEndpoints
from rasa.core.tracker_store import TrackerStore
from rasa.core.evaluation.marker_tracker_loader import MarkerTrackerLoader
Expand Down Expand Up @@ -131,6 +132,13 @@ def _run_markers(
computed per session will be stored in
'<path-to-stats-folder>/statistics-per-session.csv'.
"""
telemetry.track_markers_extraction_initiated(
strategy=strategy,
only_extract=stats_file_prefix is not None,
seed=seed is not None,
count=count,
)

domain = Domain.load(domain_path) if domain_path else None
markers = Marker.from_path(config)
if domain and not markers.validate_against_domain(domain):
Expand All @@ -139,6 +147,17 @@ def _run_markers(
"Please see errors listed above and fix before running again."
)

# Calculate telemetry
# Subtract one to remove the virtual OR over all markers
num_markers = len(markers) - 1
Copy link
Contributor

@ka-bu ka-bu Nov 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the total number of all conditions and operators used in all marker configurations that were evaluated -- to get the number of user-defined markers we should

  1. get rid of the special case here and always add an "ANY_MARKER
  2. len(markers.sub_markers) instead of len(markers) ... My bad, iter gives us all conditions and operators but len is just the sub-markers all good 👍 - but because of 1. there might not be sub-markers if there is a single user defined marker

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@usc-m , I could help and add that while you work on other comments if you like?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could also track something like max( len(sub_marker) for sub_marker in markers), i.e. the maximum number of conditions and operators used in a single user defined marker, to get a glimpse of how complex the queries are

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I suggested that too, also the branching factor (maximum number of children under any marker)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, that's also a nice idea -- will definitely tell us if people miss begin able to re-use a marker definition

max_depth = markers.max_depth() - 1
# Find maximum branching of marker
branching_factor = max(
len(sub_marker) - 1 for marker in markers.sub_markers for sub_marker in marker
)

telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

tracker_loader = _create_tracker_loader(endpoint_config, strategy, count, seed)

def _append_suffix(path: Optional[Path], suffix: Text) -> Optional[Path]:
Expand Down
27 changes: 21 additions & 6 deletions rasa/core/evaluation/marker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from rasa.shared.data import is_likely_yaml_file
from rasa.shared.exceptions import InvalidConfigException, RasaException
from rasa.shared.core.events import ActionExecuted, UserUttered, Event
from rasa import telemetry
from rasa.shared.core.domain import Domain
from rasa.shared.core.trackers import DialogueStateTracker
from rasa.utils.io import WriteRow
Expand Down Expand Up @@ -271,6 +272,11 @@ def validate_against_domain(self, domain: Domain) -> bool:
"""
...

@abstractmethod
def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
...

def evaluate_events(
self, events: List[Event], recursive: bool = False
) -> List[SessionEvaluation]:
Expand Down Expand Up @@ -447,12 +453,8 @@ def from_path(cls, path: Union[Path, Text]) -> Marker:
loaded_markers.append(marker)

# combine the markers
if len(loaded_markers) > 1:
marker = OrMarker(markers=loaded_markers)
marker.name = Marker.ANY_MARKER # cannot be set via name parameter
else:
marker = loaded_markers[0]

marker = OrMarker(markers=loaded_markers)
marker.name = Marker.ANY_MARKER # cannot be set via name parameter
return marker

@staticmethod
Expand Down Expand Up @@ -608,6 +610,9 @@ def evaluate_trackers(
if tracker:
tracker_result = self.evaluate_events(tracker.events)
processed_trackers[tracker.sender_id] = tracker_result

processed_trackers_count = len(processed_trackers)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really want the number of processed trackers or processed sessions (or both)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in this case processed trackers is actually useful because we get it before, as a command line argument (the users intent) and after (what they actually got). Might help to highlight issues in our models of how tracker stores work. Right now we don't collect session info anywhere so I'm not sure what info we could get from it, but we could also just collect it as well if you think it would be useful

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no clue how people use sender_ids and trackers usually, so no idea if that is better - but I guess it won't hurt if we don't add this now (and maybe later)

telemetry.track_markers_extracted(processed_trackers_count)
Marker._save_results(output_file, processed_trackers)

# Compute and write statistics if requested.
Expand All @@ -622,6 +627,8 @@ def evaluate_trackers(
session_idx=session_idx,
meta_data_on_relevant_events_per_marker=session_result,
)

telemetry.track_markers_stats_computed(processed_trackers_count)
if overall_stats_file:
stats.overall_statistic_to_csv(path=overall_stats_file)
if session_stats_file:
Expand Down Expand Up @@ -756,6 +763,10 @@ def validate_against_domain(self, domain: Domain) -> bool:
marker.validate_against_domain(domain) for marker in self.sub_markers
)

def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
return 1 + max(child.max_depth() for child in self.sub_markers)

@staticmethod
def from_tag_and_sub_config(
tag: Text, sub_config: Any, name: Optional[Text] = None,
Expand Down Expand Up @@ -837,6 +848,10 @@ def __len__(self) -> int:
"""Returns the count of all markers that are part of this marker."""
return 1

def max_depth(self) -> int:
"""Gets the maximum depth from this point in the marker tree."""
return 1

@staticmethod
def from_tag_and_sub_config(
tag: Text, sub_config: Any, name: Optional[Text] = None
Expand Down
73 changes: 73 additions & 0 deletions rasa/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@
TELEMETRY_VISUALIZATION_STARTED_EVENT = "Story Visualization Started"
TELEMETRY_TEST_CORE_EVENT = "Model Core Tested"
TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested"
TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT = "Markers Extraction Initiated"
TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted"
TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Statistics Computed"
TELEMETRY_MARKERS_PARSED_COUNT = "Markers Parsed"

# used to calculate the context on the first call and cache it afterwards
TELEMETRY_CONTEXT = None
Expand Down Expand Up @@ -1005,3 +1009,72 @@ def track_nlu_model_test(test_data: "TrainingData") -> None:
"num_regexes": len(test_data.regex_features),
},
)


@ensure_telemetry_enabled
def track_markers_extraction_initiated(
strategy: Text, only_extract: bool, seed: bool, count: Optional[int],
) -> None:
"""Track when a user tries to extract success markers.

Args:
strategy: The strategy the user is using for tracker selection
only_extract: Indicates if the user is only extracting markers or also
producing stats
seed: Indicates if the user used a seed for this attempt
count: (Optional) The number of trackers the user is trying to select.
"""
_track(
TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT,
{
"strategy": strategy,
"only_extract": only_extract,
"seed": seed,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will report the actual seed used by the user - do we want the actual seed or do we want to just know if they used a seed? Is this something that's worth changing or are we considering the actual seed value not important enough to be careful about not collecting?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only want to know if the user set a seed. I don't think it counts as private info so maybe it doesn't matter if we collect it. But if we can do true/false that might be better.

"count": count,
},
)


@ensure_telemetry_enabled
def track_markers_extracted(trackers_count: int) -> None:
"""Track when markers have been extracted by a user.

Args:
trackers_count: The actual number of trackers processed
"""
_track(
TELEMETRY_MARKERS_EXTRACTED_EVENT, {"trackers_count": trackers_count},
)


@ensure_telemetry_enabled
def track_markers_stats_computed(trackers_count: int) -> None:
"""Track when stats over markers have been computed by a user.

Args:
trackers_count: The actual number of trackers processed
"""
_track(
TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, {"trackers_count": trackers_count},
)


@ensure_telemetry_enabled
def track_markers_parsed_count(
marker_count: int, max_depth: int, branching_factor: int
) -> None:
"""Track when markers have been successfully parsed from config.

Args:
marker_count: The number of markers found in the config
max_depth: The maximum depth of any marker in the config
branching_factor: The maximum number of children of any marker in the config.
"""
_track(
TELEMETRY_MARKERS_PARSED_COUNT,
{
"marker_count": marker_count,
"max_depth": max_depth,
"branching_factor": branching_factor,
},
)
46 changes: 46 additions & 0 deletions tests/core/evaluation/test_marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,32 @@ def test_marker_from_path_only_reads_yamls(tmp_path: Path,):
)


@pytest.mark.parametrize(
"configs",
[
{"my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"}},
{
"my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"},
"my_marker2": {IntentDetectedMarker.positive_tag(): "this-action"},
},
],
)
def test_marker_from_path_adds_special_or_marker(tmp_path: Path, configs: Any):

yaml_file = tmp_path / "config.yml"
rasa.shared.utils.io.write_yaml(
data=configs, target=yaml_file,
)
loaded = Marker.from_path(tmp_path)
assert isinstance(loaded, OrMarker)
assert loaded.name == Marker.ANY_MARKER
assert len(loaded.sub_markers) == len(configs)
assert all(
isinstance(sub_marker, IntentDetectedMarker)
for sub_marker in loaded.sub_markers
)


@pytest.mark.parametrize(
"path_config_tuples",
[
Expand Down Expand Up @@ -656,3 +682,23 @@ def test_marker_from_path_raises(
rasa.shared.utils.io.write_yaml(data=config, target=full_path)
with pytest.raises(InvalidMarkerConfig):
Marker.from_path(tmp_path)


@pytest.mark.parametrize(
"marker,expected_depth",
[
(
AndMarker(
markers=[
SlotSetMarker("s1"),
OrMarker([IntentDetectedMarker("4"), IntentDetectedMarker("6"),]),
],
),
3,
),
(SlotSetMarker("s1"), 1),
(AndMarker(markers=[SlotSetMarker("s1"), IntentDetectedMarker("6"),],), 2),
],
)
def test_marker_depth(marker: Marker, expected_depth: int):
assert marker.max_depth() == expected_depth
10 changes: 9 additions & 1 deletion tests/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,18 @@ async def test_events_schema(

telemetry.track_nlu_model_test(TrainingData())

telemetry.track_markers_extraction_initiated("all", False, False, None)

telemetry.track_markers_extracted(1)

telemetry.track_markers_stats_computed(1)

telemetry.track_markers_parsed_count(1, 1, 1)

pending = asyncio.all_tasks() - initial
await asyncio.gather(*pending)

assert mock.call_count == 15
assert mock.call_count == 19

for args, _ in mock.call_args_list:
event = args[0]
Expand Down