From 2d32b837037777ece86913003b5551ba8bd90c57 Mon Sep 17 00:00:00 2001 From: alwx Date: Tue, 2 Nov 2021 18:00:06 +0100 Subject: [PATCH 01/18] Markers telemetry --- rasa/cli/evaluate.py | 5 ++++- rasa/core/evaluation/marker_base.py | 6 ++++++ rasa/telemetry.py | 33 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 5352d63ccad3..2fcaff38d324 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -2,6 +2,7 @@ from typing import List, Text, Optional from pathlib import Path +from rasa import telemetry from rasa.core.utils import AvailableEndpoints from rasa.core.tracker_store import TrackerStore from rasa.core.evaluation.marker_tracker_loader import MarkerTrackerLoader @@ -131,7 +132,9 @@ def _run_markers( computed per session will be stored in '/statistics-per-session.csv'. """ - domain = Domain.load(domain_path) if domain_path else None + telemetry.track_markers_evaluation_triggered() + + domain = Domain.load(domain_path) if domain_path else None markers = Marker.from_path(config) if domain and not markers.validate_against_domain(domain): rasa.shared.utils.cli.print_error_and_exit( diff --git a/rasa/core/evaluation/marker_base.py b/rasa/core/evaluation/marker_base.py index ae22bbe0f698..1f5fe23b766b 100644 --- a/rasa/core/evaluation/marker_base.py +++ b/rasa/core/evaluation/marker_base.py @@ -26,6 +26,7 @@ from rasa.shared.data import is_likely_yaml_file from rasa.shared.exceptions import InvalidConfigException, RasaException from rasa.shared.core.events import ActionExecuted, UserUttered, Event +from rasa import telemetry from rasa.shared.core.domain import Domain from rasa.shared.core.trackers import DialogueStateTracker from rasa.utils.io import WriteRow @@ -608,6 +609,9 @@ def evaluate_trackers( if tracker: tracker_result = self.evaluate_events(tracker.events) processed_trackers[tracker.sender_id] = tracker_result + + processed_trackers_count = len(processed_trackers) + telemetry.track_markers_extracted(processed_trackers_count) Marker._save_results(output_file, processed_trackers) # Compute and write statistics if requested. @@ -622,6 +626,8 @@ def evaluate_trackers( session_idx=session_idx, meta_data_on_relevant_events_per_marker=session_result, ) + + telemetry.track_markers_stats_computed(processed_trackers_count) if overall_stats_file: stats.overall_statistic_to_csv(path=overall_stats_file) if session_stats_file: diff --git a/rasa/telemetry.py b/rasa/telemetry.py index af4050567da9..1db3b72f13f1 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -94,6 +94,9 @@ TELEMETRY_VISUALIZATION_STARTED_EVENT = "Story Visualization Started" TELEMETRY_TEST_CORE_EVENT = "Model Core Tested" TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested" +TELEMETRY_MARKERS_EVALUATION_TRIGGERED_EVENT = "Markers Evaluation Triggered" +TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted" +TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Stats Computed" # used to calculate the context on the first call and cache it afterwards TELEMETRY_CONTEXT = None @@ -1005,3 +1008,33 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: "num_regexes": len(test_data.regex_features), }, ) + + +@ensure_telemetry_enabled +def track_markers_evaluation_triggered() -> None: + _track( + TELEMETRY_MARKERS_EVALUATION_TRIGGERED_EVENT, + { + + }, + ) + + +@ensure_telemetry_enabled +def track_markers_extracted(count: int) -> None: + _track( + TELEMETRY_MARKERS_EXTRACTED_EVENT, + { + "count": count + }, + ) + + +@ensure_telemetry_enabled +def track_markers_stats_computed(count: int) -> None: + _track( + TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, + { + "count": count + }, + ) \ No newline at end of file From 56fb74fac0453abdd864fe889f8a3dd6d88503c2 Mon Sep 17 00:00:00 2001 From: alwx Date: Tue, 2 Nov 2021 18:11:26 +0100 Subject: [PATCH 02/18] Everything without tests --- rasa/cli/evaluate.py | 7 ++++++- rasa/telemetry.py | 12 ++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 2fcaff38d324..42c86b6e25fa 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -132,7 +132,12 @@ def _run_markers( computed per session will be stored in '/statistics-per-session.csv'. """ - telemetry.track_markers_evaluation_triggered() + telemetry.track_markers_evaluation_triggered( + strategy=strategy, + only_extract=stats_file is None, + seed=seed, + count=count, + ) domain = Domain.load(domain_path) if domain_path else None markers = Marker.from_path(config) diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 1db3b72f13f1..3d3e1e088949 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -1011,11 +1011,19 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: @ensure_telemetry_enabled -def track_markers_evaluation_triggered() -> None: +def track_markers_evaluation_triggered( + strategy: Text, + only_extract: bool, + seed: Optional[int], + count: Optional[int], +) -> None: _track( TELEMETRY_MARKERS_EVALUATION_TRIGGERED_EVENT, { - + "strategy": strategy, + "only_extract": only_extract, + "seed": seed, + "count": count, }, ) From 2d8b79e882efb92badc08854dbcc76218580907a Mon Sep 17 00:00:00 2001 From: alwx Date: Tue, 2 Nov 2021 18:32:28 +0100 Subject: [PATCH 03/18] Specified events.json --- docs/docs/telemetry/events.json | 58 +++++++++++++++++++++++++++++++++ rasa/cli/evaluate.py | 7 ++-- rasa/telemetry.py | 27 +++++---------- 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index 94017d0d4eb0..fce5617a0112 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -3,6 +3,7 @@ "Model Training", "Model Testing", "Model Serving", + "Markers Evaluation", "Data Handling", "Miscellaneous" ], @@ -422,6 +423,63 @@ "num_synonyms", "num_regexes" ] + }, + "Markers Evaluation Initiated": { + "description": "Triggered when markers evaluation has been initiated.", + "type": "object", + "section": "Markers Evaluation", + "properties": { + "strategy": { + "type": "string", + "description": "Strategy to use when selecting trackers to extract from." + }, + "only_extract": { + "type": "boolean", + "description": "Indicates if path to write out statistics hasn't been specified." + }, + "seed": { + "type": ["string", "null"], + "description": "The seed to initialise the random number generator for use with the 'sample' strategy." + }, + "count": { + "type": ["string", "null"], + "description": "Number of trackers to extract from (for any strategy except 'all')." + } + }, + "additionalProperties": false, + "required": [ + "count" + ] + }, + "Markers Extracted": { + "description": "Triggered when markers has been extracted.", + "type": "object", + "section": "Markers Evaluation", + "properties": { + "trackers_count": { + "type": "integer", + "description": "Number of processed trackers." + } + }, + "additionalProperties": false, + "required": [ + "trackers_count" + ] + }, + "Markers Stats Computed": { + "description": "Triggered when marker stats has been computed.", + "type": "object", + "section": "Markers Evaluation", + "properties": { + "trackers_count": { + "type": "integer", + "description": "Number of processed trackers." + } + }, + "additionalProperties": false, + "required": [ + "trackers_count" + ] } } } diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 42c86b6e25fa..de3d3fdc56b5 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -132,11 +132,8 @@ def _run_markers( computed per session will be stored in '/statistics-per-session.csv'. """ - telemetry.track_markers_evaluation_triggered( - strategy=strategy, - only_extract=stats_file is None, - seed=seed, - count=count, + telemetry.track_markers_evaluation_initiated( + strategy=strategy, only_extract=stats_file is not None, seed=seed, count=count, ) domain = Domain.load(domain_path) if domain_path else None diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 3d3e1e088949..9569a2e1a24c 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -94,7 +94,7 @@ TELEMETRY_VISUALIZATION_STARTED_EVENT = "Story Visualization Started" TELEMETRY_TEST_CORE_EVENT = "Model Core Tested" TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested" -TELEMETRY_MARKERS_EVALUATION_TRIGGERED_EVENT = "Markers Evaluation Triggered" +TELEMETRY_MARKERS_EVALUATION_INITIATED_EVENT = "Markers Evaluation Initiated" TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted" TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Stats Computed" @@ -1011,14 +1011,11 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: @ensure_telemetry_enabled -def track_markers_evaluation_triggered( - strategy: Text, - only_extract: bool, - seed: Optional[int], - count: Optional[int], +def track_markers_evaluation_initiated( + strategy: Text, only_extract: bool, seed: Optional[int], count: Optional[int], ) -> None: _track( - TELEMETRY_MARKERS_EVALUATION_TRIGGERED_EVENT, + TELEMETRY_MARKERS_EVALUATION_INITIATED_EVENT, { "strategy": strategy, "only_extract": only_extract, @@ -1029,20 +1026,14 @@ def track_markers_evaluation_triggered( @ensure_telemetry_enabled -def track_markers_extracted(count: int) -> None: +def track_markers_extracted(trackers_count: int) -> None: _track( - TELEMETRY_MARKERS_EXTRACTED_EVENT, - { - "count": count - }, + TELEMETRY_MARKERS_EXTRACTED_EVENT, {"trackers_count": trackers_count}, ) @ensure_telemetry_enabled -def track_markers_stats_computed(count: int) -> None: +def track_markers_stats_computed(trackers_count: int) -> None: _track( - TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, - { - "count": count - }, - ) \ No newline at end of file + TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, {"trackers_count": trackers_count}, + ) From 4f7a6ff0d36a04fad93a2e903204f80ab1f7c240 Mon Sep 17 00:00:00 2001 From: alwx Date: Tue, 2 Nov 2021 18:34:46 +0100 Subject: [PATCH 04/18] Test added --- tests/test_telemetry.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 813c095812f6..dfaf6255d51c 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -81,10 +81,16 @@ async def test_events_schema( telemetry.track_nlu_model_test(TrainingData()) + telemetry.track_markers_evaluation_initiated("all", False, None, None) + + telemetry.track_markers_extracted(1) + + telemetry.track_markers_stats_computed(1) + pending = asyncio.all_tasks() - initial await asyncio.gather(*pending) - assert mock.call_count == 15 + assert mock.call_count == 18 for args, _ in mock.call_args_list: event = args[0] From 15c1c6dbcb95509bffce75dde3eea366dd72d06e Mon Sep 17 00:00:00 2001 From: alwx Date: Tue, 2 Nov 2021 18:36:46 +0100 Subject: [PATCH 05/18] Changelog entry --- changelog/10065.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/10065.misc.md diff --git a/changelog/10065.misc.md b/changelog/10065.misc.md new file mode 100644 index 000000000000..620cd7b398bb --- /dev/null +++ b/changelog/10065.misc.md @@ -0,0 +1 @@ +Add telemetry to markers evaluation. \ No newline at end of file From fd1d7e022421b8341b74948d4dab5e2c17277d2c Mon Sep 17 00:00:00 2001 From: alwx Date: Wed, 3 Nov 2021 09:56:51 +0100 Subject: [PATCH 06/18] Naming fixes --- changelog/10065.misc.md | 2 +- docs/docs/telemetry/events.json | 17 ++++++++++------- rasa/cli/evaluate.py | 2 +- rasa/telemetry.py | 6 +++--- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/changelog/10065.misc.md b/changelog/10065.misc.md index 620cd7b398bb..259e2eb009fe 100644 --- a/changelog/10065.misc.md +++ b/changelog/10065.misc.md @@ -1 +1 @@ -Add telemetry to markers evaluation. \ No newline at end of file +Add telemetry to markers extraction. \ No newline at end of file diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index fce5617a0112..a2e6c5f85aa5 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -3,7 +3,7 @@ "Model Training", "Model Testing", "Model Serving", - "Markers Evaluation", + "Markers Extraction", "Data Handling", "Miscellaneous" ], @@ -424,10 +424,10 @@ "num_regexes" ] }, - "Markers Evaluation Initiated": { - "description": "Triggered when markers evaluation has been initiated.", + "Markers Extraction Initiated": { + "description": "Triggered when markers extraction has been initiated.", "type": "object", - "section": "Markers Evaluation", + "section": "Markers Extraction", "properties": { "strategy": { "type": "string", @@ -448,13 +448,16 @@ }, "additionalProperties": false, "required": [ + "strategy", + "only_extract", + "seed", "count" ] }, "Markers Extracted": { - "description": "Triggered when markers has been extracted.", + "description": "Triggered when markers have been extracted.", "type": "object", - "section": "Markers Evaluation", + "section": "Markers Extraction", "properties": { "trackers_count": { "type": "integer", @@ -469,7 +472,7 @@ "Markers Stats Computed": { "description": "Triggered when marker stats has been computed.", "type": "object", - "section": "Markers Evaluation", + "section": "Markers Extraction", "properties": { "trackers_count": { "type": "integer", diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index de3d3fdc56b5..01847ca1e724 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -132,7 +132,7 @@ def _run_markers( computed per session will be stored in '/statistics-per-session.csv'. """ - telemetry.track_markers_evaluation_initiated( + telemetry.track_markers_extraction_initiated( strategy=strategy, only_extract=stats_file is not None, seed=seed, count=count, ) diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 9569a2e1a24c..635f4885660d 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -94,7 +94,7 @@ TELEMETRY_VISUALIZATION_STARTED_EVENT = "Story Visualization Started" TELEMETRY_TEST_CORE_EVENT = "Model Core Tested" TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested" -TELEMETRY_MARKERS_EVALUATION_INITIATED_EVENT = "Markers Evaluation Initiated" +TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT = "Markers Extraction Initiated" TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted" TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Stats Computed" @@ -1011,11 +1011,11 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: @ensure_telemetry_enabled -def track_markers_evaluation_initiated( +def track_markers_extraction_initiated( strategy: Text, only_extract: bool, seed: Optional[int], count: Optional[int], ) -> None: _track( - TELEMETRY_MARKERS_EVALUATION_INITIATED_EVENT, + TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT, { "strategy": strategy, "only_extract": only_extract, From c95dcde6abc378a8aabfa763772ec24af395cbbe Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 14:27:02 +0000 Subject: [PATCH 07/18] Fix lint, fix CLI bug No way to actually change config path, now fixed with nargs --- rasa/cli/arguments/evaluate.py | 1 + rasa/cli/evaluate.py | 7 +++++-- rasa/core/evaluation/marker_base.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rasa/cli/arguments/evaluate.py b/rasa/cli/arguments/evaluate.py index 5b4f7a6de5c2..262bd41674e8 100644 --- a/rasa/cli/arguments/evaluate.py +++ b/rasa/cli/arguments/evaluate.py @@ -13,6 +13,7 @@ def set_markers_arguments(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--config", default="markers.yml", + nargs="?", type=str, help="The config file(s) containing marker definitions. This can be a single " "YAML file, or a directory that contains several files with marker " diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 01847ca1e724..a4fc524b8c87 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -133,10 +133,13 @@ def _run_markers( '/statistics-per-session.csv'. """ telemetry.track_markers_extraction_initiated( - strategy=strategy, only_extract=stats_file is not None, seed=seed, count=count, + strategy=strategy, + only_extract=stats_file_prefix is not None, + seed=seed, + count=count, ) - domain = Domain.load(domain_path) if domain_path else None + domain = Domain.load(domain_path) if domain_path else None markers = Marker.from_path(config) if domain and not markers.validate_against_domain(domain): rasa.shared.utils.cli.print_error_and_exit( diff --git a/rasa/core/evaluation/marker_base.py b/rasa/core/evaluation/marker_base.py index 1f5fe23b766b..27f6908e8efd 100644 --- a/rasa/core/evaluation/marker_base.py +++ b/rasa/core/evaluation/marker_base.py @@ -610,7 +610,7 @@ def evaluate_trackers( tracker_result = self.evaluate_events(tracker.events) processed_trackers[tracker.sender_id] = tracker_result - processed_trackers_count = len(processed_trackers) + processed_trackers_count = len(processed_trackers) telemetry.track_markers_extracted(processed_trackers_count) Marker._save_results(output_file, processed_trackers) @@ -627,7 +627,7 @@ def evaluate_trackers( meta_data_on_relevant_events_per_marker=session_result, ) - telemetry.track_markers_stats_computed(processed_trackers_count) + telemetry.track_markers_stats_computed(processed_trackers_count) if overall_stats_file: stats.overall_statistic_to_csv(path=overall_stats_file) if session_stats_file: From 7359438ff7fa428e295f1526edcfadd263a1cad5 Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 14:36:33 +0000 Subject: [PATCH 08/18] Add markers parsed telemetry --- docs/docs/telemetry/events.json | 15 +++++++++++++++ rasa/cli/evaluate.py | 4 ++++ rasa/telemetry.py | 8 ++++++++ 3 files changed, 27 insertions(+) diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index a2e6c5f85aa5..b0cffa9fcaa3 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -469,6 +469,21 @@ "trackers_count" ] }, + "Markers Parsed": { + "description": "Triggered when markers have been successfully parsed.", + "type": "object", + "section": "Markers Extraction", + "properties": { + "marker_count": { + "type": "integer", + "description": "Number of parsed markers." + } + }, + "additionalProperties": false, + "required": [ + "marker_count" + ] + }, "Markers Stats Computed": { "description": "Triggered when marker stats has been computed.", "type": "object", diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index a4fc524b8c87..60c57796baa1 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -147,6 +147,10 @@ def _run_markers( "Please see errors listed above and fix before running again." ) + # Subtract one to remove the virtual OR over all markers + num_markers = len(markers) - 1 + telemetry.track_markers_parsed_count(num_markers) + tracker_loader = _create_tracker_loader(endpoint_config, strategy, count, seed) def _append_suffix(path: Optional[Path], suffix: Text) -> Optional[Path]: diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 635f4885660d..b87e909b641c 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -97,6 +97,7 @@ TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT = "Markers Extraction Initiated" TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted" TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Stats Computed" +TELEMETRY_MARKERS_PARSED_COUNT = "Markers Parsed" # used to calculate the context on the first call and cache it afterwards TELEMETRY_CONTEXT = None @@ -1037,3 +1038,10 @@ def track_markers_stats_computed(trackers_count: int) -> None: _track( TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, {"trackers_count": trackers_count}, ) + + +@ensure_telemetry_enabled +def track_markers_parsed_count(marker_count: int) -> None: + _track( + TELEMETRY_MARKERS_PARSED_COUNT, {"marker_count": marker_count}, + ) From 1b4535a53ad366f6e360efabde852fef10295117 Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 14:41:54 +0000 Subject: [PATCH 09/18] Document telemetry functions --- rasa/telemetry.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/rasa/telemetry.py b/rasa/telemetry.py index b87e909b641c..cc53d47befd4 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -1015,6 +1015,16 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: def track_markers_extraction_initiated( strategy: Text, only_extract: bool, seed: Optional[int], count: Optional[int], ) -> None: + """Track when a user tries to extract success markers. + + Args: + strategy: The strategy the user is using for tracker selection + only_extract: Indicates if the user is only extracting markers or also + producing stats + seed: (Optional) The seed used if strategy is 'sample' and the user selects + one + count: (Optional) The number of trackers the user is trying to select. + """ _track( TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT, { @@ -1028,6 +1038,11 @@ def track_markers_extraction_initiated( @ensure_telemetry_enabled def track_markers_extracted(trackers_count: int) -> None: + """Track when markers have been extracted by a user. + + Args: + trackers_count: The actual number of trackers processed + """ _track( TELEMETRY_MARKERS_EXTRACTED_EVENT, {"trackers_count": trackers_count}, ) @@ -1035,6 +1050,11 @@ def track_markers_extracted(trackers_count: int) -> None: @ensure_telemetry_enabled def track_markers_stats_computed(trackers_count: int) -> None: + """Track when stats over markers have been computed by a user. + + Args: + trackers_count: The actual number of trackers processed + """ _track( TELEMETRY_MARKERS_STATS_COMPUTED_EVENT, {"trackers_count": trackers_count}, ) @@ -1042,6 +1062,11 @@ def track_markers_stats_computed(trackers_count: int) -> None: @ensure_telemetry_enabled def track_markers_parsed_count(marker_count: int) -> None: + """Track when markers have been successfully parsed from config. + + Args: + marker_count: The number of markers found in the config + """ _track( TELEMETRY_MARKERS_PARSED_COUNT, {"marker_count": marker_count}, ) From 015cf4d3fa0fcff1c7c5c98bc9373ec10a8c715b Mon Sep 17 00:00:00 2001 From: ka-bu Date: Wed, 3 Nov 2021 16:39:34 +0100 Subject: [PATCH 10/18] always add ANY_MARKER; add test --- rasa/core/evaluation/marker_base.py | 10 +++------- tests/core/evaluation/test_marker.py | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/rasa/core/evaluation/marker_base.py b/rasa/core/evaluation/marker_base.py index 27f6908e8efd..83ca2ff85dee 100644 --- a/rasa/core/evaluation/marker_base.py +++ b/rasa/core/evaluation/marker_base.py @@ -407,7 +407,7 @@ def from_path(cls, path: Union[Path, Text]) -> Marker: that the names of the markers defined in these files are unique across all loaded files. - Note that all loaded markers will be combined into one marker via one + Note that all loaded markers will be combined into one marker via onetet artificial OR-operator. When evaluating the resulting marker, then the artificial OR-operator will be ignored and results will be reported for every sub-marker. @@ -448,12 +448,8 @@ def from_path(cls, path: Union[Path, Text]) -> Marker: loaded_markers.append(marker) # combine the markers - if len(loaded_markers) > 1: - marker = OrMarker(markers=loaded_markers) - marker.name = Marker.ANY_MARKER # cannot be set via name parameter - else: - marker = loaded_markers[0] - + marker = OrMarker(markers=loaded_markers) + marker.name = Marker.ANY_MARKER # cannot be set via name parameter return marker @staticmethod diff --git a/tests/core/evaluation/test_marker.py b/tests/core/evaluation/test_marker.py index cc1d9858d201..6745b49aa18f 100644 --- a/tests/core/evaluation/test_marker.py +++ b/tests/core/evaluation/test_marker.py @@ -618,6 +618,32 @@ def test_marker_from_path_only_reads_yamls(tmp_path: Path,): ) +@pytest.mark.parametrize( + "configs", + [ + {"my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"}}, + { + "my_marker1": {IntentDetectedMarker.positive_tag(): "this-intent"}, + "my_marker2": {IntentDetectedMarker.positive_tag(): "this-action"}, + }, + ], +) +def test_marker_from_path_adds_special_or_marker(tmp_path: Path, configs: Any): + + yaml_file = tmp_path / "config.yml" + rasa.shared.utils.io.write_yaml( + data=configs, target=yaml_file, + ) + loaded = Marker.from_path(tmp_path) + assert isinstance(loaded, OrMarker) + assert loaded.name == Marker.ANY_MARKER + assert len(loaded.sub_markers) == len(configs) + assert all( + isinstance(sub_marker, IntentDetectedMarker) + for sub_marker in loaded.sub_markers + ) + + @pytest.mark.parametrize( "path_config_tuples", [ From bef34e88c7722a72d90f1ce8e859a585381a7112 Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 16:37:20 +0000 Subject: [PATCH 11/18] Add complexity telemetry --- docs/docs/telemetry/events.json | 16 +++++++++++++--- rasa/cli/evaluate.py | 7 ++++++- rasa/core/evaluation/marker_base.py | 15 ++++++++++++++- rasa/telemetry.py | 18 +++++++++++++----- tests/core/evaluation/test_marker.py | 20 ++++++++++++++++++++ 5 files changed, 66 insertions(+), 10 deletions(-) diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index b0cffa9fcaa3..18435cb29f55 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -438,11 +438,11 @@ "description": "Indicates if path to write out statistics hasn't been specified." }, "seed": { - "type": ["string", "null"], + "type": "boolean", "description": "The seed to initialise the random number generator for use with the 'sample' strategy." }, "count": { - "type": ["string", "null"], + "type": ["integer", "null"], "description": "Number of trackers to extract from (for any strategy except 'all')." } }, @@ -477,11 +477,21 @@ "marker_count": { "type": "integer", "description": "Number of parsed markers." + }, + "max_depth": { + "type": "integer", + "description": "Maximum depth of the parsed markers." + }, + "branching_factor": { + "type": "integer", + "description": "Maximum number of children of any of the parsed markers." } }, "additionalProperties": false, "required": [ - "marker_count" + "marker_count", + "max_depth", + "branching_factor" ] }, "Markers Stats Computed": { diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 60c57796baa1..4335b0334c0d 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -147,9 +147,14 @@ def _run_markers( "Please see errors listed above and fix before running again." ) + # Calculate telemetry # Subtract one to remove the virtual OR over all markers num_markers = len(markers) - 1 - telemetry.track_markers_parsed_count(num_markers) + max_depth = markers.depth() - 1 + # Find maximum branching of marker + branching_factor = max(len(marker) - 1 for marker in markers) + + telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor) tracker_loader = _create_tracker_loader(endpoint_config, strategy, count, seed) diff --git a/rasa/core/evaluation/marker_base.py b/rasa/core/evaluation/marker_base.py index 83ca2ff85dee..cdad03162dda 100644 --- a/rasa/core/evaluation/marker_base.py +++ b/rasa/core/evaluation/marker_base.py @@ -272,6 +272,11 @@ def validate_against_domain(self, domain: Domain) -> bool: """ ... + @abstractmethod + def max_depth(self) -> int: + """Gets the maximum depth from this point in the marker tree.""" + ... + def evaluate_events( self, events: List[Event], recursive: bool = False ) -> List[SessionEvaluation]: @@ -407,7 +412,7 @@ def from_path(cls, path: Union[Path, Text]) -> Marker: that the names of the markers defined in these files are unique across all loaded files. - Note that all loaded markers will be combined into one marker via onetet + Note that all loaded markers will be combined into one marker via one artificial OR-operator. When evaluating the resulting marker, then the artificial OR-operator will be ignored and results will be reported for every sub-marker. @@ -758,6 +763,10 @@ def validate_against_domain(self, domain: Domain) -> bool: marker.validate_against_domain(domain) for marker in self.sub_markers ) + def max_depth(self) -> int: + """Gets the maximum depth from this point in the marker tree.""" + return 1 + max(child.max_depth() for child in self.sub_markers) + @staticmethod def from_tag_and_sub_config( tag: Text, sub_config: Any, name: Optional[Text] = None, @@ -839,6 +848,10 @@ def __len__(self) -> int: """Returns the count of all markers that are part of this marker.""" return 1 + def max_depth(self) -> int: + """Gets the maximum depth from this point in the marker tree.""" + return 1 + @staticmethod def from_tag_and_sub_config( tag: Text, sub_config: Any, name: Optional[Text] = None diff --git a/rasa/telemetry.py b/rasa/telemetry.py index cc53d47befd4..bf8b845a1716 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -1013,7 +1013,7 @@ def track_nlu_model_test(test_data: "TrainingData") -> None: @ensure_telemetry_enabled def track_markers_extraction_initiated( - strategy: Text, only_extract: bool, seed: Optional[int], count: Optional[int], + strategy: Text, only_extract: bool, seed: bool, count: Optional[int], ) -> None: """Track when a user tries to extract success markers. @@ -1021,8 +1021,7 @@ def track_markers_extraction_initiated( strategy: The strategy the user is using for tracker selection only_extract: Indicates if the user is only extracting markers or also producing stats - seed: (Optional) The seed used if strategy is 'sample' and the user selects - one + seed: Indicates if the user used a seed for this attempt count: (Optional) The number of trackers the user is trying to select. """ _track( @@ -1061,12 +1060,21 @@ def track_markers_stats_computed(trackers_count: int) -> None: @ensure_telemetry_enabled -def track_markers_parsed_count(marker_count: int) -> None: +def track_markers_parsed_count( + marker_count: int, max_depth: int, branching_factor: int +) -> None: """Track when markers have been successfully parsed from config. Args: marker_count: The number of markers found in the config + max_depth: The maximum depth of any marker in the config + branching_factor: The maximum number of children of any marker in the config. """ _track( - TELEMETRY_MARKERS_PARSED_COUNT, {"marker_count": marker_count}, + TELEMETRY_MARKERS_PARSED_COUNT, + { + "marker_count": marker_count, + "max_depth": max_depth, + "branching_factor": branching_factor, + }, ) diff --git a/tests/core/evaluation/test_marker.py b/tests/core/evaluation/test_marker.py index 6745b49aa18f..ced5fa514f4b 100644 --- a/tests/core/evaluation/test_marker.py +++ b/tests/core/evaluation/test_marker.py @@ -682,3 +682,23 @@ def test_marker_from_path_raises( rasa.shared.utils.io.write_yaml(data=config, target=full_path) with pytest.raises(InvalidMarkerConfig): Marker.from_path(tmp_path) + + +@pytest.mark.parametrize( + "marker,expected_depth", + [ + ( + AndMarker( + markers=[ + SlotSetMarker("s1"), + OrMarker([IntentDetectedMarker("4"), IntentDetectedMarker("6"),]), + ], + ), + 3, + ), + (SlotSetMarker("s1"), 1), + (AndMarker(markers=[SlotSetMarker("s1"), IntentDetectedMarker("6"),],), 2), + ], +) +def test_marker_depth(marker: Marker, expected_depth: int): + assert marker.max_depth() == expected_depth From 58174e6d0f219480e9b9058c03cf82813c24e3aa Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 16:51:16 +0000 Subject: [PATCH 12/18] Skip root marker to avoid double reporting total marker count --- rasa/cli/evaluate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 4335b0334c0d..cbab9312b5a1 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -152,7 +152,9 @@ def _run_markers( num_markers = len(markers) - 1 max_depth = markers.depth() - 1 # Find maximum branching of marker - branching_factor = max(len(marker) - 1 for marker in markers) + branching_factor = max( + len(sub_marker) - 1 for marker in markers.sub_markers for sub_marker in marker + ) telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor) From 4c98d1ac10576de550ae9013a985645db1ef1864 Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 17:11:31 +0000 Subject: [PATCH 13/18] Unfix erroneous CLI fix, fix typos --- docs/docs/telemetry/events.json | 6 +++--- rasa/cli/arguments/evaluate.py | 1 - rasa/cli/evaluate.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/docs/telemetry/events.json b/docs/docs/telemetry/events.json index 18435cb29f55..2ca399ba116a 100644 --- a/docs/docs/telemetry/events.json +++ b/docs/docs/telemetry/events.json @@ -425,7 +425,7 @@ ] }, "Markers Extraction Initiated": { - "description": "Triggered when markers extraction has been initiated.", + "description": "Triggered when marker extraction has been initiated.", "type": "object", "section": "Markers Extraction", "properties": { @@ -494,8 +494,8 @@ "branching_factor" ] }, - "Markers Stats Computed": { - "description": "Triggered when marker stats has been computed.", + "Markers Statistics Computed": { + "description": "Triggered when marker statistics have been computed.", "type": "object", "section": "Markers Extraction", "properties": { diff --git a/rasa/cli/arguments/evaluate.py b/rasa/cli/arguments/evaluate.py index 262bd41674e8..5b4f7a6de5c2 100644 --- a/rasa/cli/arguments/evaluate.py +++ b/rasa/cli/arguments/evaluate.py @@ -13,7 +13,6 @@ def set_markers_arguments(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--config", default="markers.yml", - nargs="?", type=str, help="The config file(s) containing marker definitions. This can be a single " "YAML file, or a directory that contains several files with marker " diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index cbab9312b5a1..151d648b9eef 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -150,7 +150,7 @@ def _run_markers( # Calculate telemetry # Subtract one to remove the virtual OR over all markers num_markers = len(markers) - 1 - max_depth = markers.depth() - 1 + max_depth = markers.max_depth() - 1 # Find maximum branching of marker branching_factor = max( len(sub_marker) - 1 for marker in markers.sub_markers for sub_marker in marker From 1df977602915e2d43f1c957e2999997fd6b7c3d1 Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Wed, 3 Nov 2021 17:47:05 +0000 Subject: [PATCH 14/18] Fix event schema test --- tests/test_telemetry.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index dfaf6255d51c..2bf1cec1dd77 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -81,12 +81,14 @@ async def test_events_schema( telemetry.track_nlu_model_test(TrainingData()) - telemetry.track_markers_evaluation_initiated("all", False, None, None) + telemetry.track_markers_extraction_initiated("all", False, None, None) telemetry.track_markers_extracted(1) telemetry.track_markers_stats_computed(1) + telemetry.track_markers_parsed_count(1, 1, 1) + pending = asyncio.all_tasks() - initial await asyncio.gather(*pending) From 059be982829d9f801aff8e41999fe87c85d626a5 Mon Sep 17 00:00:00 2001 From: ka-bu Date: Thu, 4 Nov 2021 09:15:33 +0100 Subject: [PATCH 15/18] Fix mock count in telemetry test --- tests/test_telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 2bf1cec1dd77..6825f9da8e8a 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -92,7 +92,7 @@ async def test_events_schema( pending = asyncio.all_tasks() - initial await asyncio.gather(*pending) - assert mock.call_count == 18 + assert mock.call_count == 19 for args, _ in mock.call_args_list: event = args[0] From c617ebdb464b3082da17c75425dd55f39177a4ab Mon Sep 17 00:00:00 2001 From: ka-bu Date: Thu, 4 Nov 2021 09:40:31 +0100 Subject: [PATCH 16/18] trigger test re-run From b25b422282031206c46bbcce13b34ceb8ce997aa Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Thu, 4 Nov 2021 09:16:38 +0000 Subject: [PATCH 17/18] Make sure seed is converted to bool correctly --- rasa/cli/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/cli/evaluate.py b/rasa/cli/evaluate.py index 151d648b9eef..3869112c1b88 100644 --- a/rasa/cli/evaluate.py +++ b/rasa/cli/evaluate.py @@ -135,7 +135,7 @@ def _run_markers( telemetry.track_markers_extraction_initiated( strategy=strategy, only_extract=stats_file_prefix is not None, - seed=seed, + seed=seed is not None, count=count, ) From 979ecd1f2d491f2b81da8d73b52ecbd44d42271e Mon Sep 17 00:00:00 2001 From: Matthew Summers Date: Thu, 4 Nov 2021 12:03:44 +0000 Subject: [PATCH 18/18] Fix telemetry test --- rasa/telemetry.py | 2 +- tests/test_telemetry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rasa/telemetry.py b/rasa/telemetry.py index bf8b845a1716..4a5377525ee1 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -96,7 +96,7 @@ TELEMETRY_TEST_NLU_EVENT = "Model NLU Tested" TELEMETRY_MARKERS_EXTRACTION_INITIATED_EVENT = "Markers Extraction Initiated" TELEMETRY_MARKERS_EXTRACTED_EVENT = "Markers Extracted" -TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Stats Computed" +TELEMETRY_MARKERS_STATS_COMPUTED_EVENT = "Markers Statistics Computed" TELEMETRY_MARKERS_PARSED_COUNT = "Markers Parsed" # used to calculate the context on the first call and cache it afterwards diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 6825f9da8e8a..35b66de28fa9 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -81,7 +81,7 @@ async def test_events_schema( telemetry.track_nlu_model_test(TrainingData()) - telemetry.track_markers_extraction_initiated("all", False, None, None) + telemetry.track_markers_extraction_initiated("all", False, False, None) telemetry.track_markers_extracted(1)