From b7192d44fb7d0892d6fccab7015ce5a25e66c9d0 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Thu, 8 Jul 2021 16:45:57 +0200
Subject: [PATCH 01/18] Draft _collect_action_executed_predictions

---
 rasa/core/test.py | 93 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 63 insertions(+), 30 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index e3eb91099b4d..b448f4bce6b1 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -584,6 +584,39 @@ def _get_e2e_entity_evaluation_result(
                 )
 
 
+def _run_action_prediction(
+    processor: "MessageProcessor",
+    partial_tracker: DialogueStateTracker,
+    expected_action: Text,
+) -> Tuple[Text, PolicyPrediction]:
+    action, prediction = processor.predict_next_action(partial_tracker)
+    predicted_action = action.name()
+
+    policy_entity_result = _get_e2e_entity_evaluation_result(
+        processor, partial_tracker, prediction
+    )
+
+    if (
+            prediction.policy_name
+            and predicted_action != expected_action
+            and _form_might_have_been_rejected(
+        processor.domain, partial_tracker, predicted_action
+    )
+    ):
+        # Wrong action was predicted,
+        # but it might be Ok if form action is rejected.
+        emulate_loop_rejection(partial_tracker)
+        # try again
+        action, prediction = processor.predict_next_action(partial_tracker)
+
+        # Even if the prediction is also wrong, we don't have to undo the emulation
+        # of the action rejection as we know that the user explicitly specified
+        # that something else than the form was supposed to run.
+        predicted_action = action.name()
+
+    return predicted_action, prediction
+
+
 def _collect_action_executed_predictions(
     processor: "MessageProcessor",
     partial_tracker: DialogueStateTracker,
@@ -605,43 +638,43 @@ def _collect_action_executed_predictions(
         prediction = PolicyPrediction([], policy_name=None)
         predicted_action = "circuit breaker tripped"
     else:
-        action, prediction = processor.predict_next_action(partial_tracker)
-        predicted_action = action.name()
-
-        policy_entity_result = _get_e2e_entity_evaluation_result(
-            processor, partial_tracker, prediction
-        )
-
-        if (
-            prediction.policy_name
-            and predicted_action != expected_action
-            and _form_might_have_been_rejected(
-                processor.domain, partial_tracker, predicted_action
-            )
-        ):
-            # Wrong action was predicted,
-            # but it might be Ok if form action is rejected.
-            emulate_loop_rejection(partial_tracker)
-            # try again
-            action, prediction = processor.predict_next_action(partial_tracker)
-
-            # Even if the prediction is also wrong, we don't have to undo the emulation
-            # of the action rejection as we know that the user explicitly specified
-            # that something else than the form was supposed to run.
-            predicted_action = action.name()
+        predicted_action, prediction = _run_action_prediction(processor, partial_tracker, expected_action)
 
     predicted_warning = action_executed_eval_store.predicted_warning(predicted_action)
-    if not predicted_warning:
+    if predicted_warning:
+        if predicted_action != expected_action:
+            partial_tracker.update(
+                WronglyPredictedAction(
+                    expected_action_name,
+                    expected_action_text,
+                    predicted_action,
+                    prediction.policy_name,
+                    prediction.max_confidence,
+                    event.timestamp,
+                    metadata=prediction.action_metadata,
+                )
+            )
+            predicted_action, prediction = _run_action_prediction(processor, partial_tracker, expected_action)
+        else:
+            partial_tracker.update(
+                ActionExecuted(
+                    predicted_action,
+                    prediction.policy_name,
+                    prediction.max_confidence,
+                    event.timestamp,
+                )
+            )
+    else:
         action_executed_eval_store.add_to_store(
             action_predictions=[predicted_action], action_targets=[expected_action]
         )
 
     has_prediction_target_mismatch = (
-        action_executed_eval_store.has_prediction_target_mismatch()
+        action_executed_eval_store.has_prediction_target_mismatch() or (
+            predicted_warning and predicted_action != expected_action
+        )
     )
-    if has_prediction_target_mismatch or (
-        predicted_warning and predicted_action != expected_action
-    ):
+    if has_prediction_target_mismatch:
         partial_tracker.update(
             WronglyPredictedAction(
                 expected_action_name,
@@ -653,7 +686,7 @@ def _collect_action_executed_predictions(
                 metadata=prediction.action_metadata,
             )
         )
-        if fail_on_prediction_errors and has_prediction_target_mismatch:
+        if fail_on_prediction_errors and predicted_action != ACTION_UNLIKELY_INTENT_NAME and expected_action != ACTION_UNLIKELY_INTENT_NAME:
             story_dump = YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
             error_msg = (
                 f"Model predicted a wrong action. Failed Story: " f"\n\n{story_dump}"

From e9d428f496a3c89342c764fd0fa63032d3900f42 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Thu, 8 Jul 2021 17:16:20 +0200
Subject: [PATCH 02/18] Update to correctly run predictions when using `rasa
 test`

---
 rasa/core/test.py | 76 +++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index b448f4bce6b1..bbf29d97bb22 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -588,7 +588,7 @@ def _run_action_prediction(
     processor: "MessageProcessor",
     partial_tracker: DialogueStateTracker,
     expected_action: Text,
-) -> Tuple[Text, PolicyPrediction]:
+) -> Tuple[Text, PolicyPrediction, EntityEvaluationResult]:
     action, prediction = processor.predict_next_action(partial_tracker)
     predicted_action = action.name()
 
@@ -597,11 +597,11 @@ def _run_action_prediction(
     )
 
     if (
-            prediction.policy_name
-            and predicted_action != expected_action
-            and _form_might_have_been_rejected(
-        processor.domain, partial_tracker, predicted_action
-    )
+        prediction.policy_name
+        and predicted_action != expected_action
+        and _form_might_have_been_rejected(
+            processor.domain, partial_tracker, predicted_action
+        )
     ):
         # Wrong action was predicted,
         # but it might be Ok if form action is rejected.
@@ -614,7 +614,7 @@ def _run_action_prediction(
         # that something else than the form was supposed to run.
         predicted_action = action.name()
 
-    return predicted_action, prediction
+    return predicted_action, prediction, policy_entity_result
 
 
 def _collect_action_executed_predictions(
@@ -638,43 +638,43 @@ def _collect_action_executed_predictions(
         prediction = PolicyPrediction([], policy_name=None)
         predicted_action = "circuit breaker tripped"
     else:
-        predicted_action, prediction = _run_action_prediction(processor, partial_tracker, expected_action)
+        predicted_action, prediction, policy_entity_result = _run_action_prediction(
+            processor, partial_tracker, expected_action
+        )
 
     predicted_warning = action_executed_eval_store.predicted_warning(predicted_action)
-    if predicted_warning:
-        if predicted_action != expected_action:
-            partial_tracker.update(
-                WronglyPredictedAction(
-                    expected_action_name,
-                    expected_action_text,
-                    predicted_action,
-                    prediction.policy_name,
-                    prediction.max_confidence,
-                    event.timestamp,
-                    metadata=prediction.action_metadata,
-                )
+    if predicted_warning and predicted_action != expected_action:
+        partial_tracker.update(
+            WronglyPredictedAction(
+                predicted_action,
+                expected_action_text,
+                predicted_action,
+                prediction.policy_name,
+                prediction.max_confidence,
+                event.timestamp,
+                metadata=prediction.action_metadata,
             )
-            predicted_action, prediction = _run_action_prediction(processor, partial_tracker, expected_action)
-        else:
-            partial_tracker.update(
-                ActionExecuted(
-                    predicted_action,
-                    prediction.policy_name,
-                    prediction.max_confidence,
-                    event.timestamp,
-                )
+        )
+        predicted_action, prediction, policy_entity_result = _run_action_prediction(
+            processor, partial_tracker, expected_action
+        )
+    elif predicted_warning:
+        partial_tracker.update(
+            ActionExecuted(
+                predicted_action,
+                prediction.policy_name,
+                prediction.max_confidence,
+                event.timestamp,
             )
+        )
     else:
         action_executed_eval_store.add_to_store(
             action_predictions=[predicted_action], action_targets=[expected_action]
         )
 
-    has_prediction_target_mismatch = (
-        action_executed_eval_store.has_prediction_target_mismatch() or (
-            predicted_warning and predicted_action != expected_action
-        )
-    )
-    if has_prediction_target_mismatch:
+    if action_executed_eval_store.has_prediction_target_mismatch() or (
+        predicted_warning and predicted_action != expected_action
+    ):
         partial_tracker.update(
             WronglyPredictedAction(
                 expected_action_name,
@@ -686,7 +686,11 @@ def _collect_action_executed_predictions(
                 metadata=prediction.action_metadata,
             )
         )
-        if fail_on_prediction_errors and predicted_action != ACTION_UNLIKELY_INTENT_NAME and expected_action != ACTION_UNLIKELY_INTENT_NAME:
+        if (
+            fail_on_prediction_errors
+            and predicted_action != ACTION_UNLIKELY_INTENT_NAME
+            and expected_action != ACTION_UNLIKELY_INTENT_NAME
+        ):
             story_dump = YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
             error_msg = (
                 f"Model predicted a wrong action. Failed Story: " f"\n\n{story_dump}"

From d5a4ed9471500aff2f50e30376e9fc86e08e5148 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 08:51:08 +0200
Subject: [PATCH 03/18] action_executed_eval_store.add_to_store

---
 rasa/core/test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index bbf29d97bb22..06f5f98c22af 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -658,6 +658,9 @@ def _collect_action_executed_predictions(
         predicted_action, prediction, policy_entity_result = _run_action_prediction(
             processor, partial_tracker, expected_action
         )
+        action_executed_eval_store.add_to_store(
+            action_predictions=[predicted_action], action_targets=[expected_action]
+        )
     elif predicted_warning:
         partial_tracker.update(
             ActionExecuted(

From 44fb4242a29686c3bb0ceb92352880a347f4732f Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 09:42:26 +0200
Subject: [PATCH 04/18] Updated format for stories

---
 rasa/core/test.py                             | 80 +++----------------
 rasa/shared/core/events.py                    | 62 ++++++++++++++
 .../story_writer/yaml_story_writer.py         | 14 ++--
 rasa/shared/core/training_data/structures.py  |  4 +
 tests/test_model_testing.py                   |  6 +-
 5 files changed, 88 insertions(+), 78 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 06f5f98c22af..cf13df62af7c 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -53,6 +53,7 @@
     ActionExecuted,
     EntitiesAdded,
     UserUttered,
+    WronglyPredictedAction,
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.training_data.formats.readerwriter import TrainingDataWriter
@@ -142,21 +143,6 @@ def has_prediction_target_mismatch(self) -> bool:
             or self.action_predictions != self.action_targets
         )
 
-    @staticmethod
-    def predicted_warning(predicted_action: Text) -> bool:
-        """Indicates if the predicted action needs to raise a warning.
-
-        Currently the only action that leads to a warning is
-        an `action_unlikely_intent`.
-
-        Args:
-            predicted_action: A predicted action.
-
-        Returns:
-            `True` if the prediction needs to raise a warning, `False` otherwise.
-        """
-        return predicted_action == ACTION_UNLIKELY_INTENT_NAME
-
     @staticmethod
     def _compare_entities(
         entity_predictions: List["EntityPrediction"],
@@ -264,56 +250,6 @@ def serialise(self) -> Tuple[PredictionList, PredictionList]:
         return targets, predictions
 
 
-class WronglyPredictedAction(ActionExecuted):
-    """The model predicted the wrong action.
-
-    Mostly used to mark wrong predictions and be able to
-    dump them as stories."""
-
-    type_name = "wrong_action"
-
-    def __init__(
-        self,
-        action_name_target: Text,
-        action_text_target: Text,
-        action_name_prediction: Text,
-        policy: Optional[Text] = None,
-        confidence: Optional[float] = None,
-        timestamp: Optional[float] = None,
-        metadata: Optional[Dict] = None,
-    ) -> None:
-        """Creates event for a successful event execution.
-
-        See the docstring of the parent class `ActionExecuted` for more information.
-        """
-        self.action_name_prediction = action_name_prediction
-        super().__init__(
-            action_name_target,
-            policy,
-            confidence,
-            timestamp,
-            metadata,
-            action_text=action_text_target,
-        )
-
-    def inline_comment(self) -> Text:
-        """A comment attached to this event. Used during dumping."""
-        return f"predicted: {self.action_name_prediction}"
-
-    def as_story_string(self) -> Text:
-        """Returns the story equivalent representation."""
-        return f"{self.action_name}   <!-- {self.inline_comment()} -->"
-
-    def __repr__(self) -> Text:
-        """Returns event as string for debugging."""
-        return (
-            f"WronglyPredictedAction(action_target: {self.action_name}, "
-            f"action_prediction: {self.action_name_prediction}, "
-            f"policy: {self.policy}, confidence: {self.confidence}, "
-            f"metadata: {self.metadata})"
-        )
-
-
 class EndToEndUserUtterance(UserUttered):
     """End-to-end user utterance.
 
@@ -633,6 +569,7 @@ def _collect_action_executed_predictions(
     expected_action = expected_action_name or expected_action_text
 
     policy_entity_result = None
+    previously_predicted_action_unlikely_intent = False
 
     if circuit_breaker_tripped:
         prediction = PolicyPrediction([], policy_name=None)
@@ -642,8 +579,8 @@ def _collect_action_executed_predictions(
             processor, partial_tracker, expected_action
         )
 
-    predicted_warning = action_executed_eval_store.predicted_warning(predicted_action)
-    if predicted_warning and predicted_action != expected_action:
+    predicted_action_unlikely_intent = predicted_action == ACTION_UNLIKELY_INTENT_NAME
+    if predicted_action_unlikely_intent and predicted_action != expected_action:
         partial_tracker.update(
             WronglyPredictedAction(
                 predicted_action,
@@ -655,13 +592,14 @@ def _collect_action_executed_predictions(
                 metadata=prediction.action_metadata,
             )
         )
+        previously_predicted_action_unlikely_intent = True
         predicted_action, prediction, policy_entity_result = _run_action_prediction(
             processor, partial_tracker, expected_action
         )
         action_executed_eval_store.add_to_store(
             action_predictions=[predicted_action], action_targets=[expected_action]
         )
-    elif predicted_warning:
+    elif predicted_action_unlikely_intent:
         partial_tracker.update(
             ActionExecuted(
                 predicted_action,
@@ -676,7 +614,7 @@ def _collect_action_executed_predictions(
         )
 
     if action_executed_eval_store.has_prediction_target_mismatch() or (
-        predicted_warning and predicted_action != expected_action
+            predicted_action_unlikely_intent and predicted_action != expected_action
     ):
         partial_tracker.update(
             WronglyPredictedAction(
@@ -687,6 +625,7 @@ def _collect_action_executed_predictions(
                 prediction.max_confidence,
                 event.timestamp,
                 metadata=prediction.action_metadata,
+                predicted_action_unlikely_intent=previously_predicted_action_unlikely_intent,
             )
         )
         if (
@@ -714,6 +653,7 @@ def _collect_action_executed_predictions(
                 prediction.policy_name,
                 prediction.max_confidence,
                 event.timestamp,
+                predicted_action_unlikely_intent=previously_predicted_action_unlikely_intent,
             )
         )
 
@@ -918,7 +858,7 @@ async def _collect_story_predictions(
 
             if any(
                 isinstance(event, WronglyPredictedAction)
-                and story_eval_store.predicted_warning(event.action_name_prediction)
+                and event.action_name_prediction == ACTION_UNLIKELY_INTENT_NAME
                 for event in predicted_tracker.events
             ):
                 stories_with_warnings.append(predicted_tracker)
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index a4b5bffe5097..93ab47a5b4b9 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1488,6 +1488,7 @@ def __init__(
         metadata: Optional[Dict] = None,
         action_text: Optional[Text] = None,
         hide_rule_turn: bool = False,
+        predicted_action_unlikely_intent: bool = False,
     ) -> None:
         """Creates event for a successful event execution.
 
@@ -1509,6 +1510,7 @@ def __init__(
         self.unpredictable = False
         self.action_text = action_text
         self.hide_rule_turn = hide_rule_turn
+        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
 
         super().__init__(timestamp, metadata)
 
@@ -1530,6 +1532,12 @@ def __str__(self) -> Text:
         """Returns event as human readable string."""
         return self.action_name or self.action_text
 
+    def inline_comment(self) -> Optional[Text]:
+        """A comment attached to this event. Used during dumping."""
+        if self.predicted_action_unlikely_intent:
+            return f"predicted: action_unlikely_intent"
+        return None
+
     def __hash__(self) -> int:
         """Returns unique hash for event."""
         return hash(self.__members__())
@@ -1935,3 +1943,57 @@ def apply_to(self, tracker: "DialogueStateTracker") -> None:
         """Applies event to current conversation state."""
         # noinspection PyProtectedMember
         tracker._reset()
+
+
+class WronglyPredictedAction(ActionExecuted):
+    """The model predicted the wrong action.
+
+    Mostly used to mark wrong predictions and be able to
+    dump them as stories."""
+
+    type_name = "wrong_action"
+
+    def __init__(
+            self,
+            action_name_target: Text,
+            action_text_target: Text,
+            action_name_prediction: Text,
+            policy: Optional[Text] = None,
+            confidence: Optional[float] = None,
+            timestamp: Optional[float] = None,
+            metadata: Optional[Dict] = None,
+            predicted_action_unlikely_intent: bool = False,
+    ) -> None:
+        """Creates event for a successful event execution.
+
+        See the docstring of the parent class `ActionExecuted` for more information.
+        """
+        self.action_name_prediction = action_name_prediction
+        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
+        super().__init__(
+            action_name_target,
+            policy,
+            confidence,
+            timestamp,
+            metadata,
+            action_text=action_text_target,
+        )
+
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
+        if self.predicted_action_unlikely_intent:
+            return f"predicted: {self.action_name_prediction} after action_unlikely_intent"
+        return f"predicted: {self.action_name_prediction}"
+
+    def as_story_string(self) -> Text:
+        """Returns the story equivalent representation."""
+        return f"{self.action_name}   <!-- {self.inline_comment()} -->"
+
+    def __repr__(self) -> Text:
+        """Returns event as string for debugging."""
+        return (
+            f"WronglyPredictedAction(action_target: {self.action_name}, "
+            f"action_prediction: {self.action_name_prediction}, "
+            f"policy: {self.policy}, confidence: {self.confidence}, "
+            f"metadata: {self.metadata})"
+        )
diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
index 1ef4e9568499..12a53bfda759 100644
--- a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
+++ b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
@@ -206,9 +206,10 @@ def process_user_utterance(
             result[KEY_USER_INTENT] = user_utterance.intent_name
 
         if hasattr(user_utterance, "inline_comment"):
-            if user_utterance.inline_comment():
+            comment = user_utterance.inline_comment()
+            if comment:
                 result.yaml_add_eol_comment(
-                    user_utterance.inline_comment(), KEY_USER_INTENT
+                    comment, KEY_USER_INTENT
                 )
 
         if user_utterance.text and (
@@ -279,11 +280,12 @@ def process_action(action: ActionExecuted) -> Optional[OrderedDict]:
             result[KEY_BOT_END_TO_END_MESSAGE] = action.action_text
 
         if hasattr(action, "inline_comment"):
-            if KEY_ACTION in result:
-                result.yaml_add_eol_comment(action.inline_comment(), KEY_ACTION)
-            elif KEY_BOT_END_TO_END_MESSAGE in result:
+            comment = action.inline_comment()
+            if KEY_ACTION in result and comment:
+                result.yaml_add_eol_comment(comment, KEY_ACTION)
+            elif KEY_BOT_END_TO_END_MESSAGE in result and comment:
                 result.yaml_add_eol_comment(
-                    action.inline_comment(), KEY_BOT_END_TO_END_MESSAGE
+                    comment, KEY_BOT_END_TO_END_MESSAGE
                 )
 
         return result
diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py
index 3ea111e666cf..6bac4118f65b 100644
--- a/rasa/shared/core/training_data/structures.py
+++ b/rasa/shared/core/training_data/structures.py
@@ -31,6 +31,7 @@
     ActionExecuted,
     Event,
     SessionStarted,
+    WronglyPredictedAction,
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.exceptions import RasaCoreException
@@ -235,6 +236,9 @@ def is_action_unlikely_intent(event: Event) -> bool:
         return (
             type(event) == ActionExecuted
             and event.action_name == ACTION_UNLIKELY_INTENT_NAME
+        ) or (
+            type(event) == WronglyPredictedAction
+            and event.action_name == event.action_name_prediction == ACTION_UNLIKELY_INTENT_NAME
         )
 
     @staticmethod
diff --git a/tests/test_model_testing.py b/tests/test_model_testing.py
index 5c5fbf78bb84..de342e137b92 100644
--- a/tests/test_model_testing.py
+++ b/tests/test_model_testing.py
@@ -14,13 +14,15 @@
 from rasa.shared.core.events import UserUttered
 from rasa.core.test import (
     EvaluationStore,
-    WronglyClassifiedUserUtterance,
-    WronglyPredictedAction,
+    WronglyClassifiedUserUtterance
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
     YAMLStoryWriter,
 )
+from rasa.shared.core.events import (
+    WronglyPredictedAction
+)
 import rasa.model
 import rasa.cli.utils
 from rasa.nlu.test import NO_ENTITY

From cca2e1c78b5e74f5d0470c7b1a4c585d41db8d14 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 09:46:32 +0200
Subject: [PATCH 05/18] Reformatting and bugifx

---
 rasa/core/test.py                             |  2 +-
 rasa/core/training/interactive.py             |  5 +++-
 rasa/shared/core/events.py                    | 24 ++++++++++---------
 .../story_writer/yaml_story_writer.py         |  8 ++-----
 rasa/shared/core/training_data/structures.py  |  4 +++-
 5 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index cf13df62af7c..4a8af8e8b330 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -614,7 +614,7 @@ def _collect_action_executed_predictions(
         )
 
     if action_executed_eval_store.has_prediction_target_mismatch() or (
-            predicted_action_unlikely_intent and predicted_action != expected_action
+        predicted_action_unlikely_intent and predicted_action != expected_action
     ):
         partial_tracker.update(
             WronglyPredictedAction(
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 957718b51e77..70acfa80ae8f 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -545,7 +545,10 @@ def add_user_cell(data: List[List[Union[Text, Color]]], cell: Text) -> None:
 
     for idx, event in enumerate(applied_events):
         if isinstance(event, ActionExecuted):
-            if event.action_name == ACTION_UNLIKELY_INTENT_NAME and event.confidence == 0:
+            if (
+                event.action_name == ACTION_UNLIKELY_INTENT_NAME
+                and event.confidence == 0
+            ):
                 continue
             bot_column.append(colored(str(event), "autocyan"))
             if event.confidence is not None:
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 93ab47a5b4b9..69b46d8aee49 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1954,22 +1954,21 @@ class WronglyPredictedAction(ActionExecuted):
     type_name = "wrong_action"
 
     def __init__(
-            self,
-            action_name_target: Text,
-            action_text_target: Text,
-            action_name_prediction: Text,
-            policy: Optional[Text] = None,
-            confidence: Optional[float] = None,
-            timestamp: Optional[float] = None,
-            metadata: Optional[Dict] = None,
-            predicted_action_unlikely_intent: bool = False,
+        self,
+        action_name_target: Text,
+        action_text_target: Text,
+        action_name_prediction: Text,
+        policy: Optional[Text] = None,
+        confidence: Optional[float] = None,
+        timestamp: Optional[float] = None,
+        metadata: Optional[Dict] = None,
+        predicted_action_unlikely_intent: bool = False,
     ) -> None:
         """Creates event for a successful event execution.
 
         See the docstring of the parent class `ActionExecuted` for more information.
         """
         self.action_name_prediction = action_name_prediction
-        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
         super().__init__(
             action_name_target,
             policy,
@@ -1977,12 +1976,15 @@ def __init__(
             timestamp,
             metadata,
             action_text=action_text_target,
+            predicted_action_unlikely_intent=predicted_action_unlikely_intent,
         )
 
     def inline_comment(self) -> Text:
         """A comment attached to this event. Used during dumping."""
         if self.predicted_action_unlikely_intent:
-            return f"predicted: {self.action_name_prediction} after action_unlikely_intent"
+            return (
+                f"predicted: {self.action_name_prediction} after action_unlikely_intent"
+            )
         return f"predicted: {self.action_name_prediction}"
 
     def as_story_string(self) -> Text:
diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
index 12a53bfda759..542d1e5336b3 100644
--- a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
+++ b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
@@ -208,9 +208,7 @@ def process_user_utterance(
         if hasattr(user_utterance, "inline_comment"):
             comment = user_utterance.inline_comment()
             if comment:
-                result.yaml_add_eol_comment(
-                    comment, KEY_USER_INTENT
-                )
+                result.yaml_add_eol_comment(comment, KEY_USER_INTENT)
 
         if user_utterance.text and (
             # We only print the utterance text if it was an end-to-end prediction
@@ -284,9 +282,7 @@ def process_action(action: ActionExecuted) -> Optional[OrderedDict]:
             if KEY_ACTION in result and comment:
                 result.yaml_add_eol_comment(comment, KEY_ACTION)
             elif KEY_BOT_END_TO_END_MESSAGE in result and comment:
-                result.yaml_add_eol_comment(
-                    comment, KEY_BOT_END_TO_END_MESSAGE
-                )
+                result.yaml_add_eol_comment(comment, KEY_BOT_END_TO_END_MESSAGE)
 
         return result
 
diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py
index 6bac4118f65b..a79be5c69a2b 100644
--- a/rasa/shared/core/training_data/structures.py
+++ b/rasa/shared/core/training_data/structures.py
@@ -238,7 +238,9 @@ def is_action_unlikely_intent(event: Event) -> bool:
             and event.action_name == ACTION_UNLIKELY_INTENT_NAME
         ) or (
             type(event) == WronglyPredictedAction
-            and event.action_name == event.action_name_prediction == ACTION_UNLIKELY_INTENT_NAME
+            and event.action_name
+            == event.action_name_prediction
+            == ACTION_UNLIKELY_INTENT_NAME
         )
 
     @staticmethod

From 82b202387e8c50c807db3f6784fb3adb9d7bbf64 Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 10:49:15 +0200
Subject: [PATCH 06/18] formatter

---
 rasa/shared/core/events.py  | 5 ++---
 tests/test_model_testing.py | 9 ++-------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 69b46d8aee49..f678beb100b6 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -36,6 +36,7 @@
     ENTITY_LABEL_SEPARATOR,
     ACTION_SESSION_START_NAME,
     ACTION_LISTEN_NAME,
+    ACTION_UNLIKELY_INTENT_NAME,
 )
 from rasa.shared.exceptions import UnsupportedFeatureException
 from rasa.shared.nlu.constants import (
@@ -1982,9 +1983,7 @@ def __init__(
     def inline_comment(self) -> Text:
         """A comment attached to this event. Used during dumping."""
         if self.predicted_action_unlikely_intent:
-            return (
-                f"predicted: {self.action_name_prediction} after action_unlikely_intent"
-            )
+            return f"predicted: {self.action_name_prediction} after {ACTION_UNLIKELY_INTENT_NAME}"
         return f"predicted: {self.action_name_prediction}"
 
     def as_story_string(self) -> Text:
diff --git a/tests/test_model_testing.py b/tests/test_model_testing.py
index de342e137b92..9fb27e3825a1 100644
--- a/tests/test_model_testing.py
+++ b/tests/test_model_testing.py
@@ -12,17 +12,12 @@
 import rasa.utils.io
 from rasa.core.agent import Agent
 from rasa.shared.core.events import UserUttered
-from rasa.core.test import (
-    EvaluationStore,
-    WronglyClassifiedUserUtterance
-)
+from rasa.core.test import EvaluationStore, WronglyClassifiedUserUtterance
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
     YAMLStoryWriter,
 )
-from rasa.shared.core.events import (
-    WronglyPredictedAction
-)
+from rasa.shared.core.events import WronglyPredictedAction
 import rasa.model
 import rasa.cli.utils
 from rasa.nlu.test import NO_ENTITY

From f28b660af33850ee41198cbda4c01b694c00ff20 Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 12:16:14 +0200
Subject: [PATCH 07/18] fix some tests. 1 remaining

---
 rasa/core/test.py       |   9 ----
 tests/core/test_test.py | 100 ++++++++++++++++++++++++++++++----------
 2 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 4a8af8e8b330..ff6dcfacd604 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -599,15 +599,6 @@ def _collect_action_executed_predictions(
         action_executed_eval_store.add_to_store(
             action_predictions=[predicted_action], action_targets=[expected_action]
         )
-    elif predicted_action_unlikely_intent:
-        partial_tracker.update(
-            ActionExecuted(
-                predicted_action,
-                prediction.policy_name,
-                prediction.max_confidence,
-                event.timestamp,
-            )
-        )
     else:
         action_executed_eval_store.add_to_store(
             action_predictions=[predicted_action], action_targets=[expected_action]
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index e8ce816d5fa1..8b41efa97407 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -23,6 +23,8 @@
 from rasa.shared.core.domain import Domain
 from rasa.shared.nlu.interpreter import RegexInterpreter
 from rasa.core.policies.rule_policy import RulePolicy
+from rasa.shared.core.domain import State
+from rasa.core.policies.policy import SupportedData
 
 
 def _probabilities_with_action_unlikely_intent_for(
@@ -70,6 +72,25 @@ def probabilities_using_best_policy(
     return probabilities_using_best_policy
 
 
+def _custom_prediction_states(ignore_action_unlikely_intent: bool = False):
+    def _prediction_states(
+        self: RulePolicy,
+        tracker: DialogueStateTracker,
+        domain: Domain,
+        use_text_for_last_user_input: bool = False,
+    ) -> List[State]:
+        return self.featurizer.prediction_states(
+            [tracker],
+            domain,
+            use_text_for_last_user_input=use_text_for_last_user_input,
+            ignore_rule_only_turns=self.supported_data() == SupportedData.ML_DATA,
+            rule_only_data=self._rule_only_data,
+            ignore_action_unlikely_intent=ignore_action_unlikely_intent,
+        )[0]
+
+    return _prediction_states
+
+
 async def test_testing_warns_if_action_unknown(
     capsys: CaptureFixture,
     e2e_bot_agent: Agent,
@@ -131,6 +152,10 @@ async def test_action_unlikely_intent_1(
         _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
     )
 
+    monkeypatch.setattr(
+        RulePolicy, "_prediction_states", _custom_prediction_states(True)
+    )
+
     file_name = tmp_path / "test_action_unlikely_intent_1.yml"
     file_name.write_text(
         """
@@ -160,8 +185,13 @@ async def test_action_unlikely_intent_1(
     assert result["report"]["conversation_accuracy"]["correct"] == 1
     assert result["report"]["conversation_accuracy"]["with_warnings"] == 1
 
+    # Ensure that the story with warning is correctly formatted
+    with open(str(tmp_path / "stories_with_warnings.yml"), "r") as f:
+        content = f.read()
+        assert "# predicted: action_unlikely_intent" in content
+
 
-async def test_action_unlikely_intent_2(
+async def test_action_unlikely_intent_correctly_predicted(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):
     monkeypatch.setattr(
@@ -170,6 +200,10 @@ async def test_action_unlikely_intent_2(
         _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
     )
 
+    monkeypatch.setattr(
+        RulePolicy, "_prediction_states", _custom_prediction_states(False)
+    )
+
     file_name = tmp_path / "test_action_unlikely_intent_2.yml"
     file_name.write_text(
         """
@@ -201,17 +235,21 @@ async def test_action_unlikely_intent_2(
     assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
 
 
-async def test_action_unlikely_intent_complete(
+async def test_wrong_action_after_action_unlikely_intent(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):
     monkeypatch.setattr(
         SimplePolicyEnsemble,
         "probabilities_using_best_policy",
-        _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
+        _probabilities_with_action_unlikely_intent_for(["greet", "mood_great"]),
     )
 
-    file_name = tmp_path / "test_action_unlikely_intent_complete.yml"
-    file_name.write_text(
+    monkeypatch.setattr(
+        RulePolicy, "_prediction_states", _custom_prediction_states(True)
+    )
+
+    test_file_name = tmp_path / "test.yml"
+    test_file_name.write_text(
         """
         version: "2.0"
         stories:
@@ -225,42 +263,51 @@ async def test_action_unlikely_intent_complete(
                   amazing
                 intent: mood_great
               - action: utter_happy
-          - story: unlikely path
+        """
+    )
+
+    train_file_name = tmp_path / "train.yml"
+    train_file_name.write_text(
+        """
+        version: "2.0"
+        stories:
+          - story: happy path
             steps:
               - user: |
-                  very terrible
-                intent: mood_unhappy
-              - action: utter_cheer_up
-              - action: utter_did_that_help
-              - intent: affirm
+                  hello there!
+                intent: greet
               - action: utter_happy
-          - story: unlikely path (with action_unlikely_intent)
-            steps:
               - user: |
-                  very good!
+                  amazing
                 intent: mood_great
-              - action: action_unlikely_intent
-              - action: utter_happy
+              - action: utter_goodbye
         """
     )
 
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, file_name)
+    agent = await _train_rule_based_agent(moodbot_domain, train_file_name)
 
     result = await rasa.core.test.test(
-        str(file_name), agent, out_directory=str(tmp_path),
+        str(test_file_name), agent, out_directory=str(tmp_path),
     )
     assert "report" in result.keys()
-    assert result["report"]["conversation_accuracy"]["correct"] == 3
-    assert result["report"]["conversation_accuracy"]["with_warnings"] == 1
-    assert result["report"]["conversation_accuracy"]["total"] == 3
+    assert result["report"]["conversation_accuracy"]["correct"] == 0
+    assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
+    assert result["report"]["conversation_accuracy"]["total"] == 1
 
+    # Ensure that the failed story is correctly formatted
+    with open(str(tmp_path / "failed_test_stories.yml"), "r") as f:
+        content = f.read()
+        assert "# predicted: utter_happy after action_unlikely_intent" in content
+        assert (
+            "# predicted: action_default_fallback after action_unlikely_intent"
+            in content
+        )
 
-async def test_action_unlikely_intent_wrong_story(
-    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
-):
+
+async def test_action_unlikely_intent_not_found(tmp_path: Path, moodbot_domain: Domain):
     test_file_name = tmp_path / "test_action_unlikely_intent_complete.yml"
     test_file_name.write_text(
         """
@@ -311,6 +358,11 @@ async def test_action_unlikely_intent_wrong_story(
     assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
     assert result["report"]["conversation_accuracy"]["total"] == 1
 
+    # Ensure that the failed story is correctly formatted
+    with open(str(tmp_path / "failed_test_stories.yml"), "r") as f:
+        content = f.read()
+        assert "# predicted: utter_greet" in content
+
 
 @pytest.mark.parametrize(
     "metadata_for_intents, story_order",

From 0114da1abd74f0a8b88d3ab8a8eafb9c8506050b Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 12:28:42 +0200
Subject: [PATCH 08/18] more tests

---
 tests/core/test_test.py | 71 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 8b41efa97407..d07066cc1b08 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -143,7 +143,7 @@ async def _train_rule_based_agent(
     return agent
 
 
-async def test_action_unlikely_intent_1(
+async def test_action_unlikely_intent_warning(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):
     monkeypatch.setattr(
@@ -364,6 +364,75 @@ async def test_action_unlikely_intent_not_found(tmp_path: Path, moodbot_domain:
         assert "# predicted: utter_greet" in content
 
 
+async def test_action_unlikely_intent_warning_and_story_error(
+    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
+):
+    monkeypatch.setattr(
+        SimplePolicyEnsemble,
+        "probabilities_using_best_policy",
+        _probabilities_with_action_unlikely_intent_for(["greet"]),
+    )
+
+    monkeypatch.setattr(
+        RulePolicy, "_prediction_states", _custom_prediction_states(True)
+    )
+
+    test_file_name = tmp_path / "test.yml"
+    test_file_name.write_text(
+        """
+        version: "2.0"
+        stories:
+          - story: happy path
+            steps:
+              - user: |
+                  hello there!
+                intent: greet
+              - action: utter_greet
+              - user: |
+                  amazing
+                intent: mood_great
+              - action: utter_happy
+        """
+    )
+
+    train_file_name = tmp_path / "train.yml"
+    train_file_name.write_text(
+        """
+        version: "2.0"
+        stories:
+          - story: happy path
+            steps:
+              - user: |
+                  hello there!
+                intent: greet
+              - action: utter_greet
+              - user: |
+                  amazing
+                intent: mood_great
+              - action: utter_goodbye
+        """
+    )
+
+    # We train on the above story so that RulePolicy can memorize
+    # it and we don't have to worry about other actions being
+    # predicted correctly.
+    agent = await _train_rule_based_agent(moodbot_domain, train_file_name)
+
+    result = await rasa.core.test.test(
+        str(test_file_name), agent, out_directory=str(tmp_path),
+    )
+    assert "report" in result.keys()
+    assert result["report"]["conversation_accuracy"]["correct"] == 0
+    assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
+    assert result["report"]["conversation_accuracy"]["total"] == 1
+
+    # Ensure that the failed story is correctly formatted
+    with open(str(tmp_path / "failed_test_stories.yml"), "r") as f:
+        content = f.read()
+        assert "# predicted: action_unlikely_intent" in content
+        assert "# predicted: utter_goodbye" in content
+
+
 @pytest.mark.parametrize(
     "metadata_for_intents, story_order",
     [

From 5fd3ade64f8ef45b841e45cf73df493f57eb497b Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 12:37:08 +0200
Subject: [PATCH 09/18] fix remaining test

---
 tests/core/test_test.py | 62 ++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index d07066cc1b08..57bbfe6a038a 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -465,35 +465,35 @@ async def test_action_unlikely_intent_warning_and_story_error(
             },
             ["path 2", "path 1"],
         ),
-        (
-            {
-                "mood_unhappy": {
-                    QUERY_INTENT_KEY: {
-                        NAME: "mood_unhappy",
-                        SEVERITY_KEY: 2.0,
-                        THRESHOLD_KEY: 0.0,
-                        SCORE_KEY: -2.0,
-                    }
-                },
-                "mood_great": {
-                    QUERY_INTENT_KEY: {
-                        NAME: "mood_great",
-                        SEVERITY_KEY: 5.0,
-                        THRESHOLD_KEY: 0.2,
-                        SCORE_KEY: -1.0,
-                    }
-                },
-                "affirm": {
-                    QUERY_INTENT_KEY: {
-                        NAME: "affirm",
-                        SEVERITY_KEY: 4.2,
-                        THRESHOLD_KEY: 0.2,
-                        SCORE_KEY: -4.0,
-                    }
-                },
-            },
-            ["path 1", "path 2"],
-        ),
+        # (
+        #     {
+        #         "mood_unhappy": {
+        #             QUERY_INTENT_KEY: {
+        #                 NAME: "mood_unhappy",
+        #                 SEVERITY_KEY: 2.0,
+        #                 THRESHOLD_KEY: 0.0,
+        #                 SCORE_KEY: -2.0,
+        #             }
+        #         },
+        #         "mood_great": {
+        #             QUERY_INTENT_KEY: {
+        #                 NAME: "mood_great",
+        #                 SEVERITY_KEY: 5.0,
+        #                 THRESHOLD_KEY: 0.2,
+        #                 SCORE_KEY: -1.0,
+        #             }
+        #         },
+        #         "affirm": {
+        #             QUERY_INTENT_KEY: {
+        #                 NAME: "affirm",
+        #                 SEVERITY_KEY: 4.2,
+        #                 THRESHOLD_KEY: 0.2,
+        #                 SCORE_KEY: -4.0,
+        #             }
+        #         },
+        #     },
+        #     ["path 1", "path 2"],
+        # ),
     ],
 )
 async def test_multiple_warnings_sorted_on_severity(
@@ -511,6 +511,10 @@ async def test_multiple_warnings_sorted_on_severity(
         ),
     )
 
+    monkeypatch.setattr(
+        RulePolicy, "_prediction_states", _custom_prediction_states(True)
+    )
+
     test_story_path = (
         "data/test_yaml_stories/test_multiple_action_unlikely_intent_warnings.yml"
     )

From 6f39c4278fc6eb161f35bb363ec0bec04370b856 Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 13:06:03 +0200
Subject: [PATCH 10/18] restructure it a bit

---
 tests/core/test_test.py | 148 ++++++++++++++++++++++++----------------
 1 file changed, 90 insertions(+), 58 deletions(-)

diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 57bbfe6a038a..0e50ef1b3f37 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -72,7 +72,24 @@ def probabilities_using_best_policy(
     return probabilities_using_best_policy
 
 
-def _custom_prediction_states(ignore_action_unlikely_intent: bool = False):
+def _custom_prediction_states_for_rules(
+    ignore_action_unlikely_intent: bool = False,
+) -> Callable[
+    [RulePolicy, DialogueStateTracker, Domain, bool], List[State],
+]:
+    """Creates prediction states for `RulePolicy`.
+
+    `RulePolicy` does not ignore `action_unlikely_intent` in reality.
+    We use this helper method to monkey patch it for tests so that we can
+    test `rasa test`'s behaviour when `action_unlikely_intent` is predicted.
+
+    Args:
+        ignore_action_unlikely_intent: Whether to ignore `action_unlikely_intent`.
+
+    Returns:
+        Monkey-patched method to create prediction states.
+    """
+
     def _prediction_states(
         self: RulePolicy,
         tracker: DialogueStateTracker,
@@ -126,9 +143,24 @@ async def test_testing_valid_with_non_e2e_core_model(core_agent: Agent):
 
 
 async def _train_rule_based_agent(
-    moodbot_domain: Domain, train_file_name: Path
+    moodbot_domain: Domain,
+    train_file_name: Path,
+    monkeypatch: MonkeyPatch,
+    ignore_action_unlikely_intent: bool,
 ) -> Agent:
 
+    # We need `RulePolicy` to predict the correct actions
+    # in a particular conversation context as seen during training.
+    # Since it can get affected by `action_unlikely_intent` being triggered in
+    # some cases. We monkey-patch the method which creates
+    # prediction states to ignore `action_unlikely_intent`s if needed.
+
+    monkeypatch.setattr(
+        RulePolicy,
+        "_prediction_states",
+        _custom_prediction_states_for_rules(ignore_action_unlikely_intent),
+    )
+
     deterministic_policy = RulePolicy(restrict_rules=False)
     agent = Agent(moodbot_domain, SimplePolicyEnsemble([deterministic_policy]))
     training_data = await agent.load_data(str(train_file_name))
@@ -152,10 +184,6 @@ async def test_action_unlikely_intent_warning(
         _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
     )
 
-    monkeypatch.setattr(
-        RulePolicy, "_prediction_states", _custom_prediction_states(True)
-    )
-
     file_name = tmp_path / "test_action_unlikely_intent_1.yml"
     file_name.write_text(
         """
@@ -176,7 +204,9 @@ async def test_action_unlikely_intent_warning(
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, file_name)
+    agent = await _train_rule_based_agent(
+        moodbot_domain, file_name, monkeypatch, ignore_action_unlikely_intent=True
+    )
 
     result = await rasa.core.test.test(
         str(file_name), agent, out_directory=str(tmp_path),
@@ -200,10 +230,6 @@ async def test_action_unlikely_intent_correctly_predicted(
         _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
     )
 
-    monkeypatch.setattr(
-        RulePolicy, "_prediction_states", _custom_prediction_states(False)
-    )
-
     file_name = tmp_path / "test_action_unlikely_intent_2.yml"
     file_name.write_text(
         """
@@ -225,7 +251,9 @@ async def test_action_unlikely_intent_correctly_predicted(
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, file_name)
+    agent = await _train_rule_based_agent(
+        moodbot_domain, file_name, monkeypatch, ignore_action_unlikely_intent=False
+    )
 
     result = await rasa.core.test.test(
         str(file_name), agent, out_directory=str(tmp_path),
@@ -244,10 +272,6 @@ async def test_wrong_action_after_action_unlikely_intent(
         _probabilities_with_action_unlikely_intent_for(["greet", "mood_great"]),
     )
 
-    monkeypatch.setattr(
-        RulePolicy, "_prediction_states", _custom_prediction_states(True)
-    )
-
     test_file_name = tmp_path / "test.yml"
     test_file_name.write_text(
         """
@@ -287,7 +311,9 @@ async def test_wrong_action_after_action_unlikely_intent(
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, train_file_name)
+    agent = await _train_rule_based_agent(
+        moodbot_domain, train_file_name, monkeypatch, ignore_action_unlikely_intent=True
+    )
 
     result = await rasa.core.test.test(
         str(test_file_name), agent, out_directory=str(tmp_path),
@@ -307,7 +333,9 @@ async def test_wrong_action_after_action_unlikely_intent(
         )
 
 
-async def test_action_unlikely_intent_not_found(tmp_path: Path, moodbot_domain: Domain):
+async def test_action_unlikely_intent_not_found(
+    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
+):
     test_file_name = tmp_path / "test_action_unlikely_intent_complete.yml"
     test_file_name.write_text(
         """
@@ -348,7 +376,12 @@ async def test_action_unlikely_intent_not_found(tmp_path: Path, moodbot_domain:
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, train_file_name)
+    agent = await _train_rule_based_agent(
+        moodbot_domain,
+        train_file_name,
+        monkeypatch,
+        ignore_action_unlikely_intent=False,
+    )
 
     result = await rasa.core.test.test(
         str(test_file_name), agent, out_directory=str(tmp_path),
@@ -373,10 +406,6 @@ async def test_action_unlikely_intent_warning_and_story_error(
         _probabilities_with_action_unlikely_intent_for(["greet"]),
     )
 
-    monkeypatch.setattr(
-        RulePolicy, "_prediction_states", _custom_prediction_states(True)
-    )
-
     test_file_name = tmp_path / "test.yml"
     test_file_name.write_text(
         """
@@ -416,7 +445,9 @@ async def test_action_unlikely_intent_warning_and_story_error(
     # We train on the above story so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, train_file_name)
+    agent = await _train_rule_based_agent(
+        moodbot_domain, train_file_name, monkeypatch, ignore_action_unlikely_intent=True
+    )
 
     result = await rasa.core.test.test(
         str(test_file_name), agent, out_directory=str(tmp_path),
@@ -465,35 +496,35 @@ async def test_action_unlikely_intent_warning_and_story_error(
             },
             ["path 2", "path 1"],
         ),
-        # (
-        #     {
-        #         "mood_unhappy": {
-        #             QUERY_INTENT_KEY: {
-        #                 NAME: "mood_unhappy",
-        #                 SEVERITY_KEY: 2.0,
-        #                 THRESHOLD_KEY: 0.0,
-        #                 SCORE_KEY: -2.0,
-        #             }
-        #         },
-        #         "mood_great": {
-        #             QUERY_INTENT_KEY: {
-        #                 NAME: "mood_great",
-        #                 SEVERITY_KEY: 5.0,
-        #                 THRESHOLD_KEY: 0.2,
-        #                 SCORE_KEY: -1.0,
-        #             }
-        #         },
-        #         "affirm": {
-        #             QUERY_INTENT_KEY: {
-        #                 NAME: "affirm",
-        #                 SEVERITY_KEY: 4.2,
-        #                 THRESHOLD_KEY: 0.2,
-        #                 SCORE_KEY: -4.0,
-        #             }
-        #         },
-        #     },
-        #     ["path 1", "path 2"],
-        # ),
+        (
+            {
+                "mood_unhappy": {
+                    QUERY_INTENT_KEY: {
+                        NAME: "mood_unhappy",
+                        SEVERITY_KEY: 2.0,
+                        THRESHOLD_KEY: 0.0,
+                        SCORE_KEY: -2.0,
+                    }
+                },
+                "mood_great": {
+                    QUERY_INTENT_KEY: {
+                        NAME: "mood_great",
+                        SEVERITY_KEY: 5.0,
+                        THRESHOLD_KEY: 0.2,
+                        SCORE_KEY: -1.0,
+                    }
+                },
+                "affirm": {
+                    QUERY_INTENT_KEY: {
+                        NAME: "affirm",
+                        SEVERITY_KEY: 4.2,
+                        THRESHOLD_KEY: 0.2,
+                        SCORE_KEY: -4.0,
+                    }
+                },
+            },
+            ["path 1", "path 2"],
+        ),
     ],
 )
 async def test_multiple_warnings_sorted_on_severity(
@@ -511,10 +542,6 @@ async def test_multiple_warnings_sorted_on_severity(
         ),
     )
 
-    monkeypatch.setattr(
-        RulePolicy, "_prediction_states", _custom_prediction_states(True)
-    )
-
     test_story_path = (
         "data/test_yaml_stories/test_multiple_action_unlikely_intent_warnings.yml"
     )
@@ -522,7 +549,12 @@ async def test_multiple_warnings_sorted_on_severity(
     # We train on the stories as it is so that RulePolicy can memorize
     # it and we don't have to worry about other actions being
     # predicted correctly.
-    agent = await _train_rule_based_agent(moodbot_domain, Path(test_story_path))
+    agent = await _train_rule_based_agent(
+        moodbot_domain,
+        Path(test_story_path),
+        monkeypatch,
+        ignore_action_unlikely_intent=True,
+    )
 
     await rasa.core.test.test(
         test_story_path, agent, out_directory=str(tmp_path),

From 8aa33271806e05e9b9eb399095f765198edd34e9 Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 9 Jul 2021 13:18:21 +0200
Subject: [PATCH 11/18] formatting and hardcoded strings removed

---
 rasa/shared/core/events.py |  7 ++++---
 tests/core/test_test.py    | 10 ++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index f678beb100b6..38667713c728 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1536,7 +1536,7 @@ def __str__(self) -> Text:
     def inline_comment(self) -> Optional[Text]:
         """A comment attached to this event. Used during dumping."""
         if self.predicted_action_unlikely_intent:
-            return f"predicted: action_unlikely_intent"
+            return f"predicted: {ACTION_UNLIKELY_INTENT_NAME}"
         return None
 
     def __hash__(self) -> int:
@@ -1982,9 +1982,10 @@ def __init__(
 
     def inline_comment(self) -> Text:
         """A comment attached to this event. Used during dumping."""
+        comment = f"predicted: {self.action_name_prediction}"
         if self.predicted_action_unlikely_intent:
-            return f"predicted: {self.action_name_prediction} after {ACTION_UNLIKELY_INTENT_NAME}"
-        return f"predicted: {self.action_name_prediction}"
+            return f"{comment} after {ACTION_UNLIKELY_INTENT_NAME}"
+        return comment
 
     def as_story_string(self) -> Text:
         """Returns the story equivalent representation."""
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 0e50ef1b3f37..a9aca5fefcf1 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -218,7 +218,7 @@ async def test_action_unlikely_intent_warning(
     # Ensure that the story with warning is correctly formatted
     with open(str(tmp_path / "stories_with_warnings.yml"), "r") as f:
         content = f.read()
-        assert "# predicted: action_unlikely_intent" in content
+        assert f"# predicted: {ACTION_UNLIKELY_INTENT_NAME}" in content
 
 
 async def test_action_unlikely_intent_correctly_predicted(
@@ -326,9 +326,11 @@ async def test_wrong_action_after_action_unlikely_intent(
     # Ensure that the failed story is correctly formatted
     with open(str(tmp_path / "failed_test_stories.yml"), "r") as f:
         content = f.read()
-        assert "# predicted: utter_happy after action_unlikely_intent" in content
         assert (
-            "# predicted: action_default_fallback after action_unlikely_intent"
+            f"# predicted: utter_happy after {ACTION_UNLIKELY_INTENT_NAME}" in content
+        )
+        assert (
+            f"# predicted: action_default_fallback after {ACTION_UNLIKELY_INTENT_NAME}"
             in content
         )
 
@@ -460,7 +462,7 @@ async def test_action_unlikely_intent_warning_and_story_error(
     # Ensure that the failed story is correctly formatted
     with open(str(tmp_path / "failed_test_stories.yml"), "r") as f:
         content = f.read()
-        assert "# predicted: action_unlikely_intent" in content
+        assert f"# predicted: {ACTION_UNLIKELY_INTENT_NAME}" in content
         assert "# predicted: utter_goodbye" in content
 
 

From 4f0bc5081bdbeebbe1a7f344fecc1bfa073c093b Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 14:09:44 +0200
Subject: [PATCH 12/18] WarningPredictedAction

---
 rasa/core/test.py          | 14 ++++++++++++--
 rasa/shared/core/events.py | 31 ++++++++++++++++++++++---------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index ff6dcfacd604..22e6a7811f82 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -54,6 +54,7 @@
     EntitiesAdded,
     UserUttered,
     WronglyPredictedAction,
+    WarningPredictedAction,
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.training_data.formats.readerwriter import TrainingDataWriter
@@ -637,14 +638,23 @@ def _collect_action_executed_predictions(
                     "training stories and retrain."
                 )
             raise WrongPredictionException(error_msg)
+    elif previously_predicted_action_unlikely_intent:
+        partial_tracker.update(
+            WarningPredictedAction(
+                ACTION_UNLIKELY_INTENT_NAME,
+                predicted_action,
+                prediction.policy_name,
+                prediction.max_confidence,
+                event.timestamp,
+            )
+        )
     else:
         partial_tracker.update(
             ActionExecuted(
                 predicted_action,
                 prediction.policy_name,
                 prediction.max_confidence,
-                event.timestamp,
-                predicted_action_unlikely_intent=previously_predicted_action_unlikely_intent,
+                event.timestamp
             )
         )
 
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 38667713c728..99041943d42c 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1489,7 +1489,6 @@ def __init__(
         metadata: Optional[Dict] = None,
         action_text: Optional[Text] = None,
         hide_rule_turn: bool = False,
-        predicted_action_unlikely_intent: bool = False,
     ) -> None:
         """Creates event for a successful event execution.
 
@@ -1511,7 +1510,6 @@ def __init__(
         self.unpredictable = False
         self.action_text = action_text
         self.hide_rule_turn = hide_rule_turn
-        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
 
         super().__init__(timestamp, metadata)
 
@@ -1533,12 +1531,6 @@ def __str__(self) -> Text:
         """Returns event as human readable string."""
         return self.action_name or self.action_text
 
-    def inline_comment(self) -> Optional[Text]:
-        """A comment attached to this event. Used during dumping."""
-        if self.predicted_action_unlikely_intent:
-            return f"predicted: {ACTION_UNLIKELY_INTENT_NAME}"
-        return None
-
     def __hash__(self) -> int:
         """Returns unique hash for event."""
         return hash(self.__members__())
@@ -1970,6 +1962,7 @@ def __init__(
         See the docstring of the parent class `ActionExecuted` for more information.
         """
         self.action_name_prediction = action_name_prediction
+        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
         super().__init__(
             action_name_target,
             policy,
@@ -1977,7 +1970,6 @@ def __init__(
             timestamp,
             metadata,
             action_text=action_text_target,
-            predicted_action_unlikely_intent=predicted_action_unlikely_intent,
         )
 
     def inline_comment(self) -> Text:
@@ -1999,3 +1991,24 @@ def __repr__(self) -> Text:
             f"policy: {self.policy}, confidence: {self.confidence}, "
             f"metadata: {self.metadata})"
         )
+
+
+class WarningPredictedAction(ActionExecuted):
+    """The model predicted the correct action with warning."""
+
+    type_name = "warning_predicted"
+
+    def __init__(
+            self,
+            action_name_prediction: Text,
+            action_name: Optional[Text] = None,
+            policy: Optional[Text] = None,
+            confidence: Optional[float] = None,
+            timestamp: Optional[float] = None
+    ):
+        self.action_name_prediction = action_name_prediction
+        super().__init__(action_name, policy, confidence, timestamp)
+
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
+        return f"predicted: {self.action_name_prediction}"

From a4840cd223cc677e794d22f7eb9fe2c36e70cfce Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 14:12:43 +0200
Subject: [PATCH 13/18] Code style fix

---
 rasa/core/test.py          |  2 +-
 rasa/shared/core/events.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 22e6a7811f82..a72197ad3258 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -654,7 +654,7 @@ def _collect_action_executed_predictions(
                 predicted_action,
                 prediction.policy_name,
                 prediction.max_confidence,
-                event.timestamp
+                event.timestamp,
             )
         )
 
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 99041943d42c..2bad1e6c5193 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1999,12 +1999,12 @@ class WarningPredictedAction(ActionExecuted):
     type_name = "warning_predicted"
 
     def __init__(
-            self,
-            action_name_prediction: Text,
-            action_name: Optional[Text] = None,
-            policy: Optional[Text] = None,
-            confidence: Optional[float] = None,
-            timestamp: Optional[float] = None
+        self,
+        action_name_prediction: Text,
+        action_name: Optional[Text] = None,
+        policy: Optional[Text] = None,
+        confidence: Optional[float] = None,
+        timestamp: Optional[float] = None,
     ):
         self.action_name_prediction = action_name_prediction
         super().__init__(action_name, policy, confidence, timestamp)

From ea4fe8619669cf679cdb1bfdb12fd02a57329d0e Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Fri, 9 Jul 2021 17:06:43 +0200
Subject: [PATCH 14/18] Code style updates

---
 rasa/core/test.py                            | 91 ++++++++++++++++++--
 rasa/shared/core/events.py                   | 76 ----------------
 rasa/shared/core/training_data/structures.py |  6 --
 tests/test_model_testing.py                  |  7 +-
 4 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index a72197ad3258..5baf75aea3a1 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -52,9 +52,7 @@
 from rasa.shared.core.events import (
     ActionExecuted,
     EntitiesAdded,
-    UserUttered,
-    WronglyPredictedAction,
-    WarningPredictedAction,
+    UserUttered
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.training_data.formats.readerwriter import TrainingDataWriter
@@ -89,6 +87,83 @@ class WrongPredictionException(RasaException, ValueError):
     """Raised if a wrong prediction is encountered."""
 
 
+class WarningPredictedAction(ActionExecuted):
+    """The model predicted the correct action with warning."""
+
+    type_name = "warning_predicted"
+
+    def __init__(
+            self,
+            action_name_prediction: Text,
+            action_name: Optional[Text] = None,
+            policy: Optional[Text] = None,
+            confidence: Optional[float] = None,
+            timestamp: Optional[float] = None,
+            metadata: Optional[Dict] = None,
+    ):
+        self.action_name_prediction = action_name_prediction
+        super().__init__(action_name, policy, confidence, timestamp, metadata)
+
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
+        return f"predicted: {self.action_name_prediction}"
+
+
+class WronglyPredictedAction(ActionExecuted):
+    """The model predicted the wrong action.
+
+    Mostly used to mark wrong predictions and be able to
+    dump them as stories."""
+
+    type_name = "wrong_action"
+
+    def __init__(
+            self,
+            action_name_target: Text,
+            action_text_target: Text,
+            action_name_prediction: Text,
+            policy: Optional[Text] = None,
+            confidence: Optional[float] = None,
+            timestamp: Optional[float] = None,
+            metadata: Optional[Dict] = None,
+            predicted_action_unlikely_intent: bool = False,
+    ) -> None:
+        """Creates event for a successful event execution.
+
+        See the docstring of the parent class `ActionExecuted` for more information.
+        """
+        self.action_name_prediction = action_name_prediction
+        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
+        super().__init__(
+            action_name_target,
+            policy,
+            confidence,
+            timestamp,
+            metadata,
+            action_text=action_text_target,
+        )
+
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
+        comment = f"predicted: {self.action_name_prediction}"
+        if self.predicted_action_unlikely_intent:
+            return f"{comment} after {ACTION_UNLIKELY_INTENT_NAME}"
+        return comment
+
+    def as_story_string(self) -> Text:
+        """Returns the story equivalent representation."""
+        return f"{self.action_name}   <!-- {self.inline_comment()} -->"
+
+    def __repr__(self) -> Text:
+        """Returns event as string for debugging."""
+        return (
+            f"WronglyPredictedAction(action_target: {self.action_name}, "
+            f"action_prediction: {self.action_name_prediction}, "
+            f"policy: {self.policy}, confidence: {self.confidence}, "
+            f"metadata: {self.metadata})"
+        )
+
+
 class EvaluationStore:
     """Class storing action, intent and entity predictions and targets."""
 
@@ -570,7 +645,7 @@ def _collect_action_executed_predictions(
     expected_action = expected_action_name or expected_action_text
 
     policy_entity_result = None
-    previously_predicted_action_unlikely_intent = False
+    prev_action_unlikely_intent = False
 
     if circuit_breaker_tripped:
         prediction = PolicyPrediction([], policy_name=None)
@@ -593,7 +668,7 @@ def _collect_action_executed_predictions(
                 metadata=prediction.action_metadata,
             )
         )
-        previously_predicted_action_unlikely_intent = True
+        prev_action_unlikely_intent = True
         predicted_action, prediction, policy_entity_result = _run_action_prediction(
             processor, partial_tracker, expected_action
         )
@@ -617,7 +692,7 @@ def _collect_action_executed_predictions(
                 prediction.max_confidence,
                 event.timestamp,
                 metadata=prediction.action_metadata,
-                predicted_action_unlikely_intent=previously_predicted_action_unlikely_intent,
+                predicted_action_unlikely_intent=prev_action_unlikely_intent,
             )
         )
         if (
@@ -638,7 +713,7 @@ def _collect_action_executed_predictions(
                     "training stories and retrain."
                 )
             raise WrongPredictionException(error_msg)
-    elif previously_predicted_action_unlikely_intent:
+    elif prev_action_unlikely_intent:
         partial_tracker.update(
             WarningPredictedAction(
                 ACTION_UNLIKELY_INTENT_NAME,
@@ -646,6 +721,7 @@ def _collect_action_executed_predictions(
                 prediction.policy_name,
                 prediction.max_confidence,
                 event.timestamp,
+                prediction.action_metadata
             )
         )
     else:
@@ -655,6 +731,7 @@ def _collect_action_executed_predictions(
                 prediction.policy_name,
                 prediction.max_confidence,
                 event.timestamp,
+                metadata=prediction.action_metadata
             )
         )
 
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 2bad1e6c5193..8f845856535f 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -1936,79 +1936,3 @@ def apply_to(self, tracker: "DialogueStateTracker") -> None:
         """Applies event to current conversation state."""
         # noinspection PyProtectedMember
         tracker._reset()
-
-
-class WronglyPredictedAction(ActionExecuted):
-    """The model predicted the wrong action.
-
-    Mostly used to mark wrong predictions and be able to
-    dump them as stories."""
-
-    type_name = "wrong_action"
-
-    def __init__(
-        self,
-        action_name_target: Text,
-        action_text_target: Text,
-        action_name_prediction: Text,
-        policy: Optional[Text] = None,
-        confidence: Optional[float] = None,
-        timestamp: Optional[float] = None,
-        metadata: Optional[Dict] = None,
-        predicted_action_unlikely_intent: bool = False,
-    ) -> None:
-        """Creates event for a successful event execution.
-
-        See the docstring of the parent class `ActionExecuted` for more information.
-        """
-        self.action_name_prediction = action_name_prediction
-        self.predicted_action_unlikely_intent = predicted_action_unlikely_intent
-        super().__init__(
-            action_name_target,
-            policy,
-            confidence,
-            timestamp,
-            metadata,
-            action_text=action_text_target,
-        )
-
-    def inline_comment(self) -> Text:
-        """A comment attached to this event. Used during dumping."""
-        comment = f"predicted: {self.action_name_prediction}"
-        if self.predicted_action_unlikely_intent:
-            return f"{comment} after {ACTION_UNLIKELY_INTENT_NAME}"
-        return comment
-
-    def as_story_string(self) -> Text:
-        """Returns the story equivalent representation."""
-        return f"{self.action_name}   <!-- {self.inline_comment()} -->"
-
-    def __repr__(self) -> Text:
-        """Returns event as string for debugging."""
-        return (
-            f"WronglyPredictedAction(action_target: {self.action_name}, "
-            f"action_prediction: {self.action_name_prediction}, "
-            f"policy: {self.policy}, confidence: {self.confidence}, "
-            f"metadata: {self.metadata})"
-        )
-
-
-class WarningPredictedAction(ActionExecuted):
-    """The model predicted the correct action with warning."""
-
-    type_name = "warning_predicted"
-
-    def __init__(
-        self,
-        action_name_prediction: Text,
-        action_name: Optional[Text] = None,
-        policy: Optional[Text] = None,
-        confidence: Optional[float] = None,
-        timestamp: Optional[float] = None,
-    ):
-        self.action_name_prediction = action_name_prediction
-        super().__init__(action_name, policy, confidence, timestamp)
-
-    def inline_comment(self) -> Text:
-        """A comment attached to this event. Used during dumping."""
-        return f"predicted: {self.action_name_prediction}"
diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py
index a79be5c69a2b..3ea111e666cf 100644
--- a/rasa/shared/core/training_data/structures.py
+++ b/rasa/shared/core/training_data/structures.py
@@ -31,7 +31,6 @@
     ActionExecuted,
     Event,
     SessionStarted,
-    WronglyPredictedAction,
 )
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.exceptions import RasaCoreException
@@ -236,11 +235,6 @@ def is_action_unlikely_intent(event: Event) -> bool:
         return (
             type(event) == ActionExecuted
             and event.action_name == ACTION_UNLIKELY_INTENT_NAME
-        ) or (
-            type(event) == WronglyPredictedAction
-            and event.action_name
-            == event.action_name_prediction
-            == ACTION_UNLIKELY_INTENT_NAME
         )
 
     @staticmethod
diff --git a/tests/test_model_testing.py b/tests/test_model_testing.py
index 9fb27e3825a1..5c5fbf78bb84 100644
--- a/tests/test_model_testing.py
+++ b/tests/test_model_testing.py
@@ -12,12 +12,15 @@
 import rasa.utils.io
 from rasa.core.agent import Agent
 from rasa.shared.core.events import UserUttered
-from rasa.core.test import EvaluationStore, WronglyClassifiedUserUtterance
+from rasa.core.test import (
+    EvaluationStore,
+    WronglyClassifiedUserUtterance,
+    WronglyPredictedAction,
+)
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
     YAMLStoryWriter,
 )
-from rasa.shared.core.events import WronglyPredictedAction
 import rasa.model
 import rasa.cli.utils
 from rasa.nlu.test import NO_ENTITY

From acf2a44278ac928cb58028aa8cc1374cf18184ed Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Mon, 12 Jul 2021 10:17:13 +0200
Subject: [PATCH 15/18] Fixes and
 `test_action_unlikely_intent_fail_on_prediction_errors`

---
 rasa/core/test.py       | 25 +++++++++++++++++++-----
 tests/core/test_test.py | 43 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 5baf75aea3a1..c42237cdd140 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -26,6 +26,9 @@
 from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
     YAMLStoryWriter,
 )
+from rasa.shared.core.training_data.structures import (
+    StoryStep,
+)
 from rasa.shared.core.domain import Domain
 from rasa.nlu.constants import (
     RESPONSE_SELECTOR_DEFAULT_INTENT,
@@ -680,9 +683,7 @@ def _collect_action_executed_predictions(
             action_predictions=[predicted_action], action_targets=[expected_action]
         )
 
-    if action_executed_eval_store.has_prediction_target_mismatch() or (
-        predicted_action_unlikely_intent and predicted_action != expected_action
-    ):
+    if action_executed_eval_store.has_prediction_target_mismatch():
         partial_tracker.update(
             WronglyPredictedAction(
                 expected_action_name,
@@ -698,7 +699,7 @@ def _collect_action_executed_predictions(
         if (
             fail_on_prediction_errors
             and predicted_action != ACTION_UNLIKELY_INTENT_NAME
-            and expected_action != ACTION_UNLIKELY_INTENT_NAME
+            and predicted_action != expected_action
         ):
             story_dump = YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
             error_msg = (
@@ -972,6 +973,20 @@ async def _collect_story_predictions(
     )
 
 
+def _filter_step_events(step: StoryStep) -> StoryStep:
+    events = []
+    for event in step.events:
+        if (
+                type(event) == WronglyPredictedAction
+                and event.action_name == event.action_name_prediction == ACTION_UNLIKELY_INTENT_NAME
+        ):
+            continue
+        events.append(event)
+    updated_step = step.create_copy(use_new_id=False)
+    updated_step.events = events
+    return updated_step
+
+
 def _log_stories(
     trackers: List[DialogueStateTracker], file_path: Text, message_if_no_trackers: Text
 ) -> None:
@@ -981,7 +996,7 @@ def _log_stories(
             f.write(f"# {message_if_no_trackers}")
         else:
             stories = [tracker.as_story(include_source=True) for tracker in trackers]
-            steps = [step for story in stories for step in story.story_steps]
+            steps = [_filter_step_events(step) for story in stories for step in story.story_steps]
             f.write(YAMLStoryWriter().dumps(steps))
 
 
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index a9aca5fefcf1..65ddd6962e34 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -256,13 +256,54 @@ async def test_action_unlikely_intent_correctly_predicted(
     )
 
     result = await rasa.core.test.test(
-        str(file_name), agent, out_directory=str(tmp_path),
+        str(file_name), agent, out_directory=str(tmp_path), fail_on_prediction_errors = True
     )
     assert "report" in result.keys()
     assert result["report"]["conversation_accuracy"]["correct"] == 1
     assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
 
 
+async def test_action_unlikely_intent_fail_on_prediction_errors(
+        monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
+):
+    monkeypatch.setattr(
+        SimplePolicyEnsemble,
+        "probabilities_using_best_policy",
+        _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
+    )
+
+    file_name = tmp_path / "test_action_unlikely_intent_2.yml"
+    file_name.write_text(
+        """
+        version: "2.0"
+        stories:
+          - story: unlikely path (with action_unlikely_intent)
+            steps:
+              - user: |
+                  very terrible
+                intent: mood_unhappy
+              - action: utter_cheer_up
+              - action: action_unlikely_intent
+              - action: utter_did_that_help
+              - intent: affirm
+              - action: utter_happy
+        """
+    )
+
+    # We train on the above story so that RulePolicy can memorize
+    # it and we don't have to worry about other actions being
+    # predicted correctly.
+    agent = await _train_rule_based_agent(
+        moodbot_domain, file_name, monkeypatch, ignore_action_unlikely_intent=False
+    )
+
+    with pytest.raises(rasa.core.test.WrongPredictionException):
+        result = await rasa.core.test.test(
+            str(file_name), agent, out_directory=str(tmp_path), fail_on_prediction_errors=True
+        )
+
+
+
 async def test_wrong_action_after_action_unlikely_intent(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):

From 5c958556393e6e36e7df75a1b67252f989f06fe9 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Mon, 12 Jul 2021 10:17:55 +0200
Subject: [PATCH 16/18] Formatting

---
 rasa/core/test.py       | 58 ++++++++++++++++++++---------------------
 tests/core/test_test.py | 13 ++++++---
 2 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index c42237cdd140..b8620253f5ce 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -26,9 +26,7 @@
 from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
     YAMLStoryWriter,
 )
-from rasa.shared.core.training_data.structures import (
-    StoryStep,
-)
+from rasa.shared.core.training_data.structures import StoryStep
 from rasa.shared.core.domain import Domain
 from rasa.nlu.constants import (
     RESPONSE_SELECTOR_DEFAULT_INTENT,
@@ -52,11 +50,7 @@
     ENTITY_ATTRIBUTE_TEXT,
 )
 from rasa.constants import RESULTS_FILE, PERCENTAGE_KEY
-from rasa.shared.core.events import (
-    ActionExecuted,
-    EntitiesAdded,
-    UserUttered
-)
+from rasa.shared.core.events import ActionExecuted, EntitiesAdded, UserUttered
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.training_data.formats.readerwriter import TrainingDataWriter
 from rasa.shared.importers.importer import TrainingDataImporter
@@ -96,13 +90,13 @@ class WarningPredictedAction(ActionExecuted):
     type_name = "warning_predicted"
 
     def __init__(
-            self,
-            action_name_prediction: Text,
-            action_name: Optional[Text] = None,
-            policy: Optional[Text] = None,
-            confidence: Optional[float] = None,
-            timestamp: Optional[float] = None,
-            metadata: Optional[Dict] = None,
+        self,
+        action_name_prediction: Text,
+        action_name: Optional[Text] = None,
+        policy: Optional[Text] = None,
+        confidence: Optional[float] = None,
+        timestamp: Optional[float] = None,
+        metadata: Optional[Dict] = None,
     ):
         self.action_name_prediction = action_name_prediction
         super().__init__(action_name, policy, confidence, timestamp, metadata)
@@ -121,15 +115,15 @@ class WronglyPredictedAction(ActionExecuted):
     type_name = "wrong_action"
 
     def __init__(
-            self,
-            action_name_target: Text,
-            action_text_target: Text,
-            action_name_prediction: Text,
-            policy: Optional[Text] = None,
-            confidence: Optional[float] = None,
-            timestamp: Optional[float] = None,
-            metadata: Optional[Dict] = None,
-            predicted_action_unlikely_intent: bool = False,
+        self,
+        action_name_target: Text,
+        action_text_target: Text,
+        action_name_prediction: Text,
+        policy: Optional[Text] = None,
+        confidence: Optional[float] = None,
+        timestamp: Optional[float] = None,
+        metadata: Optional[Dict] = None,
+        predicted_action_unlikely_intent: bool = False,
     ) -> None:
         """Creates event for a successful event execution.
 
@@ -722,7 +716,7 @@ def _collect_action_executed_predictions(
                 prediction.policy_name,
                 prediction.max_confidence,
                 event.timestamp,
-                prediction.action_metadata
+                prediction.action_metadata,
             )
         )
     else:
@@ -732,7 +726,7 @@ def _collect_action_executed_predictions(
                 prediction.policy_name,
                 prediction.max_confidence,
                 event.timestamp,
-                metadata=prediction.action_metadata
+                metadata=prediction.action_metadata,
             )
         )
 
@@ -977,8 +971,10 @@ def _filter_step_events(step: StoryStep) -> StoryStep:
     events = []
     for event in step.events:
         if (
-                type(event) == WronglyPredictedAction
-                and event.action_name == event.action_name_prediction == ACTION_UNLIKELY_INTENT_NAME
+            type(event) == WronglyPredictedAction
+            and event.action_name
+            == event.action_name_prediction
+            == ACTION_UNLIKELY_INTENT_NAME
         ):
             continue
         events.append(event)
@@ -996,7 +992,11 @@ def _log_stories(
             f.write(f"# {message_if_no_trackers}")
         else:
             stories = [tracker.as_story(include_source=True) for tracker in trackers]
-            steps = [_filter_step_events(step) for story in stories for step in story.story_steps]
+            steps = [
+                _filter_step_events(step)
+                for story in stories
+                for step in story.story_steps
+            ]
             f.write(YAMLStoryWriter().dumps(steps))
 
 
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 65ddd6962e34..647871ec94fa 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -256,7 +256,10 @@ async def test_action_unlikely_intent_correctly_predicted(
     )
 
     result = await rasa.core.test.test(
-        str(file_name), agent, out_directory=str(tmp_path), fail_on_prediction_errors = True
+        str(file_name),
+        agent,
+        out_directory=str(tmp_path),
+        fail_on_prediction_errors=True,
     )
     assert "report" in result.keys()
     assert result["report"]["conversation_accuracy"]["correct"] == 1
@@ -264,7 +267,7 @@ async def test_action_unlikely_intent_correctly_predicted(
 
 
 async def test_action_unlikely_intent_fail_on_prediction_errors(
-        monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
+    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):
     monkeypatch.setattr(
         SimplePolicyEnsemble,
@@ -299,11 +302,13 @@ async def test_action_unlikely_intent_fail_on_prediction_errors(
 
     with pytest.raises(rasa.core.test.WrongPredictionException):
         result = await rasa.core.test.test(
-            str(file_name), agent, out_directory=str(tmp_path), fail_on_prediction_errors=True
+            str(file_name),
+            agent,
+            out_directory=str(tmp_path),
+            fail_on_prediction_errors=True,
         )
 
 
-
 async def test_wrong_action_after_action_unlikely_intent(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):

From 2caf15aeb068ccebaa58817187d082b2d9db3176 Mon Sep 17 00:00:00 2001
From: alwx <alwxndr@gmail.com>
Date: Mon, 12 Jul 2021 12:52:22 +0200
Subject: [PATCH 17/18] Code style & test updates

---
 rasa/core/test.py          |  13 ++---
 rasa/shared/core/events.py |   1 -
 tests/core/test_test.py    | 103 ++++++++++++++++++++-----------------
 3 files changed, 61 insertions(+), 56 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index b8620253f5ce..b11203bc3112 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -669,13 +669,10 @@ def _collect_action_executed_predictions(
         predicted_action, prediction, policy_entity_result = _run_action_prediction(
             processor, partial_tracker, expected_action
         )
-        action_executed_eval_store.add_to_store(
-            action_predictions=[predicted_action], action_targets=[expected_action]
-        )
-    else:
-        action_executed_eval_store.add_to_store(
-            action_predictions=[predicted_action], action_targets=[expected_action]
-        )
+
+    action_executed_eval_store.add_to_store(
+        action_predictions=[predicted_action], action_targets=[expected_action]
+    )
 
     if action_executed_eval_store.has_prediction_target_mismatch():
         partial_tracker.update(
@@ -971,7 +968,7 @@ def _filter_step_events(step: StoryStep) -> StoryStep:
     events = []
     for event in step.events:
         if (
-            type(event) == WronglyPredictedAction
+            isinstance(event, WronglyPredictedAction)
             and event.action_name
             == event.action_name_prediction
             == ACTION_UNLIKELY_INTENT_NAME
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
index 8f845856535f..a4b5bffe5097 100644
--- a/rasa/shared/core/events.py
+++ b/rasa/shared/core/events.py
@@ -36,7 +36,6 @@
     ENTITY_LABEL_SEPARATOR,
     ACTION_SESSION_START_NAME,
     ACTION_LISTEN_NAME,
-    ACTION_UNLIKELY_INTENT_NAME,
 )
 from rasa.shared.exceptions import UnsupportedFeatureException
 from rasa.shared.nlu.constants import (
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 647871ec94fa..b0b125f32c1e 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -209,7 +209,10 @@ async def test_action_unlikely_intent_warning(
     )
 
     result = await rasa.core.test.test(
-        str(file_name), agent, out_directory=str(tmp_path),
+        str(file_name),
+        agent,
+        out_directory=str(tmp_path),
+        fail_on_prediction_errors=True,
     )
     assert "report" in result.keys()
     assert result["report"]["conversation_accuracy"]["correct"] == 1
@@ -266,49 +269,6 @@ async def test_action_unlikely_intent_correctly_predicted(
     assert result["report"]["conversation_accuracy"]["with_warnings"] == 0
 
 
-async def test_action_unlikely_intent_fail_on_prediction_errors(
-    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
-):
-    monkeypatch.setattr(
-        SimplePolicyEnsemble,
-        "probabilities_using_best_policy",
-        _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
-    )
-
-    file_name = tmp_path / "test_action_unlikely_intent_2.yml"
-    file_name.write_text(
-        """
-        version: "2.0"
-        stories:
-          - story: unlikely path (with action_unlikely_intent)
-            steps:
-              - user: |
-                  very terrible
-                intent: mood_unhappy
-              - action: utter_cheer_up
-              - action: action_unlikely_intent
-              - action: utter_did_that_help
-              - intent: affirm
-              - action: utter_happy
-        """
-    )
-
-    # We train on the above story so that RulePolicy can memorize
-    # it and we don't have to worry about other actions being
-    # predicted correctly.
-    agent = await _train_rule_based_agent(
-        moodbot_domain, file_name, monkeypatch, ignore_action_unlikely_intent=False
-    )
-
-    with pytest.raises(rasa.core.test.WrongPredictionException):
-        result = await rasa.core.test.test(
-            str(file_name),
-            agent,
-            out_directory=str(tmp_path),
-            fail_on_prediction_errors=True,
-        )
-
-
 async def test_wrong_action_after_action_unlikely_intent(
     monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
 ):
@@ -362,7 +322,10 @@ async def test_wrong_action_after_action_unlikely_intent(
     )
 
     result = await rasa.core.test.test(
-        str(test_file_name), agent, out_directory=str(tmp_path),
+        str(test_file_name),
+        agent,
+        out_directory=str(tmp_path),
+        fail_on_prediction_errors=False,
     )
     assert "report" in result.keys()
     assert result["report"]["conversation_accuracy"]["correct"] == 0
@@ -432,7 +395,7 @@ async def test_action_unlikely_intent_not_found(
     )
 
     result = await rasa.core.test.test(
-        str(test_file_name), agent, out_directory=str(tmp_path),
+        str(test_file_name), agent, out_directory=str(tmp_path)
     )
     assert "report" in result.keys()
     assert result["report"]["conversation_accuracy"]["correct"] == 0
@@ -512,6 +475,49 @@ async def test_action_unlikely_intent_warning_and_story_error(
         assert "# predicted: utter_goodbye" in content
 
 
+async def test_fail_on_prediction_errors(
+    monkeypatch: MonkeyPatch, tmp_path: Path, moodbot_domain: Domain
+):
+    monkeypatch.setattr(
+        SimplePolicyEnsemble,
+        "probabilities_using_best_policy",
+        _probabilities_with_action_unlikely_intent_for(["mood_unhappy"]),
+    )
+
+    file_name = tmp_path / "test_action_unlikely_intent_2.yml"
+    file_name.write_text(
+        """
+        version: "2.0"
+        stories:
+          - story: unlikely path (with action_unlikely_intent)
+            steps:
+              - user: |
+                  very terrible
+                intent: mood_unhappy
+              - action: utter_cheer_up
+              - action: action_unlikely_intent
+              - action: utter_did_that_help
+              - intent: affirm
+              - action: utter_happy
+        """
+    )
+
+    # We train on the above story so that RulePolicy can memorize
+    # it and we don't have to worry about other actions being
+    # predicted correctly.
+    agent = await _train_rule_based_agent(
+        moodbot_domain, file_name, monkeypatch, ignore_action_unlikely_intent=False
+    )
+
+    with pytest.raises(rasa.core.test.WrongPredictionException):
+        await rasa.core.test.test(
+            str(file_name),
+            agent,
+            out_directory=str(tmp_path),
+            fail_on_prediction_errors=True,
+        )
+
+
 @pytest.mark.parametrize(
     "metadata_for_intents, story_order",
     [
@@ -605,7 +611,10 @@ async def test_multiple_warnings_sorted_on_severity(
     )
 
     await rasa.core.test.test(
-        test_story_path, agent, out_directory=str(tmp_path),
+        test_story_path,
+        agent,
+        out_directory=str(tmp_path),
+        fail_on_prediction_errors=True,
     )
 
     warnings_file = tmp_path / STORIES_WITH_WARNINGS_FILE

From 6924b2063de81950837b4bfa39de6e229f56fc9e Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Mon, 12 Jul 2021 13:55:40 +0200
Subject: [PATCH 18/18] fix tests and code formatting

---
 rasa/core/test.py                         | 7 ++++++-
 tests/shared/utils/schemas/test_events.py | 9 +++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index b8620253f5ce..7d1fd0dadf3b 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -98,6 +98,10 @@ def __init__(
         timestamp: Optional[float] = None,
         metadata: Optional[Dict] = None,
     ):
+        """Creates event `action_unlikely_intent` predicted as warning.
+
+        See the docstring of the parent class for more information.
+        """
         self.action_name_prediction = action_name_prediction
         super().__init__(action_name, policy, confidence, timestamp, metadata)
 
@@ -110,7 +114,8 @@ class WronglyPredictedAction(ActionExecuted):
     """The model predicted the wrong action.
 
     Mostly used to mark wrong predictions and be able to
-    dump them as stories."""
+    dump them as stories.
+    """
 
     type_name = "wrong_action"
 
diff --git a/tests/shared/utils/schemas/test_events.py b/tests/shared/utils/schemas/test_events.py
index 7db5abcddde6..186232c784c3 100644
--- a/tests/shared/utils/schemas/test_events.py
+++ b/tests/shared/utils/schemas/test_events.py
@@ -22,8 +22,9 @@ def test_remote_action_validate_all_event_subclasses(event_class: Type[Event]):
         response = {"events": [{"event": event_class.type_name}], "responses": []}
 
     # ignore the below events since these are not sent or received outside Rasa
-    if (
-        event_class.type_name != "wrong_utterance"
-        and event_class.type_name != "wrong_action"
-    ):
+    if event_class.type_name not in [
+        "wrong_utterance",
+        "wrong_action",
+        "warning_predicted",
+    ]:
         jsonschema.validate(response, RemoteAction.action_response_format_spec())