Include story source filename in core output for failed stories

Include the story source filename in the story name in the failed stories output to help find the failed story more easily (see RasaHQ#3419). The source filename is passed from the `StoryFileReader` down to each `StoryStep`. Besides the story block names, the source filename is included in the tracker events which are used for outputting the failed stories. Because the story files are copied to a temporary folder, it is not possible to include the original full story path. Instead, only the file name is included. If a recursive folder structure is used with the same story file names, it can still be hard to find the problem file.
cheemingli · Mar 25, 2020 · 8537252 · 8537252
1 parent 99cff55
commit 8537252
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 8 deletions.
diff --git a/rasa/core/training/dsl.py b/rasa/core/training/dsl.py
@@ -6,6 +6,7 @@
 from typing import Optional, List, Text, Any, Dict, TYPE_CHECKING, Iterable
 
 import rasa.utils.io as io_utils
+from rasa import data
 from rasa.constants import DOCS_BASE_URL, DOCS_URL_STORIES, DOCS_URL_DOMAINS
 from rasa.core import utils
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
@@ -75,8 +76,9 @@ def _parse_item(self, line: Text) -> Optional["Message"]:
 
 
 class StoryStepBuilder:
-    def __init__(self, name):
+    def __init__(self, name, source_name):
         self.name = name
+        self.source_name = source_name
         self.story_steps = []
         self.current_steps = []
         self.start_checkpoints = []
@@ -160,7 +162,11 @@ def _next_story_steps(self):
         if not start_checkpoints:
             start_checkpoints = [Checkpoint(STORY_START)]
         current_turns = [
-            StoryStep(block_name=self.name, start_checkpoints=start_checkpoints)
+            StoryStep(
+                block_name=self.name,
+                start_checkpoints=start_checkpoints,
+                source_name=self.source_name,
+            )
         ]
         return current_turns
 
@@ -174,13 +180,15 @@ def __init__(
         domain: Optional[Domain] = None,
         template_vars: Optional[Dict] = None,
         use_e2e: bool = False,
+        source_name=None,
     ):
         self.story_steps = []
         self.current_step_builder: Optional[StoryStepBuilder] = None
         self.domain = domain
         self.interpreter = interpreter
         self.template_variables = template_vars if template_vars else {}
         self.use_e2e = use_e2e
+        self.source_name = source_name
 
     @staticmethod
     async def read_from_folder(
@@ -250,7 +258,10 @@ async def read_from_file(
         try:
             with open(filename, "r", encoding=io_utils.DEFAULT_ENCODING) as f:
                 lines = f.readlines()
-            reader = StoryFileReader(interpreter, domain, template_variables, use_e2e)
+            source_name = data.get_source_file_name(filename)
+            reader = StoryFileReader(
+                interpreter, domain, template_variables, use_e2e, source_name
+            )
             return await reader.process_lines(lines)
         except ValueError as err:
             file_info = "Invalid story file format. Failed to parse '{}'".format(
@@ -327,7 +338,7 @@ async def process_lines(self, lines: List[Text]) -> List[StoryStep]:
                 elif line.startswith("#"):
                     # reached a new story block
                     name = line[1:].strip("# ")
-                    self.new_story_part(name)
+                    self.new_story_part(name, self.source_name)
                 elif line.startswith(">"):
                     # reached a checkpoint
                     name, conditions = self._parse_event_line(line[1:].strip())
@@ -389,9 +400,9 @@ def _add_current_stories_to_result(self):
             self.current_step_builder.flush()
             self.story_steps.extend(self.current_step_builder.story_steps)
 
-    def new_story_part(self, name):
+    def new_story_part(self, name, source_name):
         self._add_current_stories_to_result()
-        self.current_step_builder = StoryStepBuilder(name)
+        self.current_step_builder = StoryStepBuilder(name, source_name)
 
     def add_checkpoint(self, name: Text, conditions: Optional[Dict[Text, Any]]) -> None:
 

diff --git a/rasa/core/training/generator.py b/rasa/core/training/generator.py
@@ -521,11 +521,17 @@ def _process_step(
                 # contribute to the trackers events
                 if tracker.sender_id:
                     if step.block_name not in tracker.sender_id.split(" > "):
-                        new_sender = tracker.sender_id + " > " + step.block_name
+                        new_sender = (
+                            tracker.sender_id
+                            + " > "
+                            + step.block_name
+                            + " > "
+                            + step.source_name
+                        )
                     else:
                         new_sender = tracker.sender_id
                 else:
-                    new_sender = step.block_name
+                    new_sender = step.block_name + " > " + step.source_name
                 trackers.append(tracker.copy(new_sender))
 
         end_trackers = []

diff --git a/rasa/core/training/structures.py b/rasa/core/training/structures.py
@@ -113,12 +113,14 @@ def __init__(
         start_checkpoints: Optional[List[Checkpoint]] = None,
         end_checkpoints: Optional[List[Checkpoint]] = None,
         events: Optional[List[Event]] = None,
+        source_name: Optional[Text] = None,
     ) -> None:
 
         self.end_checkpoints = end_checkpoints if end_checkpoints else []
         self.start_checkpoints = start_checkpoints if start_checkpoints else []
         self.events = events if events else []
         self.block_name = block_name
+        self.source_name = source_name
         # put a counter prefix to uuid to get reproducible sorting results
         global STEP_COUNT
         self.id = "{}_{}".format(STEP_COUNT, uuid.uuid4().hex)
@@ -132,6 +134,7 @@ def create_copy(self, use_new_id: bool) -> "StoryStep":
             self.start_checkpoints,
             self.end_checkpoints,
             self.events[:],
+            self.source_name,
         )
         if not use_new_id:
             copied.id = self.id

diff --git a/rasa/data.py b/rasa/data.py
@@ -212,3 +212,10 @@ def _copy_files_to_new_dir(files: Iterable[Text]) -> Text:
         shutil.copy2(f, os.path.join(directory, unique_file_name))
 
     return directory
+
+
+def get_source_file_name(filename: Text):
+    src_filename = os.path.basename(filename)
+    # Removing unique prefix
+    filename_parts = src_filename.split("_", 1)
+    return filename_parts[1] if len(filename_parts) > 1 else filename_parts[0]
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
@@ -184,3 +184,10 @@ def test_is_not_nlu_file_with_json():
     io.write_text_file('{"test": "a"}', file)
 
     assert not data.is_nlu_file(file)
+
+
+def test_get_source_file_name():
+    assert data.get_source_file_name("") == ""
+    assert data.get_source_file_name("/tmp/stories.md") == "stories.md"
+    assert data.get_source_file_name("/tmp/123_stories.md") == "stories.md"
+    assert data.get_source_file_name("/tmp/123_my_stories.md") == "my_stories.md"