From c89ee0378fa341c8a1885419ea52601a78a1d781 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Mon, 17 Aug 2020 12:18:16 +0200
Subject: [PATCH 01/34] Create conversation_tests.yml

---
 .../tests/conversation_tests.yml              | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 rasa/cli/initial_project/tests/conversation_tests.yml

diff --git a/rasa/cli/initial_project/tests/conversation_tests.yml b/rasa/cli/initial_project/tests/conversation_tests.yml
new file mode 100644
index 000000000000..6aafaa98e701
--- /dev/null
+++ b/rasa/cli/initial_project/tests/conversation_tests.yml
@@ -0,0 +1,91 @@
+#### This file contains tests to evaluate that your bot behaves as expected.
+#### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/
+
+e2e_tests:
+- story: happy path 1
+  steps:
+  - user: |
+      hello there!
+    intent: greet
+  - action: utter_greet
+  - user: |
+      amazing
+    intent: mood_great
+  - action: utter_happy
+
+- story: happy path 2
+  steps:
+  - user: |
+      hello there!
+    intent: greet
+  - action: utter_greet
+  - user: |
+      amazing
+    intent: mood_great
+  - action: utter_happy
+  - user: |
+      bye-bye!
+    intent: goodbye
+  - action: utter_goodbye
+
+- story: sad path 1
+  steps:
+  - user: |
+      hello
+    intent: greet
+  - action: utter_greet
+  - user: |
+      not good
+    intent: mood_unhappy
+  - action: utter_cheer_up
+  - action: utter_did_that_help
+  - user: |
+      yes
+    intent: affirm
+  - action: utter_happy
+
+- story: sad path 2
+  steps:
+  - user: |
+      hello
+    intent: greet
+  - action: utter_greet
+  - user: |
+      not good
+    intent: mood_unhappy
+  - action: utter_cheer_up
+  - action: utter_did_that_help
+  - user: |
+      not really
+    intent: deny
+  - action: utter_goodbye
+
+- story: sad path 3
+  steps:
+  - user: |
+      hi
+    intent: greet
+  - action: utter_greet
+  - user: |
+      very terrible
+    intent: mood_unhappy
+  - action: utter_cheer_up
+  - action: utter_did_that_help
+  - user: |
+      no
+    intent: deny
+  - action: utter_goodbye
+
+- story: say goodbye
+  steps:
+  - user: |
+      bye-bye!
+    intent: goodbye
+  - action: utter_goodbye
+
+- story: bot challenge
+  steps:
+  - user: |
+      are you a bot?
+    intent: bot_challenge
+  - action: utter_iamabot

From 2c1d6e1257489e66fc7444b10586365287700593 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 19 Aug 2020 14:06:01 +0200
Subject: [PATCH 02/34] implement conversation tests using the new yml format

---
 data/test_dialogues/default.json              |   2 +
 data/test_trackers/tracker_moodbot.json       |   6 +
 docs/docs/business-logic.mdx                  |   4 +-
 docs/docs/chitchat-faqs.mdx                   |  10 +-
 docs/docs/command-line-interface.mdx          |  10 +-
 docs/docs/fallback-handoff.mdx                |   4 +-
 docs/docs/jupyter-notebooks.mdx               |   5 +-
 docs/docs/setting-up-ci-cd.mdx                |   4 +-
 docs/docs/testing-your-assistant.mdx          |  12 +-
 docs/docs/unexpected-input.mdx                |   4 +-
 examples/formbot/README.md                    |   4 +-
 examples/formbot/tests/end-to-end-stories.md  |  37 -----
 examples/formbot/tests/test_conversations.yml |  63 +++++++++
 examples/knowledgebasebot/README.md           |   4 +-
 examples/moodbot/README.md                    |   4 +-
 rasa/cli/data.py                              |   3 +-
 .../tests/conversation_tests.md               |  51 -------
 ...ation_tests.yml => test_conversations.yml} |   2 +-
 rasa/cli/test.py                              |   2 +-
 rasa/constants.py                             |   1 +
 rasa/core/agent.py                            |   2 +-
 rasa/core/domain.py                           |   4 +-
 rasa/core/events/__init__.py                  |  36 ++---
 rasa/core/interpreter.py                      |  10 +-
 rasa/core/processor.py                        |  32 +++--
 rasa/core/schemas/stories.yml                 |  49 ++++++-
 rasa/core/test.py                             | 107 ++++++++-------
 rasa/core/trackers.py                         |  18 ++-
 rasa/core/training/__init__.py                |  11 --
 rasa/core/training/dsl.py                     |  58 --------
 rasa/core/training/interactive.py             |  23 ++--
 rasa/core/training/loading.py                 |  35 ++---
 .../story_reader/markdown_story_reader.py     |  94 ++++++++++---
 .../training/story_reader/story_reader.py     |   4 -
 .../story_reader/yaml_story_reader.py         |  95 +++++++++----
 .../story_writer/yaml_story_writer.py         |  74 ++++++++--
 rasa/core/training/structures.py              |   2 +-
 rasa/data.py                                  | 103 +++++++-------
 rasa/importers/importer.py                    |  12 +-
 rasa/importers/multi_project.py               |  10 +-
 rasa/importers/rasa.py                        |   7 +-
 rasa/importers/utils.py                       |   4 +-
 rasa/nlu/test.py                              |   2 +-
 rasa/nlu/training_data/entities_parser.py     |  11 ++
 rasa/nlu/training_data/formats/markdown.py    |  26 +---
 rasa/nlu/training_data/formats/rasa_yaml.py   |   4 +-
 rasa/nlu/training_data/synonyms_parser.py     |   6 +-
 rasa/nlu/training_data/training_data.py       |  18 +--
 rasa/utils/io.py                              |   2 +
 rasa/utils/plotting.py                        |  44 ++++--
 tests/core/test_data.py                       |  10 +-
 tests/core/test_dsl.py                        | 127 ------------------
 tests/core/test_evaluation.py                 |   8 +-
 tests/core/test_processor.py                  |  10 +-
 tests/core/test_training.py                   |   4 +-
 tests/core/test_visualization.py              |   4 +-
 .../test_markdown_story_reader.py             | 121 ++++++++++++++++-
 .../story_reader/test_yaml_story_reader.py    |  14 +-
 .../story_writer/test_yaml_story_writer.py    |  17 +--
 tests/importers/test_multi_project.py         |  56 ++++----
 tests/test_data.py                            |  20 ++-
 tests/test_server.py                          |   1 +
 62 files changed, 825 insertions(+), 702 deletions(-)
 delete mode 100644 examples/formbot/tests/end-to-end-stories.md
 create mode 100644 examples/formbot/tests/test_conversations.yml
 delete mode 100644 rasa/cli/initial_project/tests/conversation_tests.md
 rename rasa/cli/initial_project/tests/{conversation_tests.yml => test_conversations.yml} (98%)
 delete mode 100644 rasa/core/training/dsl.py

diff --git a/data/test_dialogues/default.json b/data/test_dialogues/default.json
index abadf0e407ad..1a8ce03e8000 100644
--- a/data/test_dialogues/default.json
+++ b/data/test_dialogues/default.json
@@ -39,6 +39,8 @@
           "confidence": 0.0,
           "name": "greet"
         },
+        "message_id": null,
+        "metadata": {},
         "text": "Hi my name is Peter"
       },
       "text": "Hi my name is Peter",
diff --git a/data/test_trackers/tracker_moodbot.json b/data/test_trackers/tracker_moodbot.json
index 62f258312296..3df54cdcac43 100644
--- a/data/test_trackers/tracker_moodbot.json
+++ b/data/test_trackers/tracker_moodbot.json
@@ -5,6 +5,8 @@
       "confidence": 0.60,
       "name": "mood_great"
     },
+    "message_id": null,
+    "metadata": {},
     "text": "/mood_great",
     "intent_ranking": [
       {
@@ -46,6 +48,8 @@
           "confidence": 0.54,
           "name": "greet"
         },
+        "message_id": null,
+        "metadata": {},
         "text": "/greet",
         "intent_ranking": [
           {
@@ -89,6 +93,8 @@
           "confidence": 0.60,
           "name": "mood_great"
         },
+        "message_id": null,
+        "metadata": {},
         "text": "/mood_great",
         "intent_ranking": [
           {
diff --git a/docs/docs/business-logic.mdx b/docs/docs/business-logic.mdx
index 47ef53c72aef..7758e89842ef 100644
--- a/docs/docs/business-logic.mdx
+++ b/docs/docs/business-logic.mdx
@@ -292,7 +292,7 @@ Here's a minimal checklist of files we modified to handle business logic using a
 
 * `actions.py`: Define the form action, including the `required_slots`, `slot_mappings` and `submit` methods
 
-* `data/nlu.md`:
+* `data/nlu.yml`:
 
     * Add examples for an intent to activate the form
 
@@ -308,7 +308,7 @@ Here's a minimal checklist of files we modified to handle business logic using a
 
     * Add all intents and entities from your NLU training data
 
-* `data/stories.md`: Add a story for the form
+* `data/stories.yml`: Add a story for the form
 
 * `config.yml`:
 
diff --git a/docs/docs/chitchat-faqs.mdx b/docs/docs/chitchat-faqs.mdx
index 4fda0474cba8..bca96c02e717 100644
--- a/docs/docs/chitchat-faqs.mdx
+++ b/docs/docs/chitchat-faqs.mdx
@@ -161,10 +161,10 @@ For example:
 While it's good to test the bot interactively, we should also add end to end test cases that
 can later be included as part of a [CI/CD system](./setting-up-ci-cd). End-to-end [test conversations](./testing-your-assistant#end-to-end-testing)
 include NLU data, so that both components of Rasa can be tested.
-The file `tests/conversation_tests.md` contains example test conversations. Delete all the test conversations and replace
+The file `tests/test_conversations.yml` contains example test conversations. Delete all the test conversations and replace
 them with some test conversations for your assistant so far:
 
-```yaml title="tests/conversation_tests.yml"
+```yaml title="tests/test_conversations.yml"
 e2e_tests:
 - story: greet and goodybe
   steps:
@@ -207,7 +207,7 @@ e2e_tests:
 To test our model against the test file, run the command:
 
 ```bash
-rasa test --stories tests/conversation_tests.yml
+rasa test --stories tests/test_conversations.yml
 ```
 
 The test command will produce a directory named `results`. It should contain a file
@@ -342,7 +342,7 @@ rasa shell
 
 At this stage it makes sense to add a few test cases for our conversations:
 
-```yaml title="tests/conversation_tests.yml"
+```yaml title="tests/test_conversations.yml"
 e2e_tests:
 - story: ask channels
   steps:
@@ -384,6 +384,6 @@ Here's a minimal checklist of files we modified to build a basic FAQ assistant:
 
 * `data/stories.yml`: Add a simple story for FAQs
 
-* `tests/conversation_tests.yml`: Add E2E test stories for your FAQs
+* `tests/test_conversations.yml`: Add E2E test stories for your FAQs
 
 :::
diff --git a/docs/docs/command-line-interface.mdx b/docs/docs/command-line-interface.mdx
index afe619ba1fb2..2c4afe9158a7 100644
--- a/docs/docs/command-line-interface.mdx
+++ b/docs/docs/command-line-interface.mdx
@@ -42,14 +42,14 @@ This creates the following files:
 ├── config.yml
 ├── credentials.yml
 ├── data
-│   ├── nlu.md
-│   └── stories.md
+│   ├── nlu.yml
+│   └── stories.yml
 ├── domain.yml
 ├── endpoints.yml
 ├── models
 │   └── <timestamp>.tar.gz
 └── tests
-   └── conversation_tests.md
+   └── test_conversations.yml
 ```
 
 The `rasa init` command will ask you if you want to train an initial model using this data.
@@ -239,8 +239,8 @@ If you have NLG data for retrieval actions, this will be saved to seperate files
 ```bash
 ls train_test_split
 
-      nlg_test_data.md     test_data.json
-      nlg_training_data.md training_data.json
+      nlg_test_data.yml     test_data.json
+      nlg_training_data.yml training_data.json
 ```
 
 ## Convert Data Between Markdown and JSON
diff --git a/docs/docs/fallback-handoff.mdx b/docs/docs/fallback-handoff.mdx
index bbc03e94495f..3bfa2509e647 100644
--- a/docs/docs/fallback-handoff.mdx
+++ b/docs/docs/fallback-handoff.mdx
@@ -285,11 +285,11 @@ let's say the user asks “I want to apply for a job at Rasa”, we can then rep
 :::note
 Here's a minimal checklist of files we modified to help our assistant fail gracefully:
 
-* `data/nlu.md`:
+* `data/nlu.yml`:
 
     * Add training data for the `out_of_scope` intent & any specific out of scope intents that you want to handle seperately
 
-* `data/stories.md`:
+* `data/stories.yml`:
 
     * Add stories for any specific out of scope intents
 
diff --git a/docs/docs/jupyter-notebooks.mdx b/docs/docs/jupyter-notebooks.mdx
index 5b0b86e68861..3d421f004c5f 100644
--- a/docs/docs/jupyter-notebooks.mdx
+++ b/docs/docs/jupyter-notebooks.mdx
@@ -82,7 +82,8 @@ The return values are the paths to these newly created directories.
 
 ```bash
 import rasa.data as data
-stories_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
+nlu_data_directory = data.get_data_directories(training_files, data.is_nlu_file)
+stories_directory = data.get_data_directories(training_files, data.is_story_file)
 print(stories_directory, nlu_data_directory)
 ```
 To test your model, call the `test` function, passing in the path
@@ -108,5 +109,5 @@ else:
 if os.path.isdir("results"):
       print("\n")
       print("Core Errors:")
-      print(open("results/failed_stories.md").read())
+      print(open("results/failed_conversations.yml").read())
 ```
diff --git a/docs/docs/setting-up-ci-cd.mdx b/docs/docs/setting-up-ci-cd.mdx
index 960fbec395ce..696d82fc68ba 100644
--- a/docs/docs/setting-up-ci-cd.mdx
+++ b/docs/docs/setting-up-ci-cd.mdx
@@ -93,7 +93,7 @@ important as you start introducing more complicated stories from user
 conversations.
 
 ```bash
-rasa test --stories tests/conversation_tests.md --fail-on-prediction-errors
+rasa test --stories tests/test_conversations.yml --fail-on-prediction-errors
 ```
 
 The `--fail-on-prediction-errors` flag ensures the test will fail if any test
@@ -106,7 +106,7 @@ to be representative of the true distribution of real conversations.
 Rasa X makes it easy to [add test conversations based on real conversations](https://rasa.com/docs/rasa-x/user-guide/test-assistant/#how-to-create-tests).
 
 Note: End-to-end testing does **not** execute your action code. You will need to
-[test your action code](./setting-up-ci-cd.mdx#testing-action-code) in a seperate step.
+[test your action code](./setting-up-ci-cd.mdx#testing-action-code) in a separate step.
 
 ### Compare NLU Performance
 
diff --git a/docs/docs/testing-your-assistant.mdx b/docs/docs/testing-your-assistant.mdx
index d430d93b66ba..cb42bcdb557f 100644
--- a/docs/docs/testing-your-assistant.mdx
+++ b/docs/docs/testing-your-assistant.mdx
@@ -91,7 +91,7 @@ Here are some examples:
   </TabItem>
 </Tabs>
 
-By default Rasa Open Source saves conversation tests to `tests/conversation_tests.md`.
+By default Rasa Open Source saves conversation tests to `tests/test_conversations.yml`.
 You can test your assistant against them by running:
 
 ```bash
@@ -132,7 +132,7 @@ rasa data split nlu
 If you've done this, you can see how well your NLU model predicts the test cases using this command:
 
 ```bash
-rasa test nlu -u train_test_split/test_data.md --model models/nlu-20180323-145833.tar.gz
+rasa test nlu -u train_test_split/test_data.yml --model models/nlu-20180323-145833.tar.gz
 ```
 
 If you don't want to create a separate test set, you can
@@ -140,7 +140,7 @@ still estimate how well your model generalises using cross-validation.
 To do this, add the flag `--cross-validation`:
 
 ```bash
-rasa test nlu -u data/nlu.md --config config.yml --cross-validation
+rasa test nlu -u data/nlu.yml --config config.yml --cross-validation
 ```
 
 The full list of options for the script is:
@@ -155,7 +155,7 @@ a comparative examination between the pipelines.
 
 ```bash
 rasa test nlu --config pretrained_embeddings_spacy.yml supervised_embeddings.yml
-  --nlu data/nlu.md --runs 3 --percentages 0 25 50 70 90
+  --nlu data/nlu.yml --runs 3 --percentages 0 25 50 70 90
 ```
 
 The command in the example above will create a train/test split from your data,
@@ -245,10 +245,10 @@ You can evaluate your trained model on a set of test stories
 by using the evaluate script:
 
 ```bash
-rasa test core --stories test_stories.md --out results
+rasa test core --stories test_conversations.yml --out results
 ```
 
-This will print the failed stories to `results/failed_stories.md`.
+This will print the failed stories to `results/failed_conversations.yml`.
 We count any story as failed if at least one of the actions
 was predicted incorrectly.
 
diff --git a/docs/docs/unexpected-input.mdx b/docs/docs/unexpected-input.mdx
index 806fdd3591ac..f4008ba886e5 100644
--- a/docs/docs/unexpected-input.mdx
+++ b/docs/docs/unexpected-input.mdx
@@ -220,7 +220,7 @@ Here's a minimal checklist of  of files we modified to handle unexpected user in
 
 * `actions.py`: Define `action_greet`
 
-* `data/nlu.md`: Add training data for an `explain` intent
+* `data/nlu.yml`: Add training data for an `explain` intent
 
 * `domain.yml`:
 
@@ -232,7 +232,7 @@ Here's a minimal checklist of  of files we modified to handle unexpected user in
 
     * Add responses for contextual question interruptions
 
-* `data/stories.md`:
+* `data/stories.yml`:
 
     * Remove stories using mapped intents if you have them
 
diff --git a/examples/formbot/README.md b/examples/formbot/README.md
index c95a7e940059..b0fc8e41bce2 100644
--- a/examples/formbot/README.md
+++ b/examples/formbot/README.md
@@ -10,8 +10,8 @@ restaurants based on user preferences.
 This example contains some training data and the main files needed to build an
 assistant on your local machine. The `formbot` consists of the following files:
 
-- **data/nlu.md** contains training examples for the NLU model  
-- **data/stories.md** contains training stories for the Core model
+- **data/nlu.yml** contains training examples for the NLU model  
+- **data/stories.yml** contains training stories for the Core model
 - **actions.py** contains the implementation of a custom `FormAction`
 - **config.yml** contains the model configuration
 - **domain.yml** contains the domain of the assistant  
diff --git a/examples/formbot/tests/end-to-end-stories.md b/examples/formbot/tests/end-to-end-stories.md
deleted file mode 100644
index 1b9c4700514d..000000000000
--- a/examples/formbot/tests/end-to-end-stories.md
+++ /dev/null
@@ -1,37 +0,0 @@
-## Happy path
-* greet: hi
-    - utter_greet
-* request_restaurant: im looking for a restaurant
-    - restaurant_form
-    - form{"name": "restaurant_form"}
-    - form{"name": null}
-    - utter_slots_values
-* thankyou: thanks
-    - utter_noworries
-
-## Happy path with message providing requested value
-* greet: hi
-    - utter_greet
-* request_restaurant: im looking for a restaurant
-    - restaurant_form
-    - form{"name": "restaurant_form"}
-* inform: [afghan](cuisine) food
-    - restaurant_form
-    - form{"name": null}
-    - utter_slots_values
-* thankyou: thanks
-    - utter_noworries
- 
-## unhappy path
-* greet: hi
-    - utter_greet
-* request_restaurant: im looking for a restaurant
-    - restaurant_form
-    - form{"name": "restaurant_form"}
-* chitchat: can you share your boss with me?
-    - utter_chitchat
-    - restaurant_form
-    - form{"name": null}
-    - utter_slots_values
-* thankyou: thanks
-    - utter_noworries
\ No newline at end of file
diff --git a/examples/formbot/tests/test_conversations.yml b/examples/formbot/tests/test_conversations.yml
new file mode 100644
index 000000000000..39c6c43cb118
--- /dev/null
+++ b/examples/formbot/tests/test_conversations.yml
@@ -0,0 +1,63 @@
+test_conversations:
+- story: Happy path
+  steps:
+  - user: |
+      hi
+    intent: greet
+  - action: utter_greet
+  - user: |
+      im looking for a restaurant
+    intent: request_restaurant
+  - action: restaurant_form
+  - active_loop: restaurant_form
+  - active_loop: null
+  - action: utter_slots_values
+  - user: |
+      thanks
+    intent: thankyou
+  - action: utter_noworries
+
+- story: Happy path with message providing requested value
+  steps:
+  - user: |
+      hi
+    intent: greet
+  - action: utter_greet
+  - user: |
+      im looking for a restaurant
+    intent: request_restaurant
+  - action: restaurant_form
+  - active_loop: restaurant_form
+  - user: |
+      [afghan](cuisine) food
+    intent: inform
+  - action: restaurant_form
+  - active_loop: null
+  - action: utter_slots_values
+  - user: |
+      thanks
+    intent: thankyou
+  - action: utter_noworries
+
+- story: unhappy path
+  steps:
+  - user: |
+      hi
+    intent: greet
+  - action: utter_greet
+  - user: |
+      im looking for a restaurant
+    intent: request_restaurant
+  - action: restaurant_form
+  - active_loop: restaurant_form
+  - user: |
+      can you share your boss with me?
+    intent: chitchat
+  - action: utter_chitchat
+  - action: restaurant_form
+  - active_loop: null
+  - action: utter_slots_values
+  - user: |
+      thanks
+    intent: thankyou
+  - action: utter_noworries
diff --git a/examples/knowledgebasebot/README.md b/examples/knowledgebasebot/README.md
index 83dcfb06fb82..2a0e271718b1 100644
--- a/examples/knowledgebasebot/README.md
+++ b/examples/knowledgebasebot/README.md
@@ -7,8 +7,8 @@ This example bot uses a knowledge base to answer user's requests.
 This example contains some training data and the main files needed to build an 
 assistant on your local machine. The `knowledgebasebot` consists of the following files:
 
-- **data/nlu.md** contains training examples for the NLU model  
-- **data/stories.md** contains training stories for the Core model  
+- **data/nlu.yml** contains training examples for the NLU model  
+- **data/stories.yml** contains training stories for the Core model  
 - **actions.py** contains the custom action for querying the knowledge base
 - **config.yml** contains the model configuration
 - **domain.yml** contains the domain of the assistant  
diff --git a/examples/moodbot/README.md b/examples/moodbot/README.md
index 426358e66cd3..61578fb6369a 100644
--- a/examples/moodbot/README.md
+++ b/examples/moodbot/README.md
@@ -7,8 +7,8 @@ The `moodbot` example simulates how you can use your bot on different channels.
 This example contains some training data and the main files needed to build an
 assistant on your local machine. The `moodbot` consists of the following files:
 
-- **data/nlu.md** contains training examples for the NLU model  
-- **data/stories.md** contains training stories for the Core model  
+- **data/nlu.yml** contains training examples for the NLU model  
+- **data/stories.yml** contains training stories for the Core model  
 - **config.yml** contains the model configuration
 - **domain.yml** contains the domain of the assistant  
 - **credentials.yml** contains credentials for the different channels
diff --git a/rasa/cli/data.py b/rasa/cli/data.py
index 9153a25e37db..82b721ea4572 100644
--- a/rasa/cli/data.py
+++ b/rasa/cli/data.py
@@ -9,7 +9,6 @@
 from rasa.cli.arguments import data as arguments
 import rasa.cli.utils
 from rasa.constants import DEFAULT_DATA_PATH
-from rasa.core.interpreter import RegexInterpreter
 from rasa.core.training.story_reader.markdown_story_reader import MarkdownStoryReader
 from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 from rasa.nlu.convert import convert_training_data
@@ -281,7 +280,7 @@ def _write_nlu_yaml(
 def _write_core_yaml(
     training_data_path: Path, output_path: Path, source_path: Path
 ) -> None:
-    reader = MarkdownStoryReader(RegexInterpreter())
+    reader = MarkdownStoryReader()
     writer = YAMLStoryWriter()
 
     loop = asyncio.get_event_loop()
diff --git a/rasa/cli/initial_project/tests/conversation_tests.md b/rasa/cli/initial_project/tests/conversation_tests.md
deleted file mode 100644
index d7bcbfcbfe4e..000000000000
--- a/rasa/cli/initial_project/tests/conversation_tests.md
+++ /dev/null
@@ -1,51 +0,0 @@
-#### This file contains tests to evaluate that your bot behaves as expected.
-#### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/
-
-## happy path 1
-* greet: hello there!
-  - utter_greet
-* mood_great: amazing
-  - utter_happy
-
-## happy path 2
-* greet: hello there!
-  - utter_greet
-* mood_great: amazing
-  - utter_happy
-* goodbye: bye-bye!
-  - utter_goodbye
-
-## sad path 1
-* greet: hello
-  - utter_greet
-* mood_unhappy: not good
-  - utter_cheer_up
-  - utter_did_that_help
-* affirm: yes
-  - utter_happy
-
-## sad path 2
-* greet: hello
-  - utter_greet
-* mood_unhappy: not good
-  - utter_cheer_up
-  - utter_did_that_help
-* deny: not really
-  - utter_goodbye
-
-## sad path 3
-* greet: hi
-  - utter_greet
-* mood_unhappy: very terrible
-  - utter_cheer_up
-  - utter_did_that_help
-* deny: no
-  - utter_goodbye
-
-## say goodbye
-* goodbye: bye-bye!
-  - utter_goodbye
-
-## bot challenge
-* bot_challenge: are you a bot?
-  - utter_iamabot
diff --git a/rasa/cli/initial_project/tests/conversation_tests.yml b/rasa/cli/initial_project/tests/test_conversations.yml
similarity index 98%
rename from rasa/cli/initial_project/tests/conversation_tests.yml
rename to rasa/cli/initial_project/tests/test_conversations.yml
index 6aafaa98e701..f08392fef8e4 100644
--- a/rasa/cli/initial_project/tests/conversation_tests.yml
+++ b/rasa/cli/initial_project/tests/test_conversations.yml
@@ -1,7 +1,7 @@
 #### This file contains tests to evaluate that your bot behaves as expected.
 #### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/
 
-e2e_tests:
+test_conversations:
 - story: happy path 1
   steps:
   - user: |
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index 0666da94adf2..a9b108f52416 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -67,7 +67,7 @@ def run_core_test(args: argparse.Namespace) -> None:
         args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
     )
     stories = cli_utils.get_validated_path(args.stories, "stories", DEFAULT_DATA_PATH)
-    stories = data.get_core_directory(stories)
+    stories = data.get_test_directory(stories)
     output = args.out or DEFAULT_RESULTS_PATH
     args.errors = not args.no_errors
 
diff --git a/rasa/constants.py b/rasa/constants.py
index e3fc0dda8c48..f59d68aa698a 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -36,6 +36,7 @@
 DOCS_URL_DOMAINS = DOCS_BASE_URL + "/core/domains/"
 DOCS_URL_STORIES = DOCS_BASE_URL + "/core/stories/"
 DOCS_URL_RULES = DOCS_BASE_URL + "/core/rules/"
+DOCS_URL_TEST_CONVERSATIONS = DOCS_BASE_URL + "/testing-your-assistant"
 DOCS_URL_ACTIONS = DOCS_BASE_URL + "/core/actions/"
 DOCS_URL_CONNECTORS = DOCS_BASE_URL + "/user-guide/connectors/"
 DOCS_URL_EVENT_BROKERS = DOCS_BASE_URL + "/api/event-brokers/"
diff --git a/rasa/core/agent.py b/rasa/core/agent.py
index f1772db7e18b..f7efa5e4b465 100644
--- a/rasa/core/agent.py
+++ b/rasa/core/agent.py
@@ -487,7 +487,7 @@ async def parse_message_using_nlu_interpreter(
 
         processor = self.create_processor()
         message = UserMessage(message_data)
-        return await processor._parse_message(message, tracker)
+        return await processor.parse_message(message, tracker)
 
     async def handle_message(
         self,
diff --git a/rasa/core/domain.py b/rasa/core/domain.py
index e6a05572df9e..4cac47c3360f 100644
--- a/rasa/core/domain.py
+++ b/rasa/core/domain.py
@@ -1174,9 +1174,9 @@ def is_domain_file(filename: Text) -> bool:
         Returns:
             `True` if it's a domain file, otherwise `False`.
         """
-        from rasa.data import YAML_FILE_EXTENSIONS
+        from rasa.data import is_likely_yaml_file
 
-        if not Path(filename).suffix in YAML_FILE_EXTENSIONS:
+        if not is_likely_yaml_file(filename):
             return False
         try:
             content = rasa.utils.io.read_yaml_file(filename)
diff --git a/rasa/core/events/__init__.py b/rasa/core/events/__init__.py
index 9282f1121a1d..2beb14fadf36 100644
--- a/rasa/core/events/__init__.py
+++ b/rasa/core/events/__init__.py
@@ -56,18 +56,16 @@ def deserialise_entities(entities: Union[Text, List[Any]]) -> List[Dict[Text, An
     return [e for e in entities if isinstance(e, dict)]
 
 
-def md_format_message(text, intent, entities) -> Text:
-    from rasa.nlu.training_data.formats import MarkdownReader
+def md_format_message(
+    text: Text, intent: Optional[Text], entities: Union[Text, List[Any]]
+) -> Text:
     from rasa.nlu.training_data.formats.readerwriter import TrainingDataWriter
+    from rasa.nlu.training_data import entities_parser
 
-    message_from_md = MarkdownReader().parse_training_example(text)
+    message_from_md = entities_parser.parse_training_example(text, intent)
     deserialised_entities = deserialise_entities(entities)
     return TrainingDataWriter.generate_message(
-        {
-            "text": message_from_md.text,
-            "intent": intent,
-            "entities": deserialised_entities,
-        }
+        {"text": message_from_md.text, "entities": deserialised_entities,}
     )
 
 
@@ -227,16 +225,16 @@ def __init__(
 
         super().__init__(timestamp, metadata)
 
+        self.parse_data = {
+            "intent": self.intent,
+            "entities": self.entities,
+            "text": text,
+            "message_id": self.message_id,
+            "metadata": self.metadata,
+        }
+
         if parse_data:
-            self.parse_data = parse_data
-        else:
-            self.parse_data = {
-                "intent": self.intent,
-                "entities": self.entities,
-                "text": text,
-                "message_id": self.message_id,
-                "metadata": self.metadata,
-            }
+            self.parse_data.update(**parse_data)
 
     @staticmethod
     def _from_parse_data(
@@ -333,7 +331,9 @@ def as_story_string(self, e2e: bool = False) -> Text:
                 intent=self.intent.get(INTENT_NAME_KEY, ""), entities=ent_string
             )
             if e2e:
-                message = md_format_message(self.text, self.intent, self.entities)
+                message = md_format_message(
+                    self.text, self.intent.get("name"), self.entities
+                )
                 return "{}: {}".format(self.intent.get(INTENT_NAME_KEY), message)
             else:
                 return parse_string
diff --git a/rasa/core/interpreter.py b/rasa/core/interpreter.py
index b0ac7395091b..925cfdfc9d61 100644
--- a/rasa/core/interpreter.py
+++ b/rasa/core/interpreter.py
@@ -23,7 +23,7 @@ async def parse(
         self,
         text: Text,
         message_id: Optional[Text] = None,
-        tracker: DialogueStateTracker = None,
+        tracker: Optional[DialogueStateTracker] = None,
     ) -> Dict[Text, Any]:
         raise NotImplementedError(
             "Interpreter needs to be able to parse messages into structured output."
@@ -149,7 +149,7 @@ async def parse(
         self,
         text: Text,
         message_id: Optional[Text] = None,
-        tracker: DialogueStateTracker = None,
+        tracker: Optional[DialogueStateTracker] = None,
     ) -> Dict[Text, Any]:
         """Parse a text message."""
 
@@ -159,7 +159,7 @@ def synchronous_parse(
         self,
         text: Text,
         message_id: Optional[Text] = None,
-        tracker: DialogueStateTracker = None,
+        tracker: Optional[DialogueStateTracker] = None,
     ) -> Dict[Text, Any]:
         """Parse a text message."""
 
@@ -189,7 +189,7 @@ async def parse(
         self,
         text: Text,
         message_id: Optional[Text] = None,
-        tracker: DialogueStateTracker = None,
+        tracker: Optional[DialogueStateTracker] = None,
     ) -> Dict[Text, Any]:
         """Parse a text message.
 
@@ -266,7 +266,7 @@ async def parse(
         self,
         text: Text,
         message_id: Optional[Text] = None,
-        tracker: DialogueStateTracker = None,
+        tracker: Optional[DialogueStateTracker] = None,
     ) -> Dict[Text, Any]:
         """Parse a text message.
 
diff --git a/rasa/core/processor.py b/rasa/core/processor.py
index 1926f2c88672..7d6e00c3c8e4 100644
--- a/rasa/core/processor.py
+++ b/rasa/core/processor.py
@@ -224,9 +224,6 @@ async def log_message(
         processing and saved at a later stage.
         """
 
-        # preprocess message if necessary
-        if self.message_preprocessor is not None:
-            message.text = self.message_preprocessor(message.text)
         # we have a Tracker instance for each user
         # which maintains conversation state
         tracker = await self.get_tracker_with_session_start(
@@ -441,18 +438,33 @@ def _check_for_unseen_features(self, parse_data: Dict[Text, Any]) -> None:
     def _get_action(self, action_name) -> Optional[Action]:
         return self.domain.action_for_name(action_name, self.action_endpoint)
 
-    async def _parse_message(self, message, tracker: DialogueStateTracker = None):
+    async def parse_message(
+        self, message: UserMessage, tracker: Optional[DialogueStateTracker] = None
+    ) -> Dict[Text, Any]:
+        """Interpret the passed message using the NLU interpreter.
+
+        Arguments:
+            message: Message to handle
+            tracker: Dialogue context of the message
+
+        Returns:
+            Parsed data extracted from the message.
+        """
+        # preprocess message if necessary
+        if self.message_preprocessor is not None:
+            text = self.message_preprocessor(message.text)
+        else:
+            text = message.text
+
         # for testing - you can short-cut the NLU part with a message
         # in the format /intent{"entity1": val1, "entity2": val2}
         # parse_data is a dict of intent & entities
-        if message.text.startswith(INTENT_MESSAGE_PREFIX):
+        if text.startswith(INTENT_MESSAGE_PREFIX):
             parse_data = await RegexInterpreter().parse(
-                message.text, message.message_id, tracker
+                text, message.message_id, tracker
             )
         else:
-            parse_data = await self.interpreter.parse(
-                message.text, message.message_id, tracker
-            )
+            parse_data = await self.interpreter.parse(text, message.message_id, tracker)
 
         logger.debug(
             "Received user message '{}' with intent '{}' "
@@ -472,7 +484,7 @@ async def _handle_message_with_tracker(
         if message.parse_data:
             parse_data = message.parse_data
         else:
-            parse_data = await self._parse_message(message, tracker)
+            parse_data = await self.parse_message(message, tracker)
 
         # don't ever directly mutate the tracker
         # - instead pass its events to log
diff --git a/rasa/core/schemas/stories.yml b/rasa/core/schemas/stories.yml
index 6fa74c67e973..a072437815ab 100644
--- a/rasa/core/schemas/stories.yml
+++ b/rasa/core/schemas/stories.yml
@@ -22,11 +22,11 @@ mapping:
           sequence:
           - type: "map"
             mapping: &intent_and_entities
-              intent:
+              intent: &intent
                 type: "str"
                 required: True
                 allowempty: False
-              entities:
+              entities: &entities
                 type: "seq"
                 matching: "any"
                 sequence:
@@ -89,6 +89,51 @@ mapping:
                 sequence:
                 - type: "map"
                   mapping: *intent_and_entities
+  test_conversations:
+    type: "seq"
+    matching: "any"
+    sequence:
+    - type: "map"
+      mapping:
+        story:
+          type: "str"
+          allowempty: False
+        metadata:
+          type: "any"
+          required: False
+        steps:
+          type: "seq"
+          matching: "any"
+          sequence:
+          - type: "map"
+            mapping:
+              user:
+                type: "str"
+                required: False
+                allowempty: False
+              intent: *intent
+              entities: *entities
+          - type: "map"
+            mapping: *active_loop
+          - type: "map"
+            mapping: *action
+          - type: "map"
+            mapping: *slot_was_set_seq
+          - type: "map"
+            matching-rule: 'any'
+            mapping:
+              checkpoint:
+                type: "str"
+                allowempty: False
+              slot_was_set: *slot_was_set_seq_value
+          - type: "map"
+            mapping:
+              or:
+                type: "seq"
+                matching: "any"
+                sequence:
+                - type: "map"
+                  mapping: *intent_and_entities
   rules:
     type: "seq"
     matching: "any"
diff --git a/rasa/core/test.py b/rasa/core/test.py
index abfd665144fe..821ca644e632 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -3,22 +3,25 @@
 import warnings
 import typing
 from collections import defaultdict, namedtuple
-from typing import Any, Dict, List, Optional, Text, Tuple, Union
+from typing import Any, Dict, List, Optional, Text, Tuple
 
+from rasa.core.channels import UserMessage
+from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 import rasa.utils.io as io_utils
 from rasa.core.domain import Domain
 from rasa.nlu.constants import (
+    ENTITIES,
     EXTRACTOR,
     ENTITY_ATTRIBUTE_VALUE,
     ENTITY_ATTRIBUTE_TEXT,
     ENTITY_ATTRIBUTE_START,
     ENTITY_ATTRIBUTE_END,
     ENTITY_ATTRIBUTE_TYPE,
+    INTENT,
 )
 from rasa.constants import RESULTS_FILE, PERCENTAGE_KEY
 from rasa.core.utils import pad_lists_to_size
 from rasa.core.events import ActionExecuted, UserUttered
-from rasa.nlu.training_data.formats.markdown import MarkdownWriter
 from rasa.core.trackers import DialogueStateTracker
 from rasa.nlu.training_data.formats.readerwriter import TrainingDataWriter
 from rasa.utils.io import DEFAULT_ENCODING
@@ -30,8 +33,8 @@
 
 CONFUSION_MATRIX_STORIES_FILE = "story_confusion_matrix.png"
 REPORT_STORIES_FILE = "story_report.json"
-FAILED_STORIES_FILE = "failed_stories.md"
-SUCCESSFUL_STORIES_FILE = "successful_stories.md"
+FAILED_STORIES_FILE = "failed_conversations.yml"
+SUCCESSFUL_STORIES_FILE = "successful_conversations.md"
 
 
 logger = logging.getLogger(__name__)
@@ -69,21 +72,21 @@ def __init__(
 
     def add_to_store(
         self,
-        action_predictions: Optional[Union[Text, List[Text]]] = None,
-        action_targets: Optional[Union[Text, List[Text]]] = None,
-        intent_predictions: Optional[Union[Text, List[Text]]] = None,
-        intent_targets: Optional[Union[Text, List[Text]]] = None,
+        action_predictions: Optional[List[Text]] = None,
+        action_targets: Optional[List[Text]] = None,
+        intent_predictions: Optional[List[Text]] = None,
+        intent_targets: Optional[List[Text]] = None,
         entity_predictions: Optional[List[Dict[Text, Any]]] = None,
         entity_targets: Optional[List[Dict[Text, Any]]] = None,
     ) -> None:
         """Add items or lists of items to the store"""
-        for k, v in locals().items():
-            if k != "self" and v:
-                attr = getattr(self, k)
-                if isinstance(v, list):
-                    attr.extend(v)
-                else:
-                    attr.append(v)
+
+        self.action_predictions.extend(action_predictions or [])
+        self.action_targets.extend(action_targets or [])
+        self.intent_targets.extend(intent_targets or [])
+        self.intent_predictions.extend(intent_predictions or [])
+        self.entity_predictions.extend(entity_predictions or [])
+        self.entity_targets.extend(entity_targets or [])
 
     def merge_store(self, other: "EvaluationStore") -> None:
         """Add the contents of other to self"""
@@ -141,15 +144,18 @@ def __init__(
         self.predicted_action = predicted_action
         super().__init__(correct_action, policy, confidence, timestamp=timestamp)
 
+    def comment(self) -> Text:
+        return f"predicted: {self.predicted_action}"
+
     def as_story_string(self) -> Text:
-        return f"{self.action_name}   <!-- predicted: {self.predicted_action} -->"
+        return f"{self.action_name}   <!-- {self.comment()} -->"
 
 
 class EndToEndUserUtterance(UserUttered):
     """End-to-end user utterance.
 
     Mostly used to print the full end-to-end user message in the
-    `failed_stories.md` output file."""
+    `failed_conversations.yml` output file."""
 
     def as_story_string(self, e2e: bool = True) -> Text:
         return super().as_story_string(e2e=True)
@@ -182,16 +188,22 @@ def __init__(self, event: UserUttered, eval_store: EvaluationStore) -> None:
             event.input_channel,
         )
 
-    def as_story_string(self, e2e: bool = True) -> Text:
+    def comment(self) -> Text:
         from rasa.core.events import md_format_message
 
-        correct_message = md_format_message(self.text, self.intent, self.entities)
         predicted_message = md_format_message(
             self.text, self.predicted_intent, self.predicted_entities
         )
+        return f"predicted: {self.predicted_intent}: {predicted_message} "
+
+    def as_story_string(self, e2e: bool = True) -> Text:
+        from rasa.core.events import md_format_message
+
+        correct_message = md_format_message(
+            self.text, self.intent.get("name"), self.entities
+        )
         return (
-            f"{self.intent.get('name')}: {correct_message}   <!-- predicted: "
-            f"{self.predicted_intent}: {predicted_message} -->"
+            f"{self.intent.get('name')}: {correct_message}   <!-- {self.comment()} -->"
         )
 
 
@@ -206,7 +218,7 @@ async def _generate_trackers(
     from rasa.core import training
 
     story_graph = await training.extract_story_graph(
-        resource_name, agent.domain, agent.interpreter, use_e2e
+        resource_name, agent.domain, use_e2e
     )
     g = TrainingDataGenerator(
         story_graph,
@@ -245,23 +257,21 @@ def _clean_entity_results(
 
 def _collect_user_uttered_predictions(
     event: UserUttered,
+    predicted: Dict[Text, Any],
     partial_tracker: DialogueStateTracker,
     fail_on_prediction_errors: bool,
 ) -> EvaluationStore:
     user_uttered_eval_store = EvaluationStore()
 
-    intent_gold = event.parse_data.get("true_intent")
-    predicted_intent = event.parse_data.get("intent", {}).get("name")
-
-    if not predicted_intent:
-        predicted_intent = [None]
+    intent_gold = event.intent.get("name")
+    predicted_intent = predicted.get(INTENT, {}).get("name")
 
     user_uttered_eval_store.add_to_store(
-        intent_predictions=predicted_intent, intent_targets=intent_gold
+        intent_predictions=[predicted_intent], intent_targets=[intent_gold]
     )
 
-    entity_gold = event.parse_data.get("true_entities")
-    predicted_entities = event.parse_data.get("entities")
+    entity_gold = event.entities
+    predicted_entities = predicted.get(ENTITIES)
 
     if entity_gold or predicted_entities:
         user_uttered_eval_store.add_to_store(
@@ -334,7 +344,7 @@ def _collect_action_executed_predictions(
             predicted = action.name()
 
     action_executed_eval_store.add_to_store(
-        action_predictions=predicted, action_targets=gold
+        action_predictions=[predicted], action_targets=[gold]
     )
 
     if action_executed_eval_store.has_prediction_target_mismatch():
@@ -372,7 +382,7 @@ def _form_might_have_been_rejected(
     )
 
 
-def _predict_tracker_actions(
+async def _predict_tracker_actions(
     tracker: DialogueStateTracker,
     agent: "Agent",
     fail_on_prediction_errors: bool = False,
@@ -426,8 +436,9 @@ def _predict_tracker_actions(
             num_predicted_actions += 1
 
         elif use_e2e and isinstance(event, UserUttered):
+            predicted = await processor.parse_message(UserMessage(event.text))
             user_uttered_result = _collect_user_uttered_predictions(
-                event, partial_tracker, fail_on_prediction_errors
+                event, predicted, partial_tracker, fail_on_prediction_errors
             )
 
             tracker_eval_store.merge_store(user_uttered_result)
@@ -451,10 +462,10 @@ def _in_training_data_fraction(action_list: List[Dict[Text, Any]]) -> float:
         if a["policy"] and not SimplePolicyEnsemble.is_not_memo_policy(a["policy"])
     ]
 
-    return len(in_training_data) / len(action_list)
+    return len(in_training_data) / len(action_list) if len(action_list) else 0
 
 
-def _collect_story_predictions(
+async def _collect_story_predictions(
     completed_trackers: List["DialogueStateTracker"],
     agent: "Agent",
     fail_on_prediction_errors: bool = False,
@@ -475,7 +486,11 @@ def _collect_story_predictions(
     action_list = []
 
     for tracker in tqdm(completed_trackers):
-        tracker_results, predicted_tracker, tracker_actions = _predict_tracker_actions(
+        (
+            tracker_results,
+            predicted_tracker,
+            tracker_actions,
+        ) = await _predict_tracker_actions(
             tracker, agent, fail_on_prediction_errors, use_e2e
         )
 
@@ -526,7 +541,7 @@ def _collect_story_predictions(
 
 
 def _log_stories(
-    stories: List[DialogueStateTracker], filename: Text, out_directory: Text
+    trackers: List[DialogueStateTracker], filename: Text, out_directory: Text
 ) -> None:
     """Write given stories to the given file."""
     if not out_directory:
@@ -535,12 +550,14 @@ def _log_stories(
     with open(
         os.path.join(out_directory, filename), "w", encoding=DEFAULT_ENCODING
     ) as f:
-        if not stories:
-            f.write("<!-- No stories found. -->")
-
-        for story in stories:
-            f.write(story.export_stories(include_source=True))
-            f.write("\n\n")
+        if not trackers:
+            f.write("# No stories found.")
+        else:
+            stories = [
+                tracker.as_story(include_source=True) for tracker in trackers
+            ]  # TODO: revisit `include_source=True` - what do we need it for?
+            steps = [step for story in stories for step in story.story_steps]
+            f.write(YAMLStoryWriter().dumps(steps, as_test_conversations=True))
 
 
 async def test(
@@ -576,7 +593,7 @@ async def test(
 
     completed_trackers = await _generate_trackers(stories, agent, max_stories, e2e)
 
-    story_evaluation, _ = _collect_story_predictions(
+    story_evaluation, _ = await _collect_story_predictions(
         completed_trackers, agent, fail_on_prediction_errors, e2e
     )
 
@@ -745,7 +762,7 @@ async def _evaluate_core_model(model: Text, stories_file: Text) -> int:
 
     agent = Agent.load(model)
     completed_trackers = await _generate_trackers(stories_file, agent)
-    story_eval_store, number_of_stories = _collect_story_predictions(
+    story_eval_store, number_of_stories = await _collect_story_predictions(
         completed_trackers, agent
     )
     failed_stories = story_eval_store.failed_stories
diff --git a/rasa/core/trackers.py b/rasa/core/trackers.py
index c354b67fa222..e0c5c83e69c6 100644
--- a/rasa/core/trackers.py
+++ b/rasa/core/trackers.py
@@ -16,6 +16,8 @@
     Union,
 )
 
+import typing
+
 from rasa.nlu.constants import (
     ENTITY_ATTRIBUTE_VALUE,
     ENTITY_ATTRIBUTE_TYPE,
@@ -41,6 +43,10 @@
 from rasa.core.domain import Domain  # pytype: disable=pyi-error
 from rasa.core.slots import Slot
 
+
+if typing.TYPE_CHECKING:
+    from rasa.core.training.structures import Story
+
 logger = logging.getLogger(__name__)
 
 
@@ -512,7 +518,7 @@ def update(self, event: Event, domain: Optional[Domain] = None) -> None:
             for e in domain.slots_for_entities(event.parse_data["entities"]):
                 self.update(e)
 
-    def export_stories(self, e2e: bool = False, include_source: bool = False) -> Text:
+    def as_story(self, include_source: bool = False) -> "Story":
         """Dump the tracker as a story in the Rasa Core story format.
 
         Returns the dumped tracker as a string."""
@@ -523,7 +529,15 @@ def export_stories(self, e2e: bool = False, include_source: bool = False) -> Tex
             if include_source
             else self.sender_id
         )
-        story = Story.from_events(self.applied_events(), story_name)
+        return Story.from_events(self.applied_events(), story_name)
+
+    def export_stories(
+        self, e2e: bool = False, include_source: bool = False
+    ) -> Text:  # TODO: this should not be used, deprecate, use yaml instead
+        """Dump the tracker as a story in the Rasa Core story format.
+
+        Returns the dumped tracker as a string."""
+        story = self.as_story(include_source)
         return story.as_story_string(flat=True, e2e=e2e)
 
     def export_stories_to_file(self, export_path: Text = "debug.md") -> None:
diff --git a/rasa/core/training/__init__.py b/rasa/core/training/__init__.py
index 3b77dc45606b..f6add029508c 100644
--- a/rasa/core/training/__init__.py
+++ b/rasa/core/training/__init__.py
@@ -2,7 +2,6 @@
 
 if TYPE_CHECKING:
     from rasa.core.domain import Domain
-    from rasa.core.interpreter import NaturalLanguageInterpreter
     from rasa.core.trackers import DialogueStateTracker
     from rasa.core.training.structures import StoryGraph
     from rasa.importers.importer import TrainingDataImporter
@@ -11,20 +10,15 @@
 async def extract_rule_data(
     resource_name: Text,
     domain: "Domain",
-    interpreter: Optional["NaturalLanguageInterpreter"] = None,
     use_e2e: bool = False,
     exclusion_percentage: int = None,
 ) -> "StoryGraph":
-    from rasa.core.interpreter import RegexInterpreter
     from rasa.core.training import loading
     from rasa.core.training.structures import StoryGraph
 
-    if not interpreter:
-        interpreter = RegexInterpreter()
     story_steps = await loading.load_data_from_resource(
         resource_name,
         domain,
-        interpreter,
         use_e2e=use_e2e,
         exclusion_percentage=exclusion_percentage,
     )
@@ -34,20 +28,15 @@ async def extract_rule_data(
 async def extract_story_graph(
     resource_name: Text,
     domain: "Domain",
-    interpreter: Optional["NaturalLanguageInterpreter"] = None,
     use_e2e: bool = False,
     exclusion_percentage: Optional[int] = None,
 ) -> "StoryGraph":
-    from rasa.core.interpreter import RegexInterpreter
     from rasa.core.training.structures import StoryGraph
     import rasa.core.training.loading as core_loading
 
-    if not interpreter:
-        interpreter = RegexInterpreter()
     story_steps = await core_loading.load_data_from_resource(
         resource_name,
         domain,
-        interpreter,
         use_e2e=use_e2e,
         exclusion_percentage=exclusion_percentage,
     )
diff --git a/rasa/core/training/dsl.py b/rasa/core/training/dsl.py
deleted file mode 100644
index 4a52a414cb1a..000000000000
--- a/rasa/core/training/dsl.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import logging
-import re
-from typing import Optional, Text, TYPE_CHECKING
-
-from rasa.constants import DOCS_BASE_URL
-from rasa.core.constants import INTENT_MESSAGE_PREFIX
-from rasa.core.interpreter import RegexInterpreter
-from rasa.core.training.structures import FORM_PREFIX
-from rasa.nlu.training_data.formats import MarkdownReader
-
-if TYPE_CHECKING:
-    from rasa.nlu.training_data import Message
-
-logger = logging.getLogger(__name__)
-
-
-class EndToEndReader(MarkdownReader):
-    def __init__(self) -> None:
-        super().__init__()
-        self._regex_interpreter = RegexInterpreter()
-
-    def _parse_item(self, line: Text) -> Optional["Message"]:
-        f"""Parses an md list item line based on the current section type.
-
-        Matches expressions of the form `<intent>:<example>. For the
-        syntax of <example> see the Rasa docs on NLU training data:
-        {DOCS_BASE_URL}/nlu/training-data-format/#markdown-format"""
-
-        # Match three groups:
-        # 1) Potential "form" annotation
-        # 2) The correct intent
-        # 3) Optional entities
-        # 4) The message text
-        form_group = fr"({FORM_PREFIX}\s*)*"
-        item_regex = re.compile(r"\s*" + form_group + r"([^{}]+?)({.*})*:\s*(.*)")
-        match = re.match(item_regex, line)
-
-        if not match:
-            raise ValueError(
-                "Encountered invalid end-to-end format for message "
-                "`{}`. Please visit the documentation page on "
-                "end-to-end testing at {}/user-guide/testing-your-assistant/"
-                "#end-to-end-testing/".format(line, DOCS_BASE_URL)
-            )
-
-        intent = match.group(2)
-        self.current_title = intent
-        message = match.group(4)
-        example = self.parse_training_example(message)
-
-        # If the message starts with the `INTENT_MESSAGE_PREFIX` potential entities
-        # are annotated in the json format (e.g. `/greet{"name": "Rasa"})
-        if message.startswith(INTENT_MESSAGE_PREFIX):
-            parsed = self._regex_interpreter.synchronous_parse(message)
-            example.data["entities"] = parsed["entities"]
-
-        example.data["true_intent"] = intent
-        return example
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 70bdf4272eeb..8f99ddfd9277 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -73,9 +73,9 @@
 MAX_VISUAL_HISTORY = 3
 
 PATHS = {
-    "stories": "data/stories.md",
-    "nlu": "data/nlu.md",
-    "backup": "data/nlu_interactive.md",
+    "stories": "data/stories.yml",
+    "nlu": "data/nlu.yml",
+    "backup": "data/nlu_interactive.yml",
     "domain": "domain.yml",
 }
 
@@ -843,20 +843,16 @@ def _write_nlu_to_file(export_nlu_path: Text, events: List[Dict[Text, Any]]) ->
 
 
 def _get_nlu_target_format(export_path: Text) -> Text:
-    from rasa.data import (
-        YAML_FILE_EXTENSIONS,
-        MARKDOWN_FILE_EXTENSIONS,
-        JSON_FILE_EXTENSIONS,
-    )
+    from rasa import data
 
     guessed_format = loading.guess_format(export_path)
 
     if guessed_format not in {MARKDOWN, RASA, RASA_YAML}:
-        if Path(export_path).suffix in JSON_FILE_EXTENSIONS:
+        if data.is_likely_json_file(export_path):
             guessed_format = RASA
-        elif Path(export_path).suffix in MARKDOWN_FILE_EXTENSIONS:
+        elif data.is_likely_markdown_file(export_path):
             guessed_format = MARKDOWN
-        elif Path(export_path).suffix in YAML_FILE_EXTENSIONS:
+        elif data.is_likely_yaml_file(export_path):
             guessed_format = RASA_YAML
 
     return guessed_format
@@ -1224,7 +1220,7 @@ async def _correct_entities(
     """Validate the entities of a user message.
 
     Returns the corrected entities"""
-    from rasa.nlu.training_data.formats import MarkdownReader
+    from rasa.nlu.training_data import entities_parser
 
     parse_original = latest_message.get("parse_data", {})
     entity_str = _as_md_message(parse_original)
@@ -1233,8 +1229,7 @@ async def _correct_entities(
     )
 
     annotation = await _ask_questions(question, conversation_id, endpoint)
-    # noinspection PyProtectedMember
-    parse_annotated = MarkdownReader().parse_training_example(annotation)
+    parse_annotated = entities_parser.parse_training_example(annotation, intent=None)
 
     corrected_entities = _merge_annotated_and_original_entities(
         parse_annotated, parse_original
diff --git a/rasa/core/training/loading.py b/rasa/core/training/loading.py
index fd80553bbbc2..5a10095c97e6 100644
--- a/rasa/core/training/loading.py
+++ b/rasa/core/training/loading.py
@@ -3,9 +3,9 @@
 from pathlib import Path
 from typing import Text, Optional, Dict, List, Union
 
+from rasa import data
 import rasa.utils.io as io_utils
 from rasa.core.domain import Domain
-from rasa.core.interpreter import NaturalLanguageInterpreter, RegexInterpreter
 from rasa.core.training.story_reader.markdown_story_reader import MarkdownStoryReader
 from rasa.core.training.story_reader.story_reader import StoryReader
 from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
@@ -18,40 +18,30 @@
 def _get_reader(
     filename: Text,
     domain: Domain,
-    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
     template_variables: Optional[Dict] = None,
     use_e2e: bool = False,
 ) -> StoryReader:
 
-    if Path(filename).suffix in MARKDOWN_FILE_EXTENSIONS:
-        return MarkdownStoryReader(
-            interpreter, domain, template_variables, use_e2e, filename
-        )
-    elif Path(filename).suffix in YAML_FILE_EXTENSIONS:
-        return YAMLStoryReader(
-            interpreter, domain, template_variables, use_e2e, filename
-        )
+    if data.is_likely_markdown_file(filename):
+        return MarkdownStoryReader(domain, template_variables, use_e2e, filename)
+    elif data.is_likely_yaml_file(filename):
+        return YAMLStoryReader(domain, template_variables, use_e2e, filename)
     else:
         # This is a use case for uploading the story over REST API.
         # The source file has a random name.
-        return _guess_reader(filename, domain, interpreter, template_variables, use_e2e)
+        return _guess_reader(filename, domain, template_variables, use_e2e)
 
 
 def _guess_reader(
     filename: Text,
     domain: Domain,
-    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
     template_variables: Optional[Dict] = None,
     use_e2e: bool = False,
 ) -> StoryReader:
     if YAMLStoryReader.is_yaml_story_file(filename):
-        return YAMLStoryReader(
-            interpreter, domain, template_variables, use_e2e, filename
-        )
+        return YAMLStoryReader(domain, template_variables, use_e2e, filename)
     elif MarkdownStoryReader.is_markdown_story_file(filename):
-        return MarkdownStoryReader(
-            interpreter, domain, template_variables, use_e2e, filename
-        )
+        return MarkdownStoryReader(domain, template_variables, use_e2e, filename)
     raise ValueError(
         f"Failed to find a reader class for the story file `{filename}`. "
         f"Supported formats are "
@@ -62,7 +52,6 @@ def _guess_reader(
 async def load_data_from_resource(
     resource: Union[Text, Path],
     domain: Domain,
-    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
     template_variables: Optional[Dict] = None,
     use_e2e: bool = False,
     exclusion_percentage: Optional[int] = None,
@@ -72,7 +61,6 @@ async def load_data_from_resource(
     Args:
         resource: Folder/File with core training data files.
         domain: Domain object.
-        interpreter: Interpreter to be used for parsing user's utterances.
         template_variables: Variables that have to be replaced in the training data.
         use_e2e: Identifies if the e2e reader should be used.
         exclusion_percentage: Identifies the percentage of training data that
@@ -87,7 +75,6 @@ async def load_data_from_resource(
     return await load_data_from_files(
         io_utils.list_files(resource),
         domain,
-        interpreter,
         template_variables,
         use_e2e,
         exclusion_percentage,
@@ -97,7 +84,6 @@ async def load_data_from_resource(
 async def load_data_from_files(
     story_files: List[Text],
     domain: Domain,
-    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
     template_variables: Optional[Dict] = None,
     use_e2e: bool = False,
     exclusion_percentage: Optional[int] = None,
@@ -107,7 +93,6 @@ async def load_data_from_files(
     Args:
         story_files: List of files with training data in it.
         domain: Domain object.
-        interpreter: Interpreter to be used for parsing user's utterances.
         template_variables: Variables that have to be replaced in the training data.
         use_e2e: Identifies whether the e2e reader should be used.
         exclusion_percentage: Identifies the percentage of training data that
@@ -120,9 +105,7 @@ async def load_data_from_files(
 
     for story_file in story_files:
 
-        reader = _get_reader(
-            story_file, domain, interpreter, template_variables, use_e2e
-        )
+        reader = _get_reader(story_file, domain, template_variables, use_e2e)
 
         steps = await reader.read_from_file(story_file)
         story_steps.extend(steps)
diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index c51191ede65b..0bc8d6c45502 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -4,15 +4,22 @@
 import os
 import re
 from pathlib import PurePath, Path
-from typing import Dict, Text, List, Any, Union
+from typing import Dict, Optional, Text, List, Any, Union
 
+import rasa.data
+from rasa.nlu.training_data import Message
+from rasa.nlu.training_data.formats import MarkdownReader
 import rasa.utils.io as io_utils
-from rasa.constants import DOCS_URL_DOMAINS, DOCS_URL_STORIES
+from rasa.constants import (
+    DEFAULT_E2E_TESTS_PATH,
+    DOCS_BASE_URL,
+    DOCS_URL_DOMAINS,
+    DOCS_URL_STORIES,
+)
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
 from rasa.core.events import UserUttered
 from rasa.core.exceptions import StoryParseError
 from rasa.core.interpreter import RegexInterpreter
-from rasa.core.training.dsl import EndToEndReader
 from rasa.core.training.story_reader.story_reader import StoryReader
 from rasa.core.training.structures import StoryStep, FORM_PREFIX
 from rasa.data import MARKDOWN_FILE_EXTENSIONS
@@ -101,7 +108,7 @@ async def _process_lines(self, lines: List[Text]) -> List[StoryStep]:
             except Exception as e:
                 msg = f"Error in line {line_num}: {e}"
                 logger.error(msg, exc_info=1)  # pytype: disable=wrong-arg-types
-                raise ValueError(msg)
+                raise ValueError(msg) from e
         self._add_current_stories_to_result()
         return self.story_steps
 
@@ -173,15 +180,13 @@ def _parse_event_line(line):
             )
             return "", {}
 
-    async def _add_user_messages(self, messages, line_num):
+    async def _add_user_messages(self, messages: List[Text], line_num: int) -> None:
         if not self.current_step_builder:
             raise StoryParseError(
                 "User message '{}' at invalid location. "
                 "Expected story start.".format(messages)
             )
-        parsed_messages = await asyncio.gather(
-            *[self._parse_message(m, line_num) for m in messages]
-        )
+        parsed_messages = [self._parse_message(m, line_num) for m in messages]
         self.current_step_builder.add_user_messages(
             parsed_messages, self.unfold_or_utterances
         )
@@ -193,25 +198,58 @@ async def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> No
                 "location. Expected story start."
                 "".format(e2e_messages)
             )
-        e2e_reader = EndToEndReader()
+
         parsed_messages = []
         for m in e2e_messages:
-            message = e2e_reader._parse_item(m)
-            parsed = await self._parse_message(message.text, line_num)
-
-            parsed.parse_data["true_intent"] = message.data["true_intent"]
-            parsed.parse_data["true_entities"] = message.data.get("entities") or []
+            message = self.parse_e2e_message(m)
+            parsed = self._parse_message(message.text, line_num)
             parsed_messages.append(parsed)
         self.current_step_builder.add_user_messages(parsed_messages)
 
-    async def _parse_message(self, message: Text, line_num: int) -> UserUttered:
+    @staticmethod
+    def parse_e2e_message(line: Text) -> Optional["Message"]:
+        f"""Parses an md list item line based on the current section type.
+
+        Matches expressions of the form `<intent>:<example>`. For the
+        syntax of <example> see the Rasa docs on NLU training data:
+        {DOCS_BASE_URL}/nlu/training-data-format/#markdown-format"""
+
+        # Match four groups:
+        # 1) Potential "form" annotation
+        # 2) The correct intent
+        # 3) Optional entities
+        # 4) The message text
+        form_group = fr"({FORM_PREFIX}\s*)*"
+        item_regex = re.compile(r"\s*" + form_group + r"([^{}]+?)({.*})*:\s*(.*)")
+        match = re.match(item_regex, line)
+
+        if not match:
+            raise ValueError(
+                "Encountered invalid end-to-end format for message "
+                "`{}`. Please visit the documentation page on "
+                "end-to-end testing at {}/user-guide/testing-your-assistant/"
+                "#end-to-end-testing/".format(line, DOCS_BASE_URL)
+            )
+        from rasa.nlu.training_data import entities_parser
+
+        intent = match.group(2)
+        message = match.group(4)
+        example = entities_parser.parse_training_example(message, intent)
+
+        # If the message starts with the `INTENT_MESSAGE_PREFIX` potential entities
+        # are annotated in the json format (e.g. `/greet{"name": "Rasa"}`)
         if message.startswith(INTENT_MESSAGE_PREFIX):
-            parse_data = await RegexInterpreter().parse(message)
-        else:
-            parse_data = await self.interpreter.parse(message)
+            parsed = RegexInterpreter().synchronous_parse(message)
+            example.data["entities"] = parsed["entities"]
+
+        return example
+
+    def _parse_message(self, message: Text, line_num: int) -> UserUttered:
+        parse_data = RegexInterpreter().synchronous_parse(message)
         utterance = UserUttered(
             message, parse_data.get("intent"), parse_data.get("entities"), parse_data
         )
+
         intent_name = utterance.intent.get(INTENT_NAME_KEY)
         if self.domain and intent_name not in self.domain.intents:
             raise_warning(
@@ -257,6 +295,26 @@ def is_markdown_story_file(file_path: Union[Text, Path]) -> bool:
             )
             return False
 
+    @staticmethod
+    def is_markdown_test_conversations_file(file_path: Union[Text, Path]) -> bool:
+        """Checks if a file is a test conversations file.
+
+        Args:
+            file_path: Path of the file which should be checked.
+
+        Returns:
+            `True` if it's a conversation test file, otherwise `False`.
+        """
+        if Path(file_path).suffix not in MARKDOWN_FILE_EXTENSIONS:
+            return False
+
+        dirname = os.path.dirname(file_path)
+        return (
+            DEFAULT_E2E_TESTS_PATH in dirname
+            and rasa.data.is_story_file(file_path)
+            and not rasa.data.is_nlu_file(file_path)
+        )
+
     @staticmethod
     def _contains_story_or_rule_pattern(text: Text) -> bool:
         story_pattern = r".*##.+"
diff --git a/rasa/core/training/story_reader/story_reader.py b/rasa/core/training/story_reader/story_reader.py
index ffa028fc6b53..f0512110df33 100644
--- a/rasa/core/training/story_reader/story_reader.py
+++ b/rasa/core/training/story_reader/story_reader.py
@@ -4,7 +4,6 @@
 from rasa.core.domain import Domain
 from rasa.core.events import SlotSet, ActionExecuted, Event
 from rasa.core.exceptions import StoryParseError
-from rasa.core.interpreter import NaturalLanguageInterpreter
 from rasa.core.training.story_reader.story_step_builder import StoryStepBuilder
 from rasa.core.training.structures import StoryStep
 
@@ -16,7 +15,6 @@ class StoryReader:
 
     def __init__(
         self,
-        interpreter: NaturalLanguageInterpreter,
         domain: Optional[Domain] = None,
         template_vars: Optional[Dict] = None,
         use_e2e: bool = False,
@@ -26,7 +24,6 @@ def __init__(
         """Constructor for the StoryReader.
 
         Args:
-            interpreter: Interpreter to be used to parse intents.
             domain: Domain object.
             template_vars: Template variables to be replaced.
             use_e2e: Specifies whether to use the e2e parser or not.
@@ -41,7 +38,6 @@ def __init__(
         self.story_steps = []
         self.current_step_builder: Optional[StoryStepBuilder] = None
         self.domain = domain
-        self.interpreter = interpreter
         self.template_variables = template_vars if template_vars else {}
         self.use_e2e = use_e2e
         self.source_name = source_name
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index af8178cceef9..92d78cfc2c76 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -2,29 +2,32 @@
 from pathlib import Path
 from typing import Dict, Text, List, Any, Optional, Union
 
+from rasa.nlu.training_data import entities_parser
 from rasa.utils.validation import validate_yaml_schema, InvalidYamlFileError
 from ruamel.yaml.parser import ParserError
 
 import rasa.utils.common as common_utils
 import rasa.utils.io as io_utils
-from rasa.constants import DOCS_URL_STORIES, DOCS_URL_RULES
+from rasa.constants import DOCS_URL_STORIES, DOCS_URL_RULES, DOCS_URL_TEST_CONVERSATIONS
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
 from rasa.core.actions.action import RULE_SNIPPET_ACTION_NAME
 from rasa.core.events import UserUttered, SlotSet, Form
 from rasa.core.training.story_reader.story_reader import StoryReader
 from rasa.core.training.structures import StoryStep
-from rasa.data import YAML_FILE_EXTENSIONS
 from rasa.nlu.constants import INTENT_NAME_KEY
+import rasa.data
 
 logger = logging.getLogger(__name__)
 
 KEY_STORIES = "stories"
 KEY_STORY_NAME = "story"
+KEY_TEST_CONVERSATIONS = "test_conversations"
 KEY_RULES = "rules"
 KEY_RULE_NAME = "rule"
 KEY_STEPS = "steps"
 KEY_ENTITIES = "entities"
 KEY_USER_INTENT = "intent"
+KEY_USER_MESSAGE = "user"
 KEY_SLOT_NAME = "slot_was_set"
 KEY_SLOT_VALUE = "value"
 KEY_FORM = "active_loop"
@@ -55,10 +58,9 @@ def from_reader(cls, reader: "YAMLStoryReader") -> "YAMLStoryReader":
             A new reader instance.
         """
         return cls(
-            reader.interpreter,
             reader.domain,
             reader.template_variables,
-            reader.use_e2e,
+            reader.use_e2e,  # TODO: I don't think we actually need this
             reader.source_name,
             reader.unfold_or_utterances,
         )
@@ -109,6 +111,7 @@ def read_from_parsed_yaml(
         for key, parser_class in {
             KEY_STORIES: StoryParser,
             KEY_RULES: RuleParser,
+            KEY_TEST_CONVERSATIONS: TestConversationsParser,
         }.items():
             data = parsed_content.get(key, [])
             parser = parser_class.from_reader(self)
@@ -117,8 +120,8 @@ def read_from_parsed_yaml(
 
         return self.story_steps
 
-    @staticmethod
-    def is_yaml_story_file(file_path: Text) -> bool:
+    @classmethod
+    def is_yaml_story_file(cls, file_path: Text) -> bool:
         """Check if file contains Core training data or rule data in YAML format.
 
         Args:
@@ -128,24 +131,37 @@ def is_yaml_story_file(file_path: Text) -> bool:
             `True` in case the file is a Core YAML training data or rule data file,
             `False` otherwise.
         """
-        suffix = Path(file_path).suffix
-
-        if suffix and suffix not in YAML_FILE_EXTENSIONS:
-            return False
+        return rasa.data.is_likely_yaml_file(file_path) and cls.is_key_in_yaml(
+            file_path, KEY_STORIES, KEY_RULES
+        )
 
+    @classmethod
+    def is_key_in_yaml(cls, file_path, *keys):
         try:
             content = io_utils.read_yaml_file(file_path)
-            return any(key in content for key in [KEY_STORIES, KEY_RULES])
+            return any(key in content for key in keys)
         except Exception as e:
             # Using broad `Exception` because yaml library is not exposing all Errors
             common_utils.raise_warning(
-                f"Tried to check if '{file_path}' is a story or rule file, but failed "
-                f"to read it. If this file contains story or rule data, you should "
-                f"investigate this error, otherwise it is probably best to "
-                f"move the file to a different location. Error: {e}"
+                f"Tried to open '{file_path}' and load its data, but failed "
+                f"to read it. There seems to be an error with the yaml syntax: {e}"
             )
             return False
 
+    @classmethod
+    def is_yaml_test_conversations_file(cls, file_path: Union[Text, Path]) -> bool:
+        """Checks if a file is a test conversations file.
+
+        Args:
+            file_path: Path of the file which should be checked.
+
+        Returns:
+            `True` if it's a conversation test file, otherwise `False`.
+        """
+        return rasa.data.is_likely_yaml_file(file_path) and cls.is_key_in_yaml(
+            file_path, KEY_TEST_CONVERSATIONS
+        )
+
     def get_steps(self) -> List[StoryStep]:
         self._add_current_stories_to_result()
         return self.story_steps
@@ -215,7 +231,7 @@ def _parse_step(self, step: Union[Text, Dict[Text, Any]]) -> None:
                 f"'{RULE_SNIPPET_ACTION_NAME}'. It will be skipped.",
                 docs=self._get_docs_link(),
             )
-        elif KEY_USER_INTENT in step.keys():
+        elif KEY_USER_INTENT in step.keys() or KEY_USER_MESSAGE in step.keys():
             self._parse_user_utterance(step)
         elif KEY_OR in step.keys():
             self._parse_or_statement(step)
@@ -291,10 +307,10 @@ def _parse_or_statement(self, step: Dict[Text, Any]) -> None:
 
         self.current_step_builder.add_user_messages(utterances)
 
-    def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
-        user_utterance = step.get(KEY_USER_INTENT, "").strip()
+    def _user_intent_from_step(self, step):
+        user_intent = step.get(KEY_USER_INTENT, "").strip()
 
-        if not user_utterance:
+        if not user_intent:
             common_utils.raise_warning(
                 f"Issue found in '{self.source_name}':\n"
                 f"User utterance cannot be empty. "
@@ -303,22 +319,31 @@ def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUtter
                 docs=self._get_docs_link(),
             )
 
-        raw_entities = step.get(KEY_ENTITIES, [])
-        final_entities = self._parse_raw_entities(raw_entities)
-
-        if user_utterance.startswith(INTENT_MESSAGE_PREFIX):
+        if user_intent.startswith(INTENT_MESSAGE_PREFIX):
             common_utils.raise_warning(
                 f"Issue found in '{self.source_name}':\n"
-                f"User intent '{user_utterance}' starts with "
+                f"User intent '{user_intent}' starts with "
                 f"'{INTENT_MESSAGE_PREFIX}'. This is not required.",
                 docs=self._get_docs_link(),
             )
             # Remove leading slash
-            user_utterance = user_utterance[1:]
+            user_intent = user_intent[1:]
+        return user_intent
 
-        intent = {"name": user_utterance, "confidence": 1.0}
+    def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
+        intent_name = self._user_intent_from_step(step)
+        intent = {"name": intent_name, "confidence": 1.0}
+
+        if KEY_USER_MESSAGE in step:
+            user_message = step[KEY_USER_MESSAGE].strip()
+            entities = entities_parser.find_entities_in_training_example(user_message)
+            plain_text = entities_parser.replace_entities(user_message)
+        else:
+            raw_entities = step.get(KEY_ENTITIES, [])
+            entities = self._parse_raw_entities(raw_entities)
+            plain_text = intent_name
 
-        return UserUttered(user_utterance, intent, final_entities)
+        return UserUttered(plain_text, intent, entities)
 
     @staticmethod
     def _parse_raw_entities(
@@ -410,6 +435,22 @@ def _get_docs_link(self) -> Text:
         return DOCS_URL_STORIES
 
 
+class TestConversationsParser(YAMLStoryReader):
+    """Encapsulate test conversation (e2e tests) specific parser behavior."""
+
+    def _new_part(self, item_name: Text, item: Dict[Text, Any]) -> None:
+        self._new_story_part(item_name, self.source_name)
+
+    def _get_item_title(self) -> Text:
+        return KEY_STORY_NAME
+
+    def _get_plural_item_title(self) -> Text:
+        return KEY_TEST_CONVERSATIONS
+
+    def _get_docs_link(self) -> Text:
+        return DOCS_URL_TEST_CONVERSATIONS
+
+
 class RuleParser(YAMLStoryReader):
     """Encapsulate rule-specific parser behavior."""
 
diff --git a/rasa/core/training/story_writer/yaml_story_writer.py b/rasa/core/training/story_writer/yaml_story_writer.py
index fd6134d758da..02ae65c7b786 100644
--- a/rasa/core/training/story_writer/yaml_story_writer.py
+++ b/rasa/core/training/story_writer/yaml_story_writer.py
@@ -1,17 +1,27 @@
 from collections import OrderedDict
 from pathlib import Path
 
+from ruamel import yaml
 import ruamel.yaml as ruamel_yaml
-from typing import List, Text, Union, Optional
+from typing import Any, Dict, List, Text, Union, Optional
 
+from ruamel.yaml.comments import CommentedMap
+
+from rasa import data
 from rasa.utils.common import raise_warning
-from ruamel.yaml.scalarstring import DoubleQuotedScalarString
+from ruamel.yaml.scalarstring import (
+    DoubleQuotedScalarString,
+    LiteralScalarString,
+    PlainScalarString,
+    ScalarString,
+)
 
 from rasa.constants import LATEST_TRAINING_DATA_FORMAT_VERSION, DOCS_URL_STORIES
 from rasa.core.events import UserUttered, ActionExecuted, SlotSet, Form
 from rasa.core.training.story_reader.yaml_story_reader import (
     KEY_STORIES,
     KEY_STORY_NAME,
+    KEY_TEST_CONVERSATIONS,
     KEY_USER_INTENT,
     KEY_ENTITIES,
     KEY_ACTION,
@@ -20,6 +30,7 @@
     KEY_SLOT_NAME,
     KEY_CHECKPOINT_SLOTS,
     KEY_OR,
+    KEY_USER_MESSAGE,
 )
 from rasa.core.training.structures import StoryStep, Checkpoint
 
@@ -29,33 +40,51 @@
 class YAMLStoryWriter:
     """Writes Core training data into a file in a YAML format. """
 
-    def dumps(self, story_steps: List[StoryStep]) -> Text:
+    def dumps(
+        self, story_steps: List[StoryStep], as_test_conversations: bool = False
+    ) -> Text:
         """Turns Story steps into a string.
 
         Args:
             story_steps: Original story steps to be converted to the YAML.
-
+            as_test_conversations: Decides which top level YAML key to use (stories
+                or test_conversations).
         Returns:
             String with story steps in the YAML format.
         """
         stream = ruamel_yaml.StringIO()
-        self.dump(stream, story_steps)
+        self.dump(stream, story_steps, as_test_conversations)
         return stream.getvalue()
 
     def dump(
         self,
         target: Union[Text, Path, ruamel_yaml.StringIO],
         story_steps: List[StoryStep],
+        as_test_conversations: bool = False,
     ) -> None:
         """Writes Story steps into a target file/stream.
 
         Args:
             target: name of the target file/stream to write the YAML to.
             story_steps: Original story steps to be converted to the YAML.
+            as_test_conversations: Decides which top level YAML key to use (stories
+                or test_conversations).
         """
-        from rasa.validator import KEY_TRAINING_DATA_FORMAT_VERSION
+        result = self.stories_to_yaml(story_steps, as_test_conversations)
+
+        io_utils.write_yaml(result, target, True)
 
-        self.target = target
+    def stories_to_yaml(
+        self, story_steps: List[StoryStep], as_test_conversations: bool = False,
+    ) -> Dict[Text, Any]:
+        """Converts a sequence of story steps into yaml format.
+
+        Args:
+            story_steps: Original story steps to be converted to the YAML.
+            as_test_conversations: Decides which top level YAML key to use (stories
+                or test_conversations).
+        """
+        from rasa.validator import KEY_TRAINING_DATA_FORMAT_VERSION
 
         stories = []
         for story_step in story_steps:
@@ -67,9 +96,11 @@ def dump(
         result[KEY_TRAINING_DATA_FORMAT_VERSION] = DoubleQuotedScalarString(
             LATEST_TRAINING_DATA_FORMAT_VERSION
         )
-        result[KEY_STORIES] = stories
 
-        io_utils.write_yaml(result, self.target, True)
+        data_key = KEY_TEST_CONVERSATIONS if as_test_conversations else KEY_STORIES
+
+        result[data_key] = stories
+        return result
 
     def process_story_step(self, story_step: StoryStep) -> Optional[OrderedDict]:
         """Converts a single story step into an ordered dict.
@@ -82,7 +113,7 @@ def process_story_step(self, story_step: StoryStep) -> Optional[OrderedDict]:
         """
         if self.story_contains_forms(story_step):
             raise_warning(
-                f'File "{self.target}" contains a story "{story_step.block_name}" '
+                f'Training data file contains a story "{story_step.block_name}" '
                 f"that has form(s) in it. This story cannot be converted automatically "
                 f"because of the new Rules system in Rasa Open Source "
                 f"version {LATEST_TRAINING_DATA_FORMAT_VERSION}. "
@@ -126,6 +157,13 @@ def story_contains_forms(story_step) -> bool:
         """
         return any([event for event in story_step.events if isinstance(event, Form)])
 
+    @staticmethod
+    def _text_is_real_message(user_utterance: UserUttered) -> bool:
+        return (
+            not user_utterance.intent
+            or user_utterance.text != user_utterance.as_story_string()
+        )
+
     @staticmethod
     def process_user_utterance(user_utterance: UserUttered) -> OrderedDict:
         """Converts a single user utterance into an ordered dict.
@@ -136,9 +174,16 @@ def process_user_utterance(user_utterance: UserUttered) -> OrderedDict:
         Returns:
             Dict with a user utterance.
         """
-        result = OrderedDict()
+        result = CommentedMap()
         result[KEY_USER_INTENT] = user_utterance.intent["name"]
 
+        # TODO: this is a workaround to print predicted intents / entities...
+        if hasattr(user_utterance, "comment"):
+            result.yaml_add_eol_comment(user_utterance.comment(), KEY_USER_INTENT)
+
+        if YAMLStoryWriter._text_is_real_message(user_utterance):
+            result[KEY_USER_MESSAGE] = LiteralScalarString(user_utterance.text)
+
         if len(user_utterance.entities):
             entities = []
             for entity in user_utterance.entities:
@@ -161,8 +206,13 @@ def process_action(action: ActionExecuted) -> OrderedDict:
             Dict with an action.
         """
         result = OrderedDict()
-        result[KEY_ACTION] = action.action_name
+        s = PlainScalarString(action.action_name)
+
+        # TODO: this is a workaround to print predicted action...
+        if hasattr(action, "comment"):
+            s.comment = f"  # {action.comment()}"
 
+        result[KEY_ACTION] = s
         return result
 
     @staticmethod
diff --git a/rasa/core/training/structures.py b/rasa/core/training/structures.py
index e0af764aa4d3..0088b0683b4a 100644
--- a/rasa/core/training/structures.py
+++ b/rasa/core/training/structures.py
@@ -240,7 +240,7 @@ def __init__(
     def from_events(events: List[Event], story_name: Optional[Text] = None) -> "Story":
         """Create a story from a list of events."""
 
-        story_step = StoryStep()
+        story_step = StoryStep(story_name)
         for event in events:
             story_step.add_event(event)
         return Story([story_step], story_name)
diff --git a/rasa/data.py b/rasa/data.py
index e5bdd0ca62a6..37285228fa39 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -4,9 +4,8 @@
 import tempfile
 import uuid
 from pathlib import Path
-from typing import Tuple, List, Text, Set, Union, Optional, Iterable
+from typing import Callable, Tuple, List, Text, Set, Union, Optional, Iterable
 
-from rasa.constants import DEFAULT_E2E_TESTS_PATH
 from rasa.nlu.training_data import loading as nlu_loading
 
 logger = logging.getLogger(__name__)
@@ -22,6 +21,31 @@
 )
 
 
+def is_likely_yaml_file(file_path: Text) -> bool:
+    return Path(file_path).suffix in YAML_FILE_EXTENSIONS
+
+
+def is_likely_json_file(file_path: Text) -> bool:
+    return Path(file_path).suffix in JSON_FILE_EXTENSIONS
+
+
+def is_likely_markdown_file(file_path: Text) -> bool:
+    return Path(file_path).suffix in MARKDOWN_FILE_EXTENSIONS
+
+
+def get_test_directory(paths: Optional[Union[Text, List[Text]]],) -> Text:
+    """Recursively collects all test conversation files from a list of paths.
+
+    Args:
+        paths: List of paths to test files or folders containing them.
+
+    Returns:
+        Path to temporary directory containing all found test conversation files.
+    """
+    test_files = get_data_files(paths, is_test_conversations_file)
+    return _copy_files_to_new_dir(test_files)
+
+
 def get_core_directory(paths: Optional[Union[Text, List[Text]]],) -> Text:
     """Recursively collects all Core training files from a list of paths.
 
@@ -31,7 +55,7 @@ def get_core_directory(paths: Optional[Union[Text, List[Text]]],) -> Text:
     Returns:
         Path to temporary directory containing all found Core training files.
     """
-    core_files, _ = get_core_nlu_files(paths)
+    core_files = get_data_files(paths, is_story_file)
     return _copy_files_to_new_dir(core_files)
 
 
@@ -44,7 +68,7 @@ def get_nlu_directory(paths: Optional[Union[Text, List[Text]]],) -> Text:
     Returns:
         Path to temporary directory containing all found NLU training files.
     """
-    _, nlu_files = get_core_nlu_files(paths)
+    nlu_files = get_data_files(paths, is_nlu_file)
     return _copy_files_to_new_dir(nlu_files)
 
 
@@ -61,7 +85,8 @@ def get_core_nlu_directories(
         containing the NLU training files.
     """
 
-    story_files, nlu_data_files = get_core_nlu_files(paths)
+    story_files = get_data_files(paths, is_story_file)
+    nlu_data_files = get_data_files(paths, is_nlu_file)
 
     story_directory = _copy_files_to_new_dir(story_files)
     nlu_directory = _copy_files_to_new_dir(nlu_data_files)
@@ -69,20 +94,19 @@ def get_core_nlu_directories(
     return story_directory, nlu_directory
 
 
-def get_core_nlu_files(
-    paths: Optional[Union[Text, List[Text]]]
-) -> Tuple[List[Text], List[Text]]:
+def get_data_files(
+    paths: Optional[Union[Text, List[Text]]], filter_property: Callable[[Text], bool]
+) -> List[Text]:
     """Recursively collects all training files from a list of paths.
 
     Args:
         paths: List of paths to training files or folders containing them.
 
     Returns:
-        Tuple of paths to story and NLU files.
+        Sorted list of paths to the data files accepted by `filter_property`.
     """
 
-    story_files = set()
-    nlu_data_files = set()
+    data_files = set()
 
     if paths is None:
         paths = []
@@ -94,24 +118,19 @@ def get_core_nlu_files(
             continue
 
         if _is_valid_filetype(path):
-            if is_nlu_file(path):
-                nlu_data_files.add(os.path.abspath(path))
-            elif is_story_file(path):
-                story_files.add(os.path.abspath(path))
+            if filter_property(path):
+                data_files.add(os.path.abspath(path))
         else:
-            new_story_files, new_nlu_data_files = _find_core_nlu_files_in_directory(
-                path
-            )
+            new_data_files = _find_data_files_in_directory(path, filter_property)
+            data_files.update(new_data_files)
 
-            story_files.update(new_story_files)
-            nlu_data_files.update(new_nlu_data_files)
+    return sorted(data_files)
 
-    return sorted(story_files), sorted(nlu_data_files)
 
-
-def _find_core_nlu_files_in_directory(directory: Text,) -> Tuple[Set[Text], Set[Text]]:
-    story_files = set()
-    nlu_data_files = set()
+def _find_data_files_in_directory(
+    directory: Text, filter_property: Callable[[Text], bool]
+) -> Set[Text]:
+    filtered_files = set()
 
     for root, _, files in os.walk(directory, followlinks=True):
         # we sort the files here to ensure consistent order for repeatable training
@@ -122,12 +141,10 @@ def _find_core_nlu_files_in_directory(directory: Text,) -> Tuple[Set[Text], Set[
             if not _is_valid_filetype(full_path):
                 continue
 
-            if is_nlu_file(full_path):
-                nlu_data_files.add(full_path)
-            elif is_story_file(full_path):
-                story_files.add(full_path)
+            if filter_property(full_path):
+                filtered_files.add(full_path)
 
-    return story_files, nlu_data_files
+    return filtered_files
 
 
 def _is_valid_filetype(path: Text) -> bool:
@@ -156,18 +173,16 @@ def is_story_file(file_path: Text) -> bool:
         `True` if it's a story file, otherwise `False`.
     """
     from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
-
-    if YAMLStoryReader.is_yaml_story_file(file_path):
-        return True
-
     from rasa.core.training.story_reader.markdown_story_reader import (
         MarkdownStoryReader,
     )
 
-    return MarkdownStoryReader.is_markdown_story_file(file_path)
+    return YAMLStoryReader.is_yaml_story_file(
+        file_path
+    ) or MarkdownStoryReader.is_markdown_story_file(file_path)
 
 
-def is_end_to_end_conversation_test_file(file_path: Text) -> bool:
+def is_test_conversations_file(file_path: Text) -> bool:
     """Checks if a file is an end-to-end conversation test file.
 
     Args:
@@ -176,17 +191,15 @@ def is_end_to_end_conversation_test_file(file_path: Text) -> bool:
     Returns:
         `True` if it's a conversation test file, otherwise `False`.
     """
-
-    if Path(file_path).suffix not in MARKDOWN_FILE_EXTENSIONS:
-        return False
-
-    dirname = os.path.dirname(file_path)
-    return (
-        DEFAULT_E2E_TESTS_PATH in dirname
-        and is_story_file(file_path)
-        and not is_nlu_file(file_path)
+    from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
+    from rasa.core.training.story_reader.markdown_story_reader import (
+        MarkdownStoryReader,
     )
 
+    return YAMLStoryReader.is_yaml_test_conversations_file(
+        file_path
+    ) or MarkdownStoryReader.is_markdown_test_conversations_file(file_path)
+
 
 def is_config_file(file_path: Text) -> bool:
     """Checks whether the given file path is a Rasa config file.
diff --git a/rasa/importers/importer.py b/rasa/importers/importer.py
index 9724779457b9..ab6f2af8030c 100644
--- a/rasa/importers/importer.py
+++ b/rasa/importers/importer.py
@@ -26,7 +26,6 @@ async def get_domain(self) -> Domain:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
@@ -34,8 +33,6 @@ async def get_stories(
         """Retrieves the stories that should be used for training.
 
         Args:
-            interpreter: Interpreter that should be used to parse end to
-                         end learning annotations.
             template_variables: Values of templates that should be replaced while
                                 reading the story files.
             use_e2e: Specifies whether to parse end to end learning annotations.
@@ -182,7 +179,6 @@ async def get_domain(self) -> Domain:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
@@ -207,13 +203,12 @@ async def get_domain(self) -> Domain:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
     ) -> StoryGraph:
         return await self._importer.get_stories(
-            interpreter, template_variables, use_e2e, exclusion_percentage
+            template_variables, use_e2e, exclusion_percentage
         )
 
     async def get_config(self) -> Dict:
@@ -247,15 +242,12 @@ async def get_domain(self) -> Domain:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
     ) -> StoryGraph:
         stories = [
-            importer.get_stories(
-                interpreter, template_variables, use_e2e, exclusion_percentage
-            )
+            importer.get_stories(template_variables, use_e2e, exclusion_percentage)
             for importer in self._importers
         ]
         stories = await asyncio.gather(*stories)
diff --git a/rasa/importers/multi_project.py b/rasa/importers/multi_project.py
index 17dff5af01af..8fa772dbd153 100644
--- a/rasa/importers/multi_project.py
+++ b/rasa/importers/multi_project.py
@@ -6,7 +6,6 @@
 from rasa import data
 import rasa.utils.io as io_utils
 from rasa.core.domain import Domain
-from rasa.core.interpreter import RegexInterpreter, NaturalLanguageInterpreter
 from rasa.importers.importer import TrainingDataImporter
 from rasa.importers import utils
 from rasa.nlu.training_data import TrainingData
@@ -38,9 +37,8 @@ def __init__(
 
         self._init_from_dict(self.config, self._project_directory)
 
-        extra_story_files, extra_nlu_files = data.get_core_nlu_files(
-            training_data_paths
-        )
+        extra_nlu_files = data.get_data_files(training_data_paths, data.is_nlu_file)
+        extra_story_files = data.get_data_files(training_data_paths, data.is_story_file)
         self._story_paths += extra_story_files
         self._nlu_paths += extra_nlu_files
 
@@ -95,7 +93,7 @@ def _init_from_directory(self, path: Text):
                     # Check next file
                     continue
 
-                if data.is_end_to_end_conversation_test_file(full_path):
+                if data.is_test_conversations_file(full_path):
                     self._e2e_story_paths.append(full_path)
                 elif Domain.is_domain_file(full_path):
                     self._domain_paths.append(full_path)
@@ -173,7 +171,6 @@ async def get_domain(self) -> Domain:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
@@ -183,7 +180,6 @@ async def get_stories(
         return await utils.story_graph_from_paths(
             story_paths,
             await self.get_domain(),
-            interpreter,
             template_variables,
             use_e2e,
             exclusion_percentage,
diff --git a/rasa/importers/rasa.py b/rasa/importers/rasa.py
index 0a594843b0f0..a105b85525cb 100644
--- a/rasa/importers/rasa.py
+++ b/rasa/importers/rasa.py
@@ -25,9 +25,8 @@ def __init__(
 
         self._domain_path = domain_path
 
-        self._story_files, self._nlu_files = data.get_core_nlu_files(
-            training_data_paths
-        )
+        self._nlu_files = data.get_data_files(training_data_paths, data.is_nlu_file)
+        self._story_files = data.get_data_files(training_data_paths, data.is_story_file)
 
         self.config = autoconfig.get_configuration(config_file)
 
@@ -36,7 +35,6 @@ async def get_config(self) -> Dict:
 
     async def get_stories(
         self,
-        interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
         exclusion_percentage: Optional[int] = None,
@@ -45,7 +43,6 @@ async def get_stories(
         return await utils.story_graph_from_paths(
             self._story_files,
             await self.get_domain(),
-            interpreter,
             template_variables,
             use_e2e,
             exclusion_percentage,
diff --git a/rasa/importers/utils.py b/rasa/importers/utils.py
index 3e4a603cc61c..a39e3c3afc42 100644
--- a/rasa/importers/utils.py
+++ b/rasa/importers/utils.py
@@ -1,7 +1,6 @@
 from typing import Iterable, Text, Optional, Dict, List
 
 from rasa.core.domain import Domain
-from rasa.core.interpreter import NaturalLanguageInterpreter, RegexInterpreter
 from rasa.core.training.structures import StoryGraph
 from rasa.nlu.training_data import TrainingData
 
@@ -16,7 +15,6 @@ def training_data_from_paths(paths: Iterable[Text], language: Text) -> TrainingD
 async def story_graph_from_paths(
     files: List[Text],
     domain: Domain,
-    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
     template_variables: Optional[Dict] = None,
     use_e2e: bool = False,
     exclusion_percentage: Optional[int] = None,
@@ -25,6 +23,6 @@ async def story_graph_from_paths(
     from rasa.core.training import loading
 
     story_steps = await loading.load_data_from_files(
-        files, domain, interpreter, template_variables, use_e2e, exclusion_percentage
+        files, domain, template_variables, use_e2e, exclusion_percentage
     )
     return StoryGraph(story_steps)
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index ca269a4b49c6..36c38d353fc6 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -1505,7 +1505,7 @@ def run_evaluation(
             disable_plotting,
         )
 
-    if entity_results:
+    if any(entity_results):
         logger.info("Entity evaluation results:")
         extractors = get_entity_extractors(interpreter)
         result["entity_evaluation"] = evaluate_entities(
diff --git a/rasa/nlu/training_data/entities_parser.py b/rasa/nlu/training_data/entities_parser.py
index d5293896f3f1..299dbfe97f03 100644
--- a/rasa/nlu/training_data/entities_parser.py
+++ b/rasa/nlu/training_data/entities_parser.py
@@ -9,6 +9,7 @@
     ENTITY_ATTRIBUTE_ROLE,
     ENTITY_ATTRIBUTE_VALUE,
 )
+from rasa.nlu.training_data.message import Message
 from rasa.utils.common import raise_warning
 
 GROUP_ENTITY_VALUE = "value"
@@ -165,3 +166,13 @@ def replace_entities(training_example: Text) -> Text:
     return re.sub(
         ENTITY_REGEX, lambda m: m.groupdict()[GROUP_ENTITY_TEXT], training_example
     )
+
+
+def parse_training_example(example: Text, intent: Optional[Text]) -> "Message":
+    """Extract entities from an annotated example and build a plain-text Message."""
+    from rasa.nlu.training_data import Message
+
+    entities = find_entities_in_training_example(example)
+    plain_text = replace_entities(example)
+
+    return Message.build(plain_text, intent, entities)
diff --git a/rasa/nlu/training_data/formats/markdown.py b/rasa/nlu/training_data/formats/markdown.py
index f40416202437..4c71a907e2d9 100644
--- a/rasa/nlu/training_data/formats/markdown.py
+++ b/rasa/nlu/training_data/formats/markdown.py
@@ -116,12 +116,18 @@ def _parse_item(self, line: Text) -> None:
         """Parses an md list item line based on the current section type."""
         import rasa.nlu.training_data.lookup_tables_parser as lookup_tables_parser
         import rasa.nlu.training_data.synonyms_parser as synonyms_parser
+        from rasa.nlu.training_data import entities_parser
 
         match = re.match(item_regex, line)
         if match:
             item = match.group(1)
             if self.current_section == INTENT:
-                parsed = self.parse_training_example(item)
+                parsed = entities_parser.parse_training_example(
+                    item, self.current_title
+                )
+                synonyms_parser.add_synonyms_from_entities(
+                    parsed.text, parsed.get("entities", []), self.entity_synonyms
+                )
                 self.training_examples.append(parsed)
             elif self.current_section == SYNONYM:
                 synonyms_parser.add_synonym(
@@ -173,24 +179,6 @@ def _get_validated_dict(json_str: Text) -> Dict[Text, Text]:
 
         return data
 
-    def parse_training_example(self, example: Text) -> "Message":
-        """Extract entities and synonyms, and convert to plain text."""
-        from rasa.nlu.training_data import Message
-        import rasa.nlu.training_data.entities_parser as entities_parser
-        import rasa.nlu.training_data.synonyms_parser as synonyms_parser
-
-        entities = entities_parser.find_entities_in_training_example(example)
-        plain_text = entities_parser.replace_entities(example)
-        synonyms_parser.add_synonyms_from_entities(
-            plain_text, entities, self.entity_synonyms
-        )
-
-        message = Message.build(plain_text, self.current_title)
-
-        if len(entities) > 0:
-            message.set("entities", entities)
-        return message
-
     def _set_current_section(self, section: Text, title: Text) -> None:
         """Update parsing mode."""
         if section not in AVAILABLE_SECTIONS:
diff --git a/rasa/nlu/training_data/formats/rasa_yaml.py b/rasa/nlu/training_data/formats/rasa_yaml.py
index f27f1908825a..cd003c210c78 100644
--- a/rasa/nlu/training_data/formats/rasa_yaml.py
+++ b/rasa/nlu/training_data/formats/rasa_yaml.py
@@ -13,6 +13,7 @@
     Optional,
 )
 
+from rasa import data
 from rasa.utils import validation
 from ruamel.yaml import YAMLError, StringIO
 
@@ -21,7 +22,6 @@
     DOCS_URL_TRAINING_DATA_NLU,
     LATEST_TRAINING_DATA_FORMAT_VERSION,
 )
-from rasa.data import YAML_FILE_EXTENSIONS
 from rasa.nlu.training_data.formats.readerwriter import (
     TrainingDataReader,
     TrainingDataWriter,
@@ -354,7 +354,7 @@ def is_yaml_nlu_file(filename: Text) -> bool:
             `True` if the `filename` is possibly a valid YAML NLU file,
             `False` otherwise.
         """
-        if Path(filename).suffix not in YAML_FILE_EXTENSIONS:
+        if not data.is_likely_yaml_file(filename):
             return False
 
         try:
diff --git a/rasa/nlu/training_data/synonyms_parser.py b/rasa/nlu/training_data/synonyms_parser.py
index 5d8aa1459c48..89744419c29a 100644
--- a/rasa/nlu/training_data/synonyms_parser.py
+++ b/rasa/nlu/training_data/synonyms_parser.py
@@ -1,4 +1,4 @@
-from typing import Text, List, Dict
+from typing import Any, Text, List, Dict
 
 from rasa.nlu.constants import (
     ENTITY_ATTRIBUTE_VALUE,
@@ -8,7 +8,7 @@
 
 
 def add_synonyms_from_entities(
-    plain_text: Text, entities: List[Dict], existing_synonyms: Dict
+    plain_text: Text, entities: List[Dict], existing_synonyms: Dict[Text, Any]
 ) -> None:
     """Adds synonyms found in intent examples.
 
@@ -25,7 +25,7 @@ def add_synonyms_from_entities(
 
 
 def add_synonym(
-    synonym_value: Text, synonym_name: Text, existing_synonyms: Dict
+    synonym_value: Text, synonym_name: Text, existing_synonyms: Dict[Text, Any]
 ) -> None:
     """Adds a new synonym mapping to the provided list of synonyms.
 
diff --git a/rasa/nlu/training_data/training_data.py b/rasa/nlu/training_data/training_data.py
index fef7e7d6e17b..ee57f0ae8142 100644
--- a/rasa/nlu/training_data/training_data.py
+++ b/rasa/nlu/training_data/training_data.py
@@ -7,11 +7,7 @@
 from os.path import relpath
 from typing import Any, Dict, List, Optional, Set, Text, Tuple, Callable
 
-from rasa.data import (
-    JSON_FILE_EXTENSIONS,
-    MARKDOWN_FILE_EXTENSIONS,
-    YAML_FILE_EXTENSIONS,
-)
+from rasa import data
 import rasa.nlu.utils
 from rasa.utils.common import raise_warning, lazy_property
 from rasa.nlu.constants import (
@@ -309,11 +305,11 @@ def nlu_as_yaml(self) -> Text:
 
     def persist_nlu(self, filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH) -> None:
 
-        if Path(filename).suffix in JSON_FILE_EXTENSIONS:
+        if data.is_likely_json_file(filename):
             rasa.nlu.utils.write_to_file(filename, self.nlu_as_json(indent=2))
-        elif Path(filename).suffix in MARKDOWN_FILE_EXTENSIONS:
+        elif data.is_likely_markdown_file(filename):
             rasa.nlu.utils.write_to_file(filename, self.nlu_as_markdown())
-        elif Path(filename).suffix in YAML_FILE_EXTENSIONS:
+        elif data.is_likely_yaml_file(filename):
             rasa.nlu.utils.write_to_file(filename, self.nlu_as_yaml())
         else:
             ValueError(
@@ -322,9 +318,9 @@ def persist_nlu(self, filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH) -> Non
             )
 
     def persist_nlg(self, filename: Text) -> None:
-        if Path(filename).suffix in YAML_FILE_EXTENSIONS:
+        if data.is_likely_yaml_file(filename):
             rasa.nlu.utils.write_to_file(filename, self.nlg_as_yaml())
-        elif Path(filename).suffix in MARKDOWN_FILE_EXTENSIONS:
+        elif data.is_likely_markdown_file(filename):
             nlg_serialized_data = self.nlg_as_markdown()
             if nlg_serialized_data:
                 rasa.nlu.utils.write_to_file(filename, nlg_serialized_data)
@@ -338,7 +334,7 @@ def persist_nlg(self, filename: Text) -> None:
     def get_nlg_persist_filename(nlu_filename: Text) -> Text:
 
         extension = Path(nlu_filename).suffix
-        if extension in JSON_FILE_EXTENSIONS:
+        if data.is_likely_json_file(nlu_filename):
             # backwards compatibility: previously NLG was always dumped as md. now
             # we are going to dump in the same format as the NLU data. unfortunately
             # there is a special case: NLU is in json format, in this case we use
diff --git a/rasa/utils/io.py b/rasa/utils/io.py
index d1a7743d1d3d..b2fd249b6943 100644
--- a/rasa/utils/io.py
+++ b/rasa/utils/io.py
@@ -245,6 +245,8 @@ def convert_to_ordered_dict(obj: Any) -> Any:
         An `OrderedDict` with all nested dictionaries converted if `obj` is a
         dictionary, otherwise the object itself.
     """
+    if isinstance(obj, OrderedDict):
+        return obj
     # use recursion on lists
     if isinstance(obj, list):
         return [convert_to_ordered_dict(element) for element in obj]
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 6035464e1c55..26a28534d0be 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -12,18 +12,34 @@
 logger = logging.getLogger(__name__)
 
 
-# At first, matplotlib will be initialized with default OS-specific available backend
-# if that didn't happen, we'll try to set it up manually
-if matplotlib.get_backend() is not None:
-    pass
-else:  # pragma: no cover
-    try:
-        # If the `tkinter` package is available, we can use the `TkAgg` backend
-        import tkinter
-
-        matplotlib.use("TkAgg")
-    except ImportError:
-        matplotlib.use("agg")
+def _fix_matplotlib_backend() -> None:
+    """Switches matplotlib to a working backend if the default is broken or unset."""
+    # At first, matplotlib will be initialized with default OS-specific
+    # available backend
+    if matplotlib.get_backend() == "TkAgg":
+        try:
+            # on OSX sometimes the tkinter package is broken and can't be imported.
+            # we'll try to import it and if it fails we will use a different backend
+            import tkinter
+        except ImportError:  # ModuleNotFoundError is a subclass of ImportError
+            logger.debug("Setting matplotlib backend to 'agg'")
+            matplotlib.use("agg")
+
+    # if no backend is set by default, we'll try to set it up manually
+    elif matplotlib.get_backend() is None:  # pragma: no cover
+        try:
+            # If the `tkinter` package is available, we can use the `TkAgg` backend
+            import tkinter
+
+            logger.debug("Setting matplotlib backend to 'TkAgg'")
+            matplotlib.use("TkAgg")
+        except ImportError:  # ModuleNotFoundError is a subclass of ImportError
+            logger.debug("Setting matplotlib backend to 'agg'")
+            matplotlib.use("agg")
+
+
+# run the fix once, as a side effect of importing this module
+_fix_matplotlib_backend()
 
 
 def plot_confusion_matrix(
@@ -52,7 +68,7 @@ def plot_confusion_matrix(
     import matplotlib.pyplot as plt
     from matplotlib.colors import LogNorm
 
-    zmax = confusion_matrix.max()
+    zmax = confusion_matrix.max() if len(confusion_matrix) else 1
     plt.clf()
     if not color_map:
         color_map = plt.cm.Blues
@@ -78,7 +94,7 @@ def plot_confusion_matrix(
     else:
         logger.info(f"Confusion matrix, without normalization: \n{confusion_matrix}")
 
-    thresh = confusion_matrix.max() / 2.0
+    thresh = zmax / 2.0
     for i, j in itertools.product(
         range(confusion_matrix.shape[0]), range(confusion_matrix.shape[1])
     ):
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
index 7c1b2405c162..325cb6d4525b 100644
--- a/tests/core/test_data.py
+++ b/tests/core/test_data.py
@@ -48,8 +48,8 @@ def test_get_nlu_file(project):
 
 def test_get_core_nlu_files(project):
     data_dir = os.path.join(project, "data")
-    core_files, nlu_files = data.get_core_nlu_files([data_dir])
-
+    nlu_files = data.get_data_files([data_dir], data.is_nlu_file)
+    core_files = data.get_data_files([data_dir], data.is_story_file)
     assert len(nlu_files) == 1
     assert list(nlu_files)[0].endswith("nlu.yml")
 
@@ -162,7 +162,8 @@ def test_same_file_names_get_resolved(tmpdir):
 def test_find_nlu_files_with_different_formats(test_input, expected):
     examples_dir = "data/examples"
     data_dir = os.path.join(examples_dir, test_input)
-    core_files, nlu_files = data.get_core_nlu_files([data_dir])
+    nlu_files = data.get_data_files([data_dir], data.is_nlu_file)
+    core_files = data.get_data_files([data_dir], data.is_story_file)
     assert [Path(f) for f in nlu_files] == [Path(f) for f in expected]
 
 
@@ -193,5 +194,6 @@ def test_is_not_nlu_file_with_json():
 
 def test_get_story_file_with_yaml():
     examples_dir = "data/test_yaml_stories"
-    core_files, nlu_files = data.get_core_nlu_files([examples_dir])
+    nlu_files = data.get_data_files([examples_dir], data.is_nlu_file)
+    core_files = data.get_data_files([examples_dir], data.is_story_file)
     assert core_files
diff --git a/tests/core/test_dsl.py b/tests/core/test_dsl.py
index f04055439e42..e69de29bb2d1 100644
--- a/tests/core/test_dsl.py
+++ b/tests/core/test_dsl.py
@@ -1,127 +0,0 @@
-from typing import Text, Dict
-
-import pytest
-
-from rasa.core.events import UserUttered
-from rasa.core.training.dsl import EndToEndReader
-
-
-@pytest.mark.parametrize(
-    "line, expected",
-    [
-        (" greet: hi", {"intent": "greet", "true_intent": "greet", "text": "hi"}),
-        (
-            " greet: /greet",
-            {
-                "intent": "greet",
-                "true_intent": "greet",
-                "text": "/greet",
-                "entities": [],
-            },
-        ),
-        (
-            'greet: /greet{"test": "test"}',
-            {
-                "intent": "greet",
-                "entities": [
-                    {"entity": "test", "start": 6, "end": 22, "value": "test"}
-                ],
-                "true_intent": "greet",
-                "text": '/greet{"test": "test"}',
-            },
-        ),
-        (
-            'greet{"test": "test"}: /greet{"test": "test"}',
-            {
-                "intent": "greet",
-                "entities": [
-                    {"entity": "test", "start": 6, "end": 22, "value": "test"}
-                ],
-                "true_intent": "greet",
-                "text": '/greet{"test": "test"}',
-            },
-        ),
-        (
-            "mood_great: [great](feeling)",
-            {
-                "intent": "mood_great",
-                "entities": [
-                    {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
-                ],
-                "true_intent": "mood_great",
-                "text": "great",
-            },
-        ),
-        (
-            'form: greet{"test": "test"}: /greet{"test": "test"}',
-            {
-                "intent": "greet",
-                "entities": [
-                    {"end": 22, "entity": "test", "start": 6, "value": "test"}
-                ],
-                "true_intent": "greet",
-                "text": '/greet{"test": "test"}',
-            },
-        ),
-    ],
-)
-def test_e2e_parsing(line: Text, expected: Dict):
-    reader = EndToEndReader()
-    actual = reader._parse_item(line)
-
-    assert actual.as_dict() == expected
-
-
-@pytest.mark.parametrize(
-    "parse_data, expected_story_string",
-    [
-        (
-            {
-                "text": "/simple",
-                "parse_data": {
-                    "intent": {"confidence": 1.0, "name": "simple"},
-                    "entities": [
-                        {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
-                    ],
-                },
-            },
-            "simple: /simple",
-        ),
-        (
-            {
-                "text": "great",
-                "parse_data": {
-                    "intent": {"confidence": 1.0, "name": "simple"},
-                    "entities": [
-                        {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
-                    ],
-                },
-            },
-            "simple: [great](feeling)",
-        ),
-        (
-            {
-                "text": "great",
-                "parse_data": {
-                    "intent": {"confidence": 1.0, "name": "simple"},
-                    "entities": [],
-                },
-            },
-            "simple: great",
-        ),
-    ],
-)
-def test_user_uttered_to_e2e(parse_data: Dict, expected_story_string: Text):
-    event = UserUttered.from_story_string("user", parse_data)[0]
-
-    assert isinstance(event, UserUttered)
-    assert event.as_story_string(e2e=True) == expected_story_string
-
-
-@pytest.mark.parametrize("line", [" greet{: hi"])
-def test_invalid_end_to_end_format(line: Text):
-    reader = EndToEndReader()
-
-    with pytest.raises(ValueError):
-        # noinspection PyProtectedMember
-        _ = reader._parse_item(line)
diff --git a/tests/core/test_evaluation.py b/tests/core/test_evaluation.py
index 89113d93e220..7041c28adbf7 100644
--- a/tests/core/test_evaluation.py
+++ b/tests/core/test_evaluation.py
@@ -57,7 +57,7 @@ async def test_end_to_end_evaluation_script(default_agent: Agent):
         END_TO_END_STORY_FILE, default_agent, use_e2e=True
     )
 
-    story_evaluation, num_stories = _collect_story_predictions(
+    story_evaluation, num_stories = await _collect_story_predictions(
         completed_trackers, default_agent, use_e2e=True
     )
 
@@ -94,7 +94,7 @@ async def test_end_to_end_evaluation_script_unknown_entity(default_agent: Agent)
         E2E_STORY_FILE_UNKNOWN_ENTITY, default_agent, use_e2e=True
     )
 
-    story_evaluation, num_stories = _collect_story_predictions(
+    story_evaluation, num_stories = await _collect_story_predictions(
         completed_trackers, default_agent, use_e2e=True
     )
 
@@ -108,7 +108,7 @@ async def test_end_to_evaluation_with_forms(form_bot_agent: Agent):
         "data/test_evaluations/form-end-to-end-stories.md", form_bot_agent, use_e2e=True
     )
 
-    story_evaluation, num_stories = _collect_story_predictions(
+    story_evaluation, num_stories = await _collect_story_predictions(
         test_stories, form_bot_agent, use_e2e=True
     )
 
@@ -146,7 +146,7 @@ async def test_end_to_evaluation_trips_circuit_breaker():
         E2E_STORY_FILE_TRIPS_CIRCUIT_BREAKER, agent, use_e2e=True
     )
 
-    story_evaluation, num_stories = _collect_story_predictions(
+    story_evaluation, num_stories = await _collect_story_predictions(
         test_stories, agent, use_e2e=True
     )
 
diff --git a/tests/core/test_processor.py b/tests/core/test_processor.py
index 92686727b1a6..77a60c79326d 100644
--- a/tests/core/test_processor.py
+++ b/tests/core/test_processor.py
@@ -69,14 +69,14 @@ async def test_message_id_logging(default_processor: MessageProcessor):
 
 async def test_parsing(default_processor: MessageProcessor):
     message = UserMessage('/greet{"name": "boy"}')
-    parsed = await default_processor._parse_message(message)
+    parsed = await default_processor.parse_message(message)
     assert parsed["intent"][INTENT_NAME_KEY] == "greet"
     assert parsed["entities"][0]["entity"] == "name"
 
 
 async def test_check_for_unseen_feature(default_processor: MessageProcessor):
     message = UserMessage('/dislike{"test_entity": "RASA"}')
-    parsed = await default_processor._parse_message(message)
+    parsed = await default_processor.parse_message(message)
     with pytest.warns(UserWarning) as record:
         default_processor._check_for_unseen_features(parsed)
     assert len(record) == 2
@@ -96,7 +96,7 @@ async def test_default_intent_recognized(
     default_processor: MessageProcessor, default_intent: Text
 ):
     message = UserMessage(default_intent)
-    parsed = await default_processor._parse_message(message)
+    parsed = await default_processor.parse_message(message)
     with pytest.warns(None) as record:
         default_processor._check_for_unseen_features(parsed)
     assert len(record) == 0
@@ -111,9 +111,7 @@ async def test_http_parsing():
 
         inter = RasaNLUHttpInterpreter(endpoint_config=endpoint)
         try:
-            await MessageProcessor(inter, None, None, None, None)._parse_message(
-                message
-            )
+            await MessageProcessor(inter, None, None, None, None).parse_message(message)
         except KeyError:
             pass  # logger looks for intent and entities, so we except
 
diff --git a/tests/core/test_training.py b/tests/core/test_training.py
index 08284c82e374..0e18a0910962 100644
--- a/tests/core/test_training.py
+++ b/tests/core/test_training.py
@@ -25,7 +25,7 @@ async def test_story_visualization(
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
-        "data/test_stories/stories.md", default_domain, interpreter=RegexInterpreter()
+        "data/test_stories/stories.md", default_domain
     )
     out_file = str(tmp_path / "graph.html")
     generated_graph = await visualize_stories(
@@ -51,7 +51,7 @@ async def test_story_visualization_with_merging(
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
-        stories_file, default_domain, interpreter=RegexInterpreter()
+        stories_file, default_domain
     )
     generated_graph = await visualize_stories(
         story_steps,
diff --git a/tests/core/test_visualization.py b/tests/core/test_visualization.py
index 6ad408f51786..e0fa22b7e5f9 100644
--- a/tests/core/test_visualization.py
+++ b/tests/core/test_visualization.py
@@ -89,7 +89,7 @@ async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmp
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
-        stories_file, default_domain, RegexInterpreter()
+        stories_file, default_domain
     )
     out_file = tmpdir.join("graph.html").strpath
     generated_graph = await visualization.visualize_stories(
@@ -120,7 +120,7 @@ async def test_merge_nodes(stories_file: Text, default_domain: Domain, tmpdir):
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
-        stories_file, default_domain, RegexInterpreter()
+        stories_file, default_domain
     )
     out_file = tmpdir.join("graph.html").strpath
     await visualization.visualize_stories(
diff --git a/tests/core/training/story_reader/test_markdown_story_reader.py b/tests/core/training/story_reader/test_markdown_story_reader.py
index bf3e8665cd69..14c62aa7fc1b 100644
--- a/tests/core/training/story_reader/test_markdown_story_reader.py
+++ b/tests/core/training/story_reader/test_markdown_story_reader.py
@@ -1,3 +1,7 @@
+from typing import Dict, Text
+
+import pytest
+
 import rasa.utils.io
 from rasa.core import training
 from rasa.core.domain import Domain
@@ -9,7 +13,6 @@
     FormValidation,
     SlotSet,
 )
-from rasa.core.interpreter import RegexInterpreter
 from rasa.core.trackers import DialogueStateTracker
 from rasa.core.training import loading
 from rasa.core.training.story_reader.markdown_story_reader import MarkdownStoryReader
@@ -147,7 +150,7 @@ async def test_persist_form_story():
 
 
 async def test_read_stories_with_multiline_comments(tmpdir, default_domain: Domain):
-    reader = MarkdownStoryReader(RegexInterpreter(), default_domain)
+    reader = MarkdownStoryReader(default_domain)
 
     story_steps = await reader.read_from_file(
         "data/test_stories/stories_with_multiline_comments.md"
@@ -166,7 +169,7 @@ async def test_read_stories_with_multiline_comments(tmpdir, default_domain: Doma
 
 async def test_read_stories_with_rules(default_domain: Domain):
     story_steps = await loading.load_data_from_files(
-        ["data/test_stories/stories_with_rules.md"], default_domain, RegexInterpreter()
+        ["data/test_stories/stories_with_rules.md"], default_domain
     )
 
     # this file contains three rules and two ML stories
@@ -187,9 +190,7 @@ async def test_read_stories_with_rules(default_domain: Domain):
 
 async def test_read_rules_without_stories(default_domain: Domain):
     story_steps = await loading.load_data_from_files(
-        ["data/test_stories/rules_without_stories.md"],
-        default_domain,
-        RegexInterpreter(),
+        ["data/test_stories/rules_without_stories.md"], default_domain,
     )
 
     # this file contains three rules and two ML stories
@@ -219,3 +220,111 @@ async def test_read_rules_without_stories(default_domain: Domain):
         [{"entity": "some_slot", "start": 6, "end": 25, "value": "bla"}],
     )
     assert events[4] == ActionExecuted("loop_q_form")
+
+
+@pytest.mark.parametrize(
+    "line, expected",
+    [
+        (" greet: hi", {"intent": "greet", "text": "hi"}),
+        (" greet: /greet", {"intent": "greet", "text": "/greet", "entities": [],},),
+        (
+            'greet: /greet{"test": "test"}',
+            {
+                "intent": "greet",
+                "entities": [
+                    {"entity": "test", "start": 6, "end": 22, "value": "test"}
+                ],
+                "text": '/greet{"test": "test"}',
+            },
+        ),
+        (
+            'greet{"test": "test"}: /greet{"test": "test"}',
+            {
+                "intent": "greet",
+                "entities": [
+                    {"entity": "test", "start": 6, "end": 22, "value": "test"}
+                ],
+                "text": '/greet{"test": "test"}',
+            },
+        ),
+        (
+            "mood_great: [great](feeling)",
+            {
+                "intent": "mood_great",
+                "entities": [
+                    {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
+                ],
+                "text": "great",
+            },
+        ),
+        (
+            'form: greet{"test": "test"}: /greet{"test": "test"}',
+            {
+                "intent": "greet",
+                "entities": [
+                    {"end": 22, "entity": "test", "start": 6, "value": "test"}
+                ],
+                "text": '/greet{"test": "test"}',
+            },
+        ),
+    ],
+)
+def test_e2e_parsing(line: Text, expected: Dict):
+    actual = MarkdownStoryReader.parse_e2e_message(line)
+    # the parsed message must serialise to exactly the expected dict
+    assert actual.as_dict() == expected
+
+
+@pytest.mark.parametrize(
+    "parse_data, expected_story_string",
+    [
+        (
+            {
+                "text": "/simple",
+                "parse_data": {
+                    "intent": {"confidence": 1.0, "name": "simple"},
+                    "entities": [
+                        {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
+                    ],
+                },
+            },
+            "simple: /simple",
+        ),
+        (
+            {
+                "text": "great",
+                "parse_data": {
+                    "intent": {"confidence": 1.0, "name": "simple"},
+                    "entities": [
+                        {"start": 0, "end": 5, "value": "great", "entity": "feeling"}
+                    ],
+                },
+            },
+            "simple: [great](feeling)",
+        ),
+        (
+            {
+                "text": "great",
+                "parse_data": {
+                    "intent": {"confidence": 1.0, "name": "simple"},
+                    "entities": [],
+                },
+            },
+            "simple: great",
+        ),
+    ],
+)
+def test_user_uttered_to_e2e(parse_data: Dict, expected_story_string: Text):
+    event = UserUttered.from_story_string("user", parse_data)[0]
+    # the first event built from the parse data is rendered back as an e2e line
+    assert isinstance(event, UserUttered)
+    assert event.as_story_string(e2e=True) == expected_story_string
+
+
+@pytest.mark.parametrize("line", [" greet{: hi"])
+def test_invalid_end_to_end_format(line: Text):
+    """A malformed end-to-end line must be rejected with a `ValueError`."""
+
+    with pytest.raises(ValueError):
+        # `parse_e2e_message` is callable on the class, so no instance is needed
+        _ = MarkdownStoryReader.parse_e2e_message(line)
diff --git a/tests/core/training/story_reader/test_yaml_story_reader.py b/tests/core/training/story_reader/test_yaml_story_reader.py
index b91e357e4a9d..87518094b61c 100644
--- a/tests/core/training/story_reader/test_yaml_story_reader.py
+++ b/tests/core/training/story_reader/test_yaml_story_reader.py
@@ -18,9 +18,7 @@
 async def rule_steps_without_stories(default_domain: Domain) -> List[StoryStep]:
     yaml_file = "data/test_yaml_stories/rules_without_stories.yml"
 
-    return await loading.load_data_from_files(
-        [yaml_file], default_domain, RegexInterpreter()
-    )
+    return await loading.load_data_from_files([yaml_file], default_domain)
 
 
 async def test_can_read_test_story_with_slots(default_domain: Domain):
@@ -162,9 +160,7 @@ async def test_read_rules_with_stories(default_domain: Domain):
 
     yaml_file = "data/test_yaml_stories/stories_and_rules.yml"
 
-    steps = await loading.load_data_from_files(
-        [yaml_file], default_domain, RegexInterpreter()
-    )
+    steps = await loading.load_data_from_files([yaml_file], default_domain)
 
     ml_steps = [s for s in steps if not s.is_rule]
     rule_steps = [s for s in steps if s.is_rule]
@@ -260,7 +256,7 @@ async def test_warning_if_intent_not_in_domain(default_domain: Domain):
       - intent: definitely not in domain
     """
 
-    reader = YAMLStoryReader(RegexInterpreter(), default_domain)
+    reader = YAMLStoryReader(default_domain)
     yaml_content = io_utils.read_yaml(stories)
 
     with pytest.warns(UserWarning) as record:
@@ -279,7 +275,7 @@ async def test_no_warning_if_intent_in_domain(default_domain: Domain):
         f"  - intent: greet"
     )
 
-    reader = YAMLStoryReader(RegexInterpreter(), default_domain)
+    reader = YAMLStoryReader(default_domain)
     yaml_content = io_utils.read_yaml(stories)
 
     with pytest.warns(None) as record:
@@ -298,7 +294,7 @@ async def test_active_loop_is_parsed(default_domain: Domain):
         f"  - active_loop: null"
     )
 
-    reader = YAMLStoryReader(RegexInterpreter(), default_domain)
+    reader = YAMLStoryReader(default_domain)
     yaml_content = io_utils.read_yaml(stories)
 
     with pytest.warns(None) as record:
diff --git a/tests/core/training/story_writer/test_yaml_story_writer.py b/tests/core/training/story_writer/test_yaml_story_writer.py
index 7772e525c7cd..892a6e3a071f 100644
--- a/tests/core/training/story_writer/test_yaml_story_writer.py
+++ b/tests/core/training/story_writer/test_yaml_story_writer.py
@@ -25,18 +25,11 @@ async def test_simple_story(
 ):
 
     original_md_reader = MarkdownStoryReader(
-        RegexInterpreter(),
-        default_domain,
-        None,
-        False,
-        input_yaml_file,
-        unfold_or_utterances=False,
+        default_domain, None, False, input_yaml_file, unfold_or_utterances=False,
     )
     original_md_story_steps = await original_md_reader.read_from_file(input_md_file)
 
-    original_yaml_reader = YAMLStoryReader(
-        RegexInterpreter(), default_domain, None, False
-    )
+    original_yaml_reader = YAMLStoryReader(default_domain, None, False)
     original_yaml_story_steps = await original_yaml_reader.read_from_file(
         input_yaml_file
     )
@@ -45,9 +38,7 @@ async def test_simple_story(
     writer = YAMLStoryWriter()
     writer.dump(target_story_filename, original_md_story_steps)
 
-    processed_yaml_reader = YAMLStoryReader(
-        RegexInterpreter(), default_domain, None, False
-    )
+    processed_yaml_reader = YAMLStoryReader(default_domain, None, False)
     processed_yaml_story_steps = await processed_yaml_reader.read_from_file(
         target_story_filename
     )
@@ -61,7 +52,7 @@ async def test_simple_story(
 
 async def test_forms_are_skipped_with_warning(default_domain: Domain):
     original_md_reader = MarkdownStoryReader(
-        RegexInterpreter(), default_domain, None, False, unfold_or_utterances=False,
+        default_domain, None, False, unfold_or_utterances=False,
     )
     original_md_story_steps = await original_md_reader.read_from_file(
         "data/test_stories/stories_form.md"
diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py
index bff2d51b6813..3bd197ce940e 100644
--- a/tests/importers/test_multi_project.py
+++ b/tests/importers/test_multi_project.py
@@ -214,10 +214,33 @@ def test_not_importing_not_relevant_additional_files(tmpdir_factory):
     assert not selector.is_imported(str(not_relevant_file2))
 
 
+@pytest.mark.parametrize(
+    "e2e_filename,e2e_story_test",
+    [
+        (
+            "test_conversations.yml",
+            """
+        test_conversations:
+        - story: story test
+          steps:
+          - user: hello
+            intent: greet
+          - action: utter_greet
+        """,
+        ),
+        (
+            "conversation_tests.md",
+            """
+        ## story test
+        * greet : "hello"
+            - utter_greet
+        """,
+        ),
+    ],
+)
 async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
-    tmpdir_factory: TempdirFactory,
+    tmpdir_factory: TempdirFactory, e2e_filename: Text, e2e_story_test: Text
 ):
-    from rasa.core.interpreter import RegexInterpreter
     from rasa.core.training.structures import StoryGraph
     import rasa.core.training.loading as core_loading
 
@@ -237,23 +260,15 @@ async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
     )
 
     e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "conversation_tests.md"
-    )
-    e2e_story_test_file.write(
-        """
-        ## story test
-        * greet : "hello"
-            - utter_greet
-        """,
-        ensure=True,
+        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / e2e_filename
     )
+    e2e_story_test_file.write(e2e_story_test, ensure=True)
 
     selector = MultiProjectImporter(config_path)
 
     story_steps = await core_loading.load_data_from_resource(
         resource=str(e2e_story_test_file),
         domain=Domain.empty(),
-        interpreter=RegexInterpreter(),
         template_variables=None,
         use_e2e=True,
         exclusion_percentage=None,
@@ -278,24 +293,15 @@ def test_not_importing_e2e_conversation_tests_in_project(
     story_file.write("""## story""", ensure=True)
 
     e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "conversation_tests.md"
+        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_conversations.yml"
     )
-    e2e_story_test_file.write("""## story test""", ensure=True)
+    e2e_story_test_file.write("""test_conversations:""", ensure=True)
 
     selector = MultiProjectImporter(config_path)
 
     # Conversation tests should not be included in story paths
-    expected = {
-        "story_paths": [str(story_file)],
-        "e2e_story_paths": [str(e2e_story_test_file)],
-    }
-
-    actual = {
-        "story_paths": selector._story_paths,
-        "e2e_story_paths": selector._e2e_story_paths,
-    }
-
-    assert expected == actual
+    assert [str(story_file)] == selector._story_paths
+    assert [str(e2e_story_test_file)] == selector._e2e_story_paths
 
 
 def test_single_additional_file(tmpdir_factory):
diff --git a/tests/test_data.py b/tests/test_data.py
index 22b3f9ad0052..6c39b4185ebd 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -34,7 +34,19 @@ def test_default_story_files_are_story_files():
         assert data.is_story_file(fn)
 
 
-def test_default_conversation_tests_are_conversation_tests(tmpdir: Path):
+def test_default_conversation_tests_are_conversation_tests_yml(tmpdir: Path):
+    parent = tmpdir / DEFAULT_E2E_TESTS_PATH
+    Path(parent).mkdir(parents=True)
+
+    e2e_path = parent / "test_conversations.yml"
+    e2e_story = """test_conversations:"""
+    write_text_file(e2e_story, e2e_path)
+
+    assert data.is_test_conversations_file(str(e2e_path))
+
+
+def test_default_conversation_tests_are_conversation_tests_md(tmpdir: Path):
+    # can be removed once conversation tests MD support is removed
     parent = tmpdir / DEFAULT_E2E_TESTS_PATH
     Path(parent).mkdir(parents=True)
 
@@ -42,7 +54,7 @@ def test_default_conversation_tests_are_conversation_tests(tmpdir: Path):
     e2e_story = """## my story test"""
     write_text_file(e2e_story, e2e_path)
 
-    assert data.is_end_to_end_conversation_test_file(str(e2e_path))
+    assert data.is_test_conversations_file(str(e2e_path))
 
 
 def test_nlu_data_files_are_not_conversation_tests(tmpdir: Path):
@@ -58,7 +70,7 @@ def test_nlu_data_files_are_not_conversation_tests(tmpdir: Path):
     """
     write_text_file(nlu_data, nlu_path)
 
-    assert not data.is_end_to_end_conversation_test_file(str(nlu_path))
+    assert not data.is_test_conversations_file(str(nlu_path))
 
 
 def test_domain_files_are_not_conversation_tests(tmpdir: Path):
@@ -67,4 +79,4 @@ def test_domain_files_are_not_conversation_tests(tmpdir: Path):
 
     domain_path = parent / "domain.yml"
 
-    assert not data.is_end_to_end_conversation_test_file(str(domain_path))
+    assert not data.is_test_conversations_file(str(domain_path))
diff --git a/tests/test_server.py b/tests/test_server.py
index 149786e5bf3e..12d5ae1a21bc 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -701,6 +701,7 @@ def test_evaluate_stories_end_to_end(
         "is_end_to_end_evaluation",
     }
     assert js["is_end_to_end_evaluation"]
+    assert js["actions"] != []
     assert set(js["actions"][0].keys()) == {
         "action",
         "predicted",

From 919864eefab19b181428c9a8b9fa15fc0ee8e9fd Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 20 Aug 2020 14:01:09 +0200
Subject: [PATCH 03/34] fixed linter issues

---
 rasa/core/test.py       | 2 +-
 tests/core/test_data.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 821ca644e632..cb0800ba7d68 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -34,7 +34,7 @@
 CONFUSION_MATRIX_STORIES_FILE = "story_confusion_matrix.png"
 REPORT_STORIES_FILE = "story_report.json"
 FAILED_STORIES_FILE = "failed_conversations.yml"
-SUCCESSFUL_STORIES_FILE = "successful_conversations.md"
+SUCCESSFUL_STORIES_FILE = "successful_conversations.yml"
 
 
 logger = logging.getLogger(__name__)
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
index 325cb6d4525b..5f209838f32e 100644
--- a/tests/core/test_data.py
+++ b/tests/core/test_data.py
@@ -163,7 +163,6 @@ def test_find_nlu_files_with_different_formats(test_input, expected):
     examples_dir = "data/examples"
     data_dir = os.path.join(examples_dir, test_input)
     nlu_files = data.get_data_files([data_dir], data.is_nlu_file)
-    core_files = data.get_data_files([data_dir], data.is_story_file)
     assert [Path(f) for f in nlu_files] == [Path(f) for f in expected]
 
 
@@ -194,6 +193,5 @@ def test_is_not_nlu_file_with_json():
 
 def test_get_story_file_with_yaml():
     examples_dir = "data/test_yaml_stories"
-    nlu_files = data.get_data_files([examples_dir], data.is_nlu_file)
     core_files = data.get_data_files([examples_dir], data.is_story_file)
     assert core_files

From ae13dab9344fae8a20c83abf510513933bcb9ad8 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 20 Aug 2020 17:25:14 +0200
Subject: [PATCH 04/34] renamed test_conversations to stories

---
 docs/docs/chitchat-faqs.mdx                   |  4 +-
 examples/formbot/tests/test_conversations.yml |  2 +-
 .../tests/test_conversations.yml              |  2 +-
 rasa/constants.py                             |  1 +
 rasa/core/schemas/stories.yml                 | 49 ++-----------------
 rasa/core/test.py                             |  2 +-
 .../story_reader/markdown_story_reader.py     | 10 ++--
 .../story_reader/yaml_story_reader.py         | 36 ++++++--------
 .../story_writer/yaml_story_writer.py         | 23 ++-------
 rasa/data.py                                  | 12 ++---
 rasa/importers/multi_project.py               |  2 +-
 tests/importers/test_multi_project.py         |  4 +-
 tests/test_data.py                            | 10 ++--
 13 files changed, 48 insertions(+), 109 deletions(-)

diff --git a/docs/docs/chitchat-faqs.mdx b/docs/docs/chitchat-faqs.mdx
index bca96c02e717..c8310ec16004 100644
--- a/docs/docs/chitchat-faqs.mdx
+++ b/docs/docs/chitchat-faqs.mdx
@@ -165,7 +165,7 @@ The file `tests/test_conversations.yml` contains example test conversations. Del
 them with some test conversations for your assistant so far:
 
 ```yaml title="tests/test_conversations.yml"
-e2e_tests:
+stories:
 - story: greet and goodybe
   steps:
   - user: |
@@ -343,7 +343,7 @@ rasa shell
 At this stage it makes sense to add a few test cases for our conversations:
 
 ```yaml title="tests/test_conversations.yml"
-e2e_tests:
+stories:
 - story: ask channels
   steps:
   - user: |
diff --git a/examples/formbot/tests/test_conversations.yml b/examples/formbot/tests/test_conversations.yml
index 39c6c43cb118..10d26db64d27 100644
--- a/examples/formbot/tests/test_conversations.yml
+++ b/examples/formbot/tests/test_conversations.yml
@@ -1,4 +1,4 @@
-test_conversations:
+stories:
 - story: Happy path
   steps:
   - user: |
diff --git a/rasa/cli/initial_project/tests/test_conversations.yml b/rasa/cli/initial_project/tests/test_conversations.yml
index f08392fef8e4..d4e567ab9b16 100644
--- a/rasa/cli/initial_project/tests/test_conversations.yml
+++ b/rasa/cli/initial_project/tests/test_conversations.yml
@@ -1,7 +1,7 @@
 #### This file contains tests to evaluate that your bot behaves as expected.
 #### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/
 
-test_conversations:
+stories:
 - story: happy path 1
   steps:
   - user: |
diff --git a/rasa/constants.py b/rasa/constants.py
index f59d68aa698a..feb264920795 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -8,6 +8,7 @@
 DEFAULT_MODELS_PATH = "models"
 DEFAULT_DATA_PATH = "data"
 DEFAULT_E2E_TESTS_PATH = "tests"
+DEFAULT_TEST_STORIES_FILE_PREFIX = "test_"
 DEFAULT_RESULTS_PATH = "results"
 DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results"
 DEFAULT_CORE_SUBDIRECTORY_NAME = "core"
diff --git a/rasa/core/schemas/stories.yml b/rasa/core/schemas/stories.yml
index a072437815ab..2725712dcaac 100644
--- a/rasa/core/schemas/stories.yml
+++ b/rasa/core/schemas/stories.yml
@@ -26,6 +26,10 @@ mapping:
                 type: "str"
                 required: True
                 allowempty: False
+              user:
+                type: "str"
+                required: False
+                allowempty: False
               entities: &entities
                 type: "seq"
                 matching: "any"
@@ -89,51 +93,6 @@ mapping:
                 sequence:
                 - type: "map"
                   mapping: *intent_and_entities
-  test_conversations:
-    type: "seq"
-    matching: "any"
-    sequence:
-    - type: "map"
-      mapping:
-        story:
-          type: "str"
-          allowempty: False
-        metadata:
-          type: "any"
-          required: False
-        steps:
-          type: "seq"
-          matching: "any"
-          sequence:
-          - type: "map"
-            mapping:
-              user:
-                type: "str"
-                required: False
-                allowempty: False
-              intent: *intent
-              entities: *entities
-          - type: "map"
-            mapping: *active_loop
-          - type: "map"
-            mapping: *action
-          - type: "map"
-            mapping: *slot_was_set_seq
-          - type: "map"
-            matching-rule: 'any'
-            mapping:
-              checkpoint:
-                type: "str"
-                allowempty: False
-              slot_was_set: *slot_was_set_seq_value
-          - type: "map"
-            mapping:
-              or:
-                type: "seq"
-                matching: "any"
-                sequence:
-                - type: "map"
-                  mapping: *intent_and_entities
   rules:
     type: "seq"
     matching: "any"
diff --git a/rasa/core/test.py b/rasa/core/test.py
index cb0800ba7d68..0183316ba862 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -557,7 +557,7 @@ def _log_stories(
                 tracker.as_story(include_source=True) for tracker in trackers
             ]  # TODO: revisit `include_source=True` - what do we need it for?
             steps = [step for story in stories for step in story.story_steps]
-            f.write(YAMLStoryWriter().dumps(steps, as_test_conversations=True))
+            f.write(YAMLStoryWriter().dumps(steps))
 
 
 async def test(
diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 0bc8d6c45502..5905aca4adc1 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -6,9 +6,9 @@
 from pathlib import PurePath, Path
 from typing import Dict, Optional, Text, List, Any, Union
 
+from rasa import data
 import rasa.data
 from rasa.nlu.training_data import Message
-from rasa.nlu.training_data.formats import MarkdownReader
 import rasa.utils.io as io_utils
 from rasa.constants import (
     DEFAULT_E2E_TESTS_PATH,
@@ -296,16 +296,16 @@ def is_markdown_story_file(file_path: Union[Text, Path]) -> bool:
             return False
 
     @staticmethod
-    def is_markdown_test_conversations_file(file_path: Union[Text, Path]) -> bool:
-        """Checks if a file is a test conversations file.
+    def is_markdown_test_stories_file(file_path: Union[Text, Path]) -> bool:
+        """Checks if a file contains test stories.
 
         Args:
             file_path: Path of the file which should be checked.
 
         Returns:
-            `True` if it's a conversation test file, otherwise `False`.
+            `True` if it's a file containing test stories, otherwise `False`.
         """
-        if Path(file_path).suffix not in MARKDOWN_FILE_EXTENSIONS:
+        if not data.is_likely_markdown_file(file_path):
             return False
 
         dirname = os.path.dirname(file_path)
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index 92d78cfc2c76..7b5513892967 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -8,7 +8,13 @@
 
 import rasa.utils.common as common_utils
 import rasa.utils.io as io_utils
-from rasa.constants import DOCS_URL_STORIES, DOCS_URL_RULES, DOCS_URL_TEST_CONVERSATIONS
+from rasa.constants import (
+    DEFAULT_E2E_TESTS_PATH,
+    DEFAULT_TEST_STORIES_FILE_PREFIX,
+    DOCS_URL_STORIES,
+    DOCS_URL_RULES,
+    DOCS_URL_TEST_CONVERSATIONS,
+)
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
 from rasa.core.actions.action import RULE_SNIPPET_ACTION_NAME
 from rasa.core.events import UserUttered, SlotSet, Form
@@ -111,7 +117,6 @@ def read_from_parsed_yaml(
         for key, parser_class in {
             KEY_STORIES: StoryParser,
             KEY_RULES: RuleParser,
-            KEY_TEST_CONVERSATIONS: TestConversationsParser,
         }.items():
             data = parsed_content.get(key, [])
             parser = parser_class.from_reader(self)
@@ -149,7 +154,11 @@ def is_key_in_yaml(cls, file_path, *keys):
             return False
 
     @classmethod
-    def is_yaml_test_conversations_file(cls, file_path: Union[Text, Path]) -> bool:
+    def _has_test_prefix(cls, file_path):
+        return Path(file_path).name.startswith(DEFAULT_TEST_STORIES_FILE_PREFIX)
+
+    @classmethod
+    def is_yaml_test_stories_file(cls, file_path: Union[Text, Path]) -> bool:
         """Checks if a file is a test conversations file.
 
         Args:
@@ -158,9 +167,8 @@ def is_yaml_test_conversations_file(cls, file_path: Union[Text, Path]) -> bool:
         Returns:
             `True` if it's a conversation test file, otherwise `False`.
         """
-        return rasa.data.is_likely_yaml_file(file_path) and cls.is_key_in_yaml(
-            file_path, KEY_TEST_CONVERSATIONS
-        )
+
+        return cls._has_test_prefix(file_path) and cls.is_yaml_story_file(file_path)
 
     def get_steps(self) -> List[StoryStep]:
         self._add_current_stories_to_result()
@@ -435,22 +443,6 @@ def _get_docs_link(self) -> Text:
         return DOCS_URL_STORIES
 
 
-class TestConversationsParser(YAMLStoryReader):
-    """Encapsulate test conversation (e2e tests) specific parser behavior."""
-
-    def _new_part(self, item_name: Text, item: Dict[Text, Any]) -> None:
-        self._new_story_part(item_name, self.source_name)
-
-    def _get_item_title(self) -> Text:
-        return KEY_STORY_NAME
-
-    def _get_plural_item_title(self) -> Text:
-        return KEY_TEST_CONVERSATIONS
-
-    def _get_docs_link(self) -> Text:
-        return DOCS_URL_TEST_CONVERSATIONS
-
-
 class RuleParser(YAMLStoryReader):
     """Encapsulate rule-specific parser behavior."""
 
diff --git a/rasa/core/training/story_writer/yaml_story_writer.py b/rasa/core/training/story_writer/yaml_story_writer.py
index 02ae65c7b786..9dac8c4aa89d 100644
--- a/rasa/core/training/story_writer/yaml_story_writer.py
+++ b/rasa/core/training/story_writer/yaml_story_writer.py
@@ -40,49 +40,38 @@
 class YAMLStoryWriter:
     """Writes Core training data into a file in a YAML format. """
 
-    def dumps(
-        self, story_steps: List[StoryStep], as_test_conversations: bool = False
-    ) -> Text:
+    def dumps(self, story_steps: List[StoryStep]) -> Text:
         """Turns Story steps into a string.
 
         Args:
             story_steps: Original story steps to be converted to the YAML.
-            as_test_conversations: Decides which top level YAML key to use (stories
-                or test_conversations).
         Returns:
             String with story steps in the YAML format.
         """
         stream = ruamel_yaml.StringIO()
-        self.dump(stream, story_steps, as_test_conversations)
+        self.dump(stream, story_steps)
         return stream.getvalue()
 
     def dump(
         self,
         target: Union[Text, Path, ruamel_yaml.StringIO],
         story_steps: List[StoryStep],
-        as_test_conversations: bool = False,
     ) -> None:
         """Writes Story steps into a target file/stream.
 
         Args:
             target: name of the target file/stream to write the YAML to.
             story_steps: Original story steps to be converted to the YAML.
-            as_test_conversations: Decides which top level YAML key to use (stories
-                or test_conversations).
         """
-        result = self.stories_to_yaml(story_steps, as_test_conversations)
+        result = self.stories_to_yaml(story_steps)
 
         io_utils.write_yaml(result, target, True)
 
-    def stories_to_yaml(
-        self, story_steps: List[StoryStep], as_test_conversations: bool = False,
-    ) -> Dict[Text, Any]:
+    def stories_to_yaml(self, story_steps: List[StoryStep]) -> Dict[Text, Any]:
         """Converts a sequence of story steps into yaml format.
 
         Args:
             story_steps: Original story steps to be converted to the YAML.
-            as_test_conversations: Decides which top level YAML key to use (stories
-                or test_conversations).
         """
         from rasa.validator import KEY_TRAINING_DATA_FORMAT_VERSION
 
@@ -97,9 +86,7 @@ def stories_to_yaml(
             LATEST_TRAINING_DATA_FORMAT_VERSION
         )
 
-        data_key = KEY_TEST_CONVERSATIONS if as_test_conversations else KEY_STORIES
-
-        result[data_key] = stories
+        result[KEY_STORIES] = stories
         return result
 
     def process_story_step(self, story_step: StoryStep) -> Optional[OrderedDict]:
diff --git a/rasa/data.py b/rasa/data.py
index 37285228fa39..9c056b29813e 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -42,7 +42,7 @@ def get_test_directory(paths: Optional[Union[Text, List[Text]]],) -> Text:
     Returns:
         Path to temporary directory containing all found Core training files.
     """
-    test_files = get_data_files(paths, is_test_conversations_file)
+    test_files = get_data_files(paths, is_test_stories_file)
     return _copy_files_to_new_dir(test_files)
 
 
@@ -182,23 +182,23 @@ def is_story_file(file_path: Text) -> bool:
     ) or MarkdownStoryReader.is_markdown_story_file(file_path)
 
 
-def is_test_conversations_file(file_path: Text) -> bool:
-    """Checks if a file is an end-to-end conversation test file.
+def is_test_stories_file(file_path: Text) -> bool:
+    """Checks if a file is a test stories file.
 
     Args:
         file_path: Path of the file which should be checked.
 
     Returns:
-        `True` if it's a conversation test file, otherwise `False`.
+        `True` if it's a story file containing tests, otherwise `False`.
     """
     from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
     from rasa.core.training.story_reader.markdown_story_reader import (
         MarkdownStoryReader,
     )
 
-    return YAMLStoryReader.is_yaml_test_conversations_file(
+    return YAMLStoryReader.is_yaml_story_file(
         file_path
-    ) or MarkdownStoryReader.is_markdown_test_conversations_file(file_path)
+    ) or MarkdownStoryReader.is_markdown_test_stories_file(file_path)
 
 
 def is_config_file(file_path: Text) -> bool:
diff --git a/rasa/importers/multi_project.py b/rasa/importers/multi_project.py
index 8fa772dbd153..ec1d2c1eebd4 100644
--- a/rasa/importers/multi_project.py
+++ b/rasa/importers/multi_project.py
@@ -93,7 +93,7 @@ def _init_from_directory(self, path: Text):
                     # Check next file
                     continue
 
-                if data.is_test_conversations_file(full_path):
+                if data.is_test_stories_file(full_path):
                     self._e2e_story_paths.append(full_path)
                 elif Domain.is_domain_file(full_path):
                     self._domain_paths.append(full_path)
diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py
index 3bd197ce940e..9bab4b4b5554 100644
--- a/tests/importers/test_multi_project.py
+++ b/tests/importers/test_multi_project.py
@@ -220,7 +220,7 @@ def test_not_importing_not_relevant_additional_files(tmpdir_factory):
         (
             "test_conversations.yml",
             """
-        test_conversations:
+        stories:
         - story: story test
           steps:
           - user: hello
@@ -295,7 +295,7 @@ def test_not_importing_e2e_conversation_tests_in_project(
     e2e_story_test_file = (
         root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_conversations.yml"
     )
-    e2e_story_test_file.write("""test_conversations:""", ensure=True)
+    e2e_story_test_file.write("""stories:""", ensure=True)
 
     selector = MultiProjectImporter(config_path)
 
diff --git a/tests/test_data.py b/tests/test_data.py
index 6c39b4185ebd..a6f4404a5e25 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -39,10 +39,10 @@ def test_default_conversation_tests_are_conversation_tests_yml(tmpdir: Path):
     Path(parent).mkdir(parents=True)
 
     e2e_path = parent / "test_conversations.yml"
-    e2e_story = """test_conversations:"""
+    e2e_story = """stories:"""
     write_text_file(e2e_story, e2e_path)
 
-    assert data.is_test_conversations_file(str(e2e_path))
+    assert data.is_test_stories_file(str(e2e_path))
 
 
 def test_default_conversation_tests_are_conversation_tests_md(tmpdir: Path):
@@ -54,7 +54,7 @@ def test_default_conversation_tests_are_conversation_tests_md(tmpdir: Path):
     e2e_story = """## my story test"""
     write_text_file(e2e_story, e2e_path)
 
-    assert data.is_test_conversations_file(str(e2e_path))
+    assert data.is_test_stories_file(str(e2e_path))
 
 
 def test_nlu_data_files_are_not_conversation_tests(tmpdir: Path):
@@ -70,7 +70,7 @@ def test_nlu_data_files_are_not_conversation_tests(tmpdir: Path):
     """
     write_text_file(nlu_data, nlu_path)
 
-    assert not data.is_test_conversations_file(str(nlu_path))
+    assert not data.is_test_stories_file(str(nlu_path))
 
 
 def test_domain_files_are_not_conversation_tests(tmpdir: Path):
@@ -79,4 +79,4 @@ def test_domain_files_are_not_conversation_tests(tmpdir: Path):
 
     domain_path = parent / "domain.yml"
 
-    assert not data.is_test_conversations_file(str(domain_path))
+    assert not data.is_test_stories_file(str(domain_path))

From 87fcfdbb44d8ba3ced68537dbf83d6c38f5640ee Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Fri, 21 Aug 2020 11:09:37 +0200
Subject: [PATCH 05/34] fixed story reader detection

---
 rasa/core/training/story_reader/markdown_story_reader.py | 6 +++---
 rasa/core/training/story_writer/yaml_story_writer.py     | 7 +++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 5905aca4adc1..d01ef8622ea8 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -272,9 +272,9 @@ def is_markdown_story_file(file_path: Union[Text, Path]) -> bool:
             `True` in case the file is a Core Markdown training data or rule data file,
             `False` otherwise.
         """
-        suffix = PurePath(file_path).suffix
-
-        if suffix not in MARKDOWN_FILE_EXTENSIONS:
+        if not data.is_likely_markdown_file(file_path) or rasa.data.is_nlu_file(
+            file_path
+        ):
             return False
 
         try:
diff --git a/rasa/core/training/story_writer/yaml_story_writer.py b/rasa/core/training/story_writer/yaml_story_writer.py
index 9dac8c4aa89d..4cf0cffec331 100644
--- a/rasa/core/training/story_writer/yaml_story_writer.py
+++ b/rasa/core/training/story_writer/yaml_story_writer.py
@@ -192,14 +192,13 @@ def process_action(action: ActionExecuted) -> OrderedDict:
         Returns:
             Dict with an action.
         """
-        result = OrderedDict()
-        s = PlainScalarString(action.action_name)
+        result = CommentedMap()
+        result[KEY_ACTION] = action.action_name
 
         # TODO: this is a workaround to print predicted action...
         if hasattr(action, "comment"):
-            s.comment = f"  # {action.comment()}"
+            result.yaml_add_eol_comment(action.comment(), KEY_ACTION)
 
-        result[KEY_ACTION] = s
         return result
 
     @staticmethod

From e9586cfb958b88add6a9059e68ea1ad2e7efca34 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Fri, 21 Aug 2020 14:27:26 +0200
Subject: [PATCH 06/34] fixed remaining tests

---
 tests/core/test_evaluation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/core/test_evaluation.py b/tests/core/test_evaluation.py
index 7041c28adbf7..aa6b0d36f34e 100644
--- a/tests/core/test_evaluation.py
+++ b/tests/core/test_evaluation.py
@@ -129,7 +129,7 @@ async def test_source_in_failed_stories(tmpdir: Path, default_agent: Agent):
     failed_stories = rasa.utils.io.read_file(stories_path)
 
     assert (
-        f"## simple_story_with_unknown_entity ({E2E_STORY_FILE_UNKNOWN_ENTITY})"
+        f"story: simple_story_with_unknown_entity ({E2E_STORY_FILE_UNKNOWN_ENTITY})"
         in failed_stories
     )
 

From 1cf5cca86446837df1d1cc3fad2455f767ffacc8 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Fri, 21 Aug 2020 14:45:35 +0200
Subject: [PATCH 07/34] fixed type error

---
 rasa/core/training/story_reader/markdown_story_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index d01ef8622ea8..992300c7c79d 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -207,7 +207,7 @@ async def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> No
         self.current_step_builder.add_user_messages(parsed_messages)
 
     @staticmethod
-    def parse_e2e_message(line: Text) -> Optional["Message"]:
+    def parse_e2e_message(line: Text) -> "Message":
         f"""Parses an md list item line based on the current section type.
 
         Matches expressions of the form `<intent>:<example>. For the

From 4f1567d0d23a02e750dc72642c286245dfb61e65 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Mon, 24 Aug 2020 14:07:28 +0200
Subject: [PATCH 08/34] fixed some smaller style issues

---
 rasa/core/test.py |  6 ++----
 rasa/test.py      | 21 ++++++++-------------
 2 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 0183316ba862..ae567dd12aea 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -551,11 +551,9 @@ def _log_stories(
         os.path.join(out_directory, filename), "w", encoding=DEFAULT_ENCODING
     ) as f:
         if not trackers:
-            f.write("# No stories found.")
+            f.write("# None of the test stories failed - all good!")
         else:
-            stories = [
-                tracker.as_story(include_source=True) for tracker in trackers
-            ]  # TODO: revisit `include_source=True` - what do we need it for?
+            stories = [tracker.as_story(include_source=True) for tracker in trackers]
             steps = [step for story in stories for step in story.story_steps]
             f.write(YAMLStoryWriter().dumps(steps))
 
diff --git a/rasa/test.py b/rasa/test.py
index 275f5646bdef..ead4d731600f 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -113,10 +113,10 @@ def test_core(
     endpoints: Optional[Text] = None,
     output: Text = DEFAULT_RESULTS_PATH,
     additional_arguments: Optional[Dict] = None,
-):
+) -> None:
     import rasa.core.utils as core_utils
     import rasa.model
-    from rasa.core.interpreter import RegexInterpreter, NaturalLanguageInterpreter
+    from rasa.core.interpreter import RegexInterpreter
     from rasa.core.agent import Agent
 
     _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)
@@ -136,27 +136,22 @@ def test_core(
         )
         return
 
-    core_path, nlu_path = rasa.model.get_model_subdirectories(unpacked_model)
+    _agent = Agent.load(unpacked_model)
 
-    if not core_path:
+    if _agent.policy_ensemble is None:
         cli_utils.print_error(
             "Unable to test: could not find a Core model. Use 'rasa train' to train a "
             "Rasa model and provide it via the '--model' argument."
         )
 
-    use_e2e = additional_arguments.get("e2e", False)
-
-    _interpreter = RegexInterpreter()
-    if nlu_path:
-        _interpreter = NaturalLanguageInterpreter.create(_endpoints.nlu or nlu_path)
-    elif use_e2e:
+    if isinstance(_agent.interpreter, RegexInterpreter):
         cli_utils.print_warning(
             "No NLU model found. Using default 'RegexInterpreter' for end-to-end "
-            "evaluation."
+            "evaluation. If you added actual user messages to your test stories "
+            "this will likely lead to the tests failing. In that case, you need "
+            "to train a NLU model first, e.g. using `rasa train`."
         )
 
-    _agent = Agent.load(unpacked_model, interpreter=_interpreter)
-
     from rasa.core.test import test
 
     kwargs = utils.minimal_kwargs(additional_arguments, test, ["stories", "agent"])

From ab208e5e6d2e2ff4bae9cf37f72ab51eaab508f7 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Mon, 24 Aug 2020 17:47:23 +0200
Subject: [PATCH 09/34] added documentation

---
 docs/docs/chitchat-faqs.mdx          | 271 +++++++++++++++------------
 docs/docs/command-line-interface.mdx |  68 ++++---
 docs/docs/policies.mdx               |  23 ++-
 docs/docs/testing-your-assistant.mdx |  97 +++++-----
 docs/docs/training-data-format.mdx   | 118 ++++++------
 5 files changed, 330 insertions(+), 247 deletions(-)

diff --git a/docs/docs/chitchat-faqs.mdx b/docs/docs/chitchat-faqs.mdx
index 80f0cfaa2d64..5c802343479e 100644
--- a/docs/docs/chitchat-faqs.mdx
+++ b/docs/docs/chitchat-faqs.mdx
@@ -6,109 +6,71 @@ title: Chitchat and FAQs
 
 import useBaseUrl from '@docusaurus/useBaseUrl';
 
-<!-- TODO: Restructure all conversations patterns pages to not depend on one another -->
-
-After following the basics of [prototyping an assistant](./prototype-an-assistant.mdx), we'll
-now walk through building a basic FAQ chatbot and then build a bot that can handle
-contextual conversations.
-
-<a aria-hidden="true" tabIndex="-1" className="anchor enhancedAnchor" id="build-faq-assistant"></a>
-
 FAQ assistants are the simplest assistants to build and a good place to get started.
-These assistants allow the user to ask a simple question and get a response. We're going to
-build a basic FAQ assistant using features of Rasa designed specifically for this type of assistant.
+These assistants allow the user to ask a simple question and get a response. You're
+going to build a basic FAQ assistant using features of Rasa designed specifically
+for this type of assistant. You can handle chitchat in the same way.
 
-In this section we're going to cover the following topics:
+In this section you will read about the following topics:
 
-* [Responding to simple intents](./chitchat-faqs.mdx#respond-with-memoization-policy) with the MemoizationPolicy
+* [Responding to simple messages](./chitchat-faqs.mdx#responding-to-simple-messages)
+  with the `MemoizationPolicy`
 
-* [Handling FAQs](./chitchat-faqs.mdx#faqs-response-selector) using the ResponseSelector
+* [Handling FAQs](./chitchat-faqs.mdx#handling-faqs-using-a-response-selector)
+  using a `ResponseSelector`
 
-We're going to use content from [Sara](https://github.com/RasaHQ/rasa-demo), the Rasa
-assistant that, amongst other things, helps the user get started with the Rasa products.
-You should [first install Rasa](installation.mdx)
-and then [prototype an assistant](prototype-an-assistant.mdx)
-to make sure you know the basics.
+## Prerequisites
 
-To prepare for this tutorial, we're going to start a new Rasa project:
+You should [first install Rasa](installation.mdx) and
+[prototype an assistant](prototype-an-assistant.mdx) to make sure
+you understand the basic concepts of intents, stories and domains.
+
+For this tutorial, you can create a new Rasa project using the CLI:
 
 ```bash
 rasa init
 ```
 
-Let's remove the default content from this bot, so that the `data/nlu.yml`, `data/stories.yml`
-and `domain.yml` files are empty.
+If you want to start from scratch, you should remove the default content from
+the `data/nlu.yml`, `data/stories.yml` and `domain.yml` files.
+
+## Responding to simple messages
 
-<a aria-hidden="true" tabIndex="-1" className="anchor enhancedAnchor" id="respond-with-memoization-policy"></a>
+Responding to single messages requires the assistant to recognize the message and
+trigger a response.
 
-## Memoization Policy
+Rasa Open Source uses what is called a `MemoizationPolicy` to
+predict what it needs to do once it receives such a message.
 
-The MemoizationPolicy remembers examples from training stories for up to a `max_history`
-of turns. One “turn” includes the message sent by the user and any actions the
-assistant performed before waiting for the next message. For the purpose of a simple,
-context-less FAQ bot, we only need to pay attention to the last message the user sent,
-and therefore we'll set that to `1`.
+:::note MemoizationPolicy
+If the `policies` key in your `config.yml` is empty (or only contains
+comments) the `MemoizationPolicy` will be added automatically using
+[config suggestions](model-configuration.mdx#suggested-config).
+You do not need to configure any policies.
 
-You can do this by editing your configuration file as follows
-(you can remove `TEDPolicy` for now):
+If you customized your `policies`, you need to make sure the
+[Memoization Policy](policies.mdx#memoization-policy)
+is part of your configuration:
 
-```yaml title="config.yml"
+```yaml-rasa title="config.yml"
 policies:
 - name: MemoizationPolicy
   max_history: 1
-- name: MappingPolicy
-```
-
-:::note MappingPolicy
-The `MappingPolicy` is there because it handles the logic of the `/restart` intent,
-which allows you to clear the conversation history and start fresh.
-
-:::
-
-Now that we've defined our policies, we can add some stories for the `goodbye`, `thank` and `greet`
-intents to our stories:
-
-```yaml title="data/stories.yml"
-stories:
 
-- story: greet           # name of the story
-  steps:
-  - intent: greet        # intent of the user message
-  - action: utter_greet  # reaction of the bot
-
-- story: thank
-  steps:
-  - intent: thank
-  - action: utter_noworries
-
-- story: goodbye
-  steps:
-  - intent: bye
-  - action: utter_bye
+# ... your other policies
 ```
 
-We'll also need to add the intents, actions and responses to our domain:
-
-```yml title="domain.yml"
-intents:
-  - greet
-  - bye
-  - thank
-
-responses:
-  utter_noworries:
-    - text: No worries!
-
-  utter_greet:
-    - text: Hi
+:::
 
-  utter_bye:
-    - text: Bye!
-```
+### 1. Creating intents
 
-Finally, we'll copy over some user message training data from Sara to train our
-intents (more can be found [here](https://github.com/RasaHQ/rasa-demo/blob/master/data/nlu/nlu.md)):
+The first step is to define the messages you want the bot to handle. You can copy
+over some user message training data from Sara to train your
+intents. Sara is the Rasa assistant that helps users to get started with our
+Rasa products, you can find more training data to use in your projects
+[here](https://github.com/RasaHQ/rasa-demo/blob/master/data/nlu/nlu.md).
 
+To add the training data to your bot, put it into the training data files:
 
 ```yaml-rasa title="data/nlu.yml"
 nlu:
@@ -145,6 +107,58 @@ nlu:
     - cheers
 ```
 
+### 2. Writing stories
+
+Now that you've defined your intents, you'll need to add some [stories](stories.mdx)
+for the `goodbye`, `thank` and `greet` intents.
+You can add the following stories to define how the bot will respond to the intents:
+
+```yaml-rasa title="data/stories.yml"
+stories:
+
+- story: greet           # name of the story
+  steps:
+  - intent: greet        # intent of the user message
+  - action: utter_greet  # reaction of the bot
+
+- story: thank
+  steps:
+  - intent: thank
+  - action: utter_noworries
+
+- story: goodbye
+  steps:
+  - intent: bye
+  - action: utter_bye
+```
+
+:::note Lots of intents
+Do you have lots of intents that you want to respond to? In that case you should
+check out [Handling FAQs](./chitchat-faqs.mdx#handling-faqs-using-a-response-selector).
+You'll avoid the need to specify one story for each of your intents.
+:::
+
+You'll need to tie all these pieces together by adding the intents, actions
+and responses to your [domain](domain.mdx):
+
+```yaml-rasa title="domain.yml"
+intents:
+- greet
+- bye
+- thank
+
+responses:
+  utter_noworries:
+  - text: No worries!
+
+  utter_greet:
+  - text: Hi
+
+  utter_bye:
+  - text: Bye!
+```
+
+### 3. Using the bot
 You can now train a first model and test the bot, by running the following commands:
 
 ```bash
@@ -158,13 +172,17 @@ For example:
 
 <img alt="Memoization Policy Conversation" src={useBaseUrl("/img/memoization_policy_convo.png")} />
 
-While it's good to test the bot interactively, we should also add end to end test cases that
-can later be included as part of a [CI/CD system](./setting-up-ci-cd). End-to-end [test conversations](./testing-your-assistant#end-to-end-testing)
-include NLU data, so that both components of Rasa can be tested.
-The file `tests/test_conversations.yml` contains example test conversations. Delete all the test conversations and replace
-them with some test conversations for your assistant so far:
+### 4. Testing the bot
+
+While it's good to test the bot interactively, you should also add story tests that
+can later be included as part of a [CI/CD system](./setting-up-ci-cd).
+[Story tests](./testing-your-assistant#end-to-end-testing)
+include user messages, bot actions and responses. This ensures that
+the dialogue handling is tested as well as the message handling.
+The initial project already contains test conversations, you can replace
+them with some test conversations for your assistant:
 
-```yaml title="tests/test_conversations.yml"
+```yaml-rasa title="tests/test_conversations.yml"
 stories:
 - story: greet and goodybe
   steps:
@@ -204,36 +222,50 @@ stories:
   - action: utter_bye
 ```
 
-To test our model against the test file, run the command:
+This test file contains three separate test stories. You can
+test your bot on all of them using `rasa test`:
 
 ```bash
 rasa test --stories tests/test_conversations.yml
 ```
 
-The test command will produce a directory named `results`. It should contain a file
+The test command will produce a directory named `results`. It will contain a file
 called `failed_stories.yml`, where any test cases that failed will be printed. It will
 also specify whether it was an NLU or Core prediction that went wrong.  As part of a
 CI/CD pipeline, the test option `--fail-on-prediction-errors` can be used to throw
-an exception that stops the pipeline.
+an exception that stops the pipeline:
+
+```bash
+rasa test --stories tests/test_conversations.yml --fail-on-prediction-errors
+```
 
-<a aria-hidden="true" tabIndex="-1" className="anchor enhancedAnchor" id="faqs-response-selector"></a>
+## Handling FAQs using a Response Selector
 
-## Response Selectors
+When you need to handle lots of different messages like FAQs or chitchat, the above
+approach using the `MemoizationPolicy` will become cumbersome. You will need to write
+one story for each of the different intents.
 
-The [ResponseSelector](components/selectors.mdx#responseselector) NLU component
-is designed to make it easier to handle conversation patterns like small talk and
-FAQ messages in a simple manner. By using the `ResponseSelector`, you only need one
-story to handle all FAQs, instead of adding new stories every time you want to
-increase your bot's scope.
+The [ResponseSelector](components/selectors.mdx#responseselector) is designed to
+make it easier to handle conversation patterns like small talk and FAQ messages.
+When you use the `ResponseSelector`, you only need one story to handle all FAQs,
+instead of adding one story for each intent.
+
+:::note More details on the Response Selector
+
+This walkthrough shows you how to use the response selector. If you want to know
+more about how it works under the hood, head over to this [blog post](https://blog.rasa.com/response-retrieval-models/) and the
+[Retrieval Actions](./retrieval-actions.mdx) page.
+:::
+
+### 1. Creating intents
 
 People often ask Sara different questions surrounding the Rasa products, so let's
 start with three intents: `ask_channels`, `ask_languages`, and `ask_rasax`.
-We're going to copy over some user messages from the
-[Sara training data](https://github.com/RasaHQ/rasa-demo/blob/master/data/nlu/nlu.md)
-into our training data. It's important that these intents have an `faq/` prefix, so
-they're recognized as the faq intent by the `ResponseSelector`:
 
-```yml title="data/nlu.yml"
+Similar to the `MemoizationPolicy`, you'll need to define the intents and add some
+training data for them:
+
+```yaml-rasa title="data/nlu.yml"
 nlu:
 - intent: faq/ask_channels
   examples: |
@@ -260,11 +292,19 @@ nlu:
     - Tell me about rasa x
     - tell me what is rasa x
 ```
+:::info FAQ/ prefix
+It's important that these intents have a common prefix that is separated by a `/`.
+E.g. in the above example, all intents share the `faq/` prefix. This is necessary
+for the intents to be recognized by the `ResponseSelector`.
+
+:::
 
-Next, we'll need to define the responses associated with these FAQs in a new
+### 2. Creating responses
+
+Next, you'll need to define the responses associated with these FAQs in a new
 file:
 
-```yaml title="data/responses.yml"
+```yaml-rasa title="data/responses.yml"
 responses:
   faq/ask_channels:
   - text: |
@@ -279,9 +319,11 @@ responses:
   - text: "Rasa X is a tool to learn from real conversations and improve your assistant. Read more [here](https://rasa.com/docs/rasa-x/)"
 ```
 
+### 3. Configuring the bot
+
 The `ResponseSelector` should already be at the end of your pipeline configuration:
 
-```yaml title="config.yml" {14-15}
+```yaml-rasa title="config.yml" {14-15}
 language: en
 pipeline:
   - name: WhitespaceTokenizer
@@ -303,7 +345,7 @@ Now that we've defined the message handling side, we need to make
 the dialogue handling parts aware of these changes. First, we need to add the
 new intents to our domain:
 
-```yaml title="domain.yml"
+```yaml-rasa title="domain.yml"
 intents:
   - greet
   - bye
@@ -316,14 +358,14 @@ which takes care of sending the response predicted from the `ResponseSelector`
 back to the user, to the list of actions. These actions always have to start
 with the `respond_` prefix:
 
-```yaml title="domain.yml"
+```yaml-rasa title="domain.yml"
 actions:
   - respond_faq
 ```
 
 Next we'll write a story so that the dialogue engine knows which action to predict:
 
-```yml title="data/stories.yml"
+```yaml-rasa title="data/stories.yml"
 stories:
 - story: Some question from FAQ
   steps:
@@ -333,6 +375,8 @@ stories:
 
 This prediction is handled by the `MemoizationPolicy`, as we described earlier.
 
+### 4. Using the bot
+
 After all of the changes are done, train a new model and test the modified FAQs:
 
 ```bash
@@ -340,9 +384,11 @@ rasa train
 rasa shell
 ```
 
+### 5. Testing the bot
+
 At this stage it makes sense to add a few test cases for our conversations:
 
-```yaml title="tests/test_conversations.yml"
+```yaml-rasa title="tests/test_conversations.yml"
 stories:
 - story: ask channels
   steps:
@@ -365,25 +411,20 @@ stories:
     intent: faq
   - action: respond_faq
 ```
-
-You can read more in this [blog post](https://blog.rasa.com/response-retrieval-models/) and the
-[Retrieval Actions](./retrieval-actions.mdx) page.
+### Response Selector Checklist
 
 Using the features we described in this tutorial, you can easily build a context-less assistant.
 
-:::note Checklist
-Here's a minimal checklist of files we modified to build a basic FAQ assistant:
+Here's a **minimal checklist of files you need to modify** to build a basic FAQ assistant:
 
-* `data/nlu.yml`: Add NLU training data for `faq/` intents
+* `data/nlu.yml`: Add user message training data for `faq/` intents
 
 * `data/responses.yml`: Add responses associated with `faq/` intents
 
 * `config.yml`: Add `ResponseSelector` in your NLU pipeline
 
-* `domain.yml`: Add a retrieval action `respond_faq` and intent `faq`
+* `domain.yml`: Add a retrieval action `respond_faq` and the intent `faq`
 
 * `data/stories.yml`: Add a simple story for FAQs
 
 * `tests/test_conversations.yml`: Add E2E test stories for your FAQs
-
-:::
diff --git a/docs/docs/command-line-interface.mdx b/docs/docs/command-line-interface.mdx
index 2c4afe9158a7..7e626767b340 100644
--- a/docs/docs/command-line-interface.mdx
+++ b/docs/docs/command-line-interface.mdx
@@ -25,7 +25,7 @@ The command line interface (CLI) gives you easy-to-remember commands for common
 |`rasa x`                |Launch Rasa X locally.                                                                                                                    |
 |`rasa -h`               |Shows all available commands.                                                                                                             |
 
-## Create a new project
+## rasa init
 
 A single command sets up a complete project for you with some example training data.
 
@@ -42,8 +42,8 @@ This creates the following files:
 ├── config.yml
 ├── credentials.yml
 ├── data
-│   ├── nlu.yml
-│   └── stories.yml
+│   ├── nlu.yml
+│   └── stories.yml
 ├── domain.yml
 ├── endpoints.yml
 ├── models
@@ -59,7 +59,7 @@ With this project setup, common commands are very easy to remember.
 To train a model, type `rasa train`, to talk to your model on the command line, `rasa shell`,
 to test your model type `rasa test`.
 
-## Train a Model
+## rasa train
 
 The main command is:
 
@@ -87,7 +87,7 @@ If training data for only one model type is present, the command automatically f
 
 :::
 
-## Interactive Learning
+## rasa interactive
 
 To start an interactive learning session with your assistant, run
 
@@ -114,9 +114,10 @@ The full list of arguments that can be set for `rasa interactive` is:
 ```text [rasa interactive --help]
 ```
 
-## Talk to your Assistant
+## rasa shell
 
-To start a chat session with your assistant on the command line, run:
+This command allows you to talk to your assistant. To start a chat session
+with your assistant on the command line, run:
 
 ```bash
 rasa shell
@@ -149,7 +150,7 @@ The full list of options for `rasa shell` is:
 ```text [rasa shell --help]
 ```
 
-## Start a Server
+## rasa run
 
 To start a server running your Rasa model, run:
 
@@ -175,7 +176,7 @@ The following arguments can be used to configure your Rasa server:
 For more information on the additional parameters, see [Model Storage](./model-storage.mdx).
 See the Rasa [HTTP API](./http-api.mdx) docs for detailed documentation of all the endpoints.
 
-## Start an Action Server
+## rasa run actions
 
 To run your action server run
 
@@ -188,7 +189,7 @@ The following arguments can be used to adapt the server settings:
 ```text [rasa run actions --help]
 ```
 
-## Visualize your Stories
+## rasa visualize
 
 To open a browser tab with a graph showing your stories:
 
@@ -204,7 +205,7 @@ Additional arguments are:
 ```text [rasa visualize --help]
 ```
 
-## Evaluating a Model on Test Data
+## rasa test
 
 To evaluate your model on test data, run:
 
@@ -212,23 +213,38 @@ To evaluate your model on test data, run:
 rasa test
 ```
 
-Specify the model to test using `--model`.
-Check out more details in [Evaluating an NLU Model](./testing-your-assistant.mdx#evaluating-an-nlu-model) and [Evaluating a Core Model](./testing-your-assistant.mdx#evaluating-a-core-model).
+Specify the model to test using `--model`. The above command will test your model
+end-to-end using test stories. You can evaluate the dialogue and nlu
+model separately using
+```bash
+rasa test core
+```
+and
+
+```bash
+rasa test nlu
+```
+
+Check out more details in
+[Evaluating an NLU Model](./testing-your-assistant.mdx#evaluating-an-nlu-model) and
+[Evaluating a Core Model](./testing-your-assistant.mdx#evaluating-a-core-model).
 
 The following arguments are available for `rasa test`:
 
 ```text [rasa test --help]
 ```
 
-## Create a Train-Test Split
+## rasa data split
 
-To create a split of your NLU data, run:
+This command allows you to create a train-test split of your training data. To
+create a split of your NLU data, run:
 
 ```bash
 rasa data split nlu
 ```
 
-You can specify the training data, the fraction, and the output directory using the following arguments:
+You can specify the training data, the fraction, and the output directory using
+the following arguments:
 
 ```text [rasa data split nlu --help]
 ```
@@ -243,10 +259,20 @@ ls train_test_split
       nlg_training_data.yml training_data.json
 ```
 
-## Convert Data Between Markdown and JSON
+## rasa data convert
+
+You can convert NLU data from
+- LUIS data format,
+- WIT data format,
+- Dialogflow data format,
+- JSON, or
+- Markdown
+
+to
+- JSON or
+- Markdown.
 
-To convert NLU data from LUIS data format, WIT data format, Dialogflow data format, JSON, or Markdown
-to JSON or Markdown, run:
+You can start the converter by running:
 
 ```bash
 rasa data convert nlu
@@ -257,7 +283,7 @@ You can specify the input file, output file, and the output format with the foll
 ```text [rasa data convert nlu --help]
 ```
 
-## Export Conversations to an Event Broker
+## rasa export
 
 To export events from a tracker store using an event broker, run:
 
@@ -272,7 +298,7 @@ should be published.
 ```text [rasa export --help]
 ```
 
-## Start Rasa X
+## rasa x
 
 Rasa X is a toolset that helps you leverage conversations to improve your assistant.
 You can find more information about it <a className="reference external" href="https://rasa.com/docs/rasa-x/" target="_blank">here</a>.You can start Rasa X locally by executing
diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx
index 463245d65dfe..58f46d81a7db 100644
--- a/docs/docs/policies.mdx
+++ b/docs/docs/policies.mdx
@@ -386,10 +386,25 @@ a fixed behavior. Please see [Rules](./rules.mdx) for further information.
 
 ## Memoization Policy
 
-The `MemoizationPolicy` just memorizes the conversations in your
-training data. It predicts the next action with confidence `1.0`
-if this exact conversation exists in the training data, otherwise it
-predicts `None` with confidence `0.0`.
+The `MemoizationPolicy` remembers the stories from your
+training data. It checks if the current conversation matches a story
+in the training data. If so, it will predict the next action from the matching
+story of your training data with a confidence of `1.0`. If no matching conversation
+is found, the policy predicts `None` with confidence `0.0`.
+
+When looking for a match in your training data, the policy will take the last
+`max_history` number of turns of the conversation into account.
+One “turn” includes the message sent by the user and any actions the
+assistant performed before waiting for the next message.
+
+You can configure the number of turns the `MemoizationPolicy` should use in your
+configuration:
+```yaml title="config.yml"
+policies:
+  - name: "MemoizationPolicy"
+    max_history: 3
+```
+
 
 ## Augmented Memoization Policy
 
diff --git a/docs/docs/testing-your-assistant.mdx b/docs/docs/testing-your-assistant.mdx
index 0cef197b153a..65ec5a83fde5 100644
--- a/docs/docs/testing-your-assistant.mdx
+++ b/docs/docs/testing-your-assistant.mdx
@@ -7,31 +7,35 @@ description: Test your Rasa Open Source assistant to validate and improve your c
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
+Rasa Open Source lets you test dialogues end-to-end by running through
+test stories. The tests make sure that user messages are processed correctly
+and the dialogue predictions are correct. In addition to end-to-end tests, you can
+also test the dialogue handling (core) and the message processing (nlu)
+separately.
+
 :::note
 If you are looking to tune the hyperparameters of your NLU model,
 check out this [tutorial](https://blog.rasa.com/rasa-nlu-in-depth-part-3-hyperparameters/).
 
 :::
 
-
 ## End-to-End Testing
 
-Rasa Open Source lets you test dialogues end-to-end by running through
-test conversations and making sure that both NLU and Core make correct predictions.
+Testing your assistant requires you to write test stories, which include
+the user messages and the conversation history. The format is the same as the one
+used to specify stories in your training data.
 
-To do this, you need some stories in the end-to-end format,
-which includes both the NLU output and the original text.
 Here are some examples:
 
 <Tabs values={[{"label": "Basics", "value": "basics"}, {"label": "Custom Actions", "value": "customactions"}, {"label": "Forms Happy Path", "value": "formshappypath"}, {"label": "Forms Unhappy Path", "value": "formsunhappypath"}]} defaultValue="basics">
   <TabItem value="basics">
 
-  ```yaml-rasa
-  e2e_tests:
+  ```yaml-rasa title="tests/test_conversations.yml"
+  stories:
   - story: A basic end-to-end test
     steps:
     - user: |
-       hey
+        hello
       intent: greet
     - action: utter_ask_howcanhelp
     - user: |
@@ -39,41 +43,41 @@ Here are some examples:
       intent: inform
     - action: utter_ask_location
     - user: |
-       in [Paris]{"entity": "location"}
+        in [Paris]{"entity": "location"}
       intent: inform
     - action: utter_ask_price
   ```
 
   </TabItem>
   <TabItem value="customactions">
-  
-  ```yaml-rasa
-  e2e_tests:
-  - story: A test where a custom action returns events 
+
+  ```yaml-rasa title="tests/test_conversations.yml"
+  stories:
+  - story: A test where a custom action returns events
     steps:
     - user: |
-       hey
+        hey
       intent: greet
     - action: my_custom_action
     - slot_was_set:
-      - my_slot: "value added by custom action" 
-    - action: 
-      - utter_ask_age
+      - my_slot: "value added by custom action"
+    - action: utter_ask_age
     - user: |
-        thanks 
+        thanks
       intent: thankyou
     - action: utter_no_worries
   ```
 
+
   </TabItem>
   <TabItem value="formshappypath">
-  
-  ```yaml-rasa
-  e2e_tests:
+
+  ```yaml-rasa title="tests/test_conversations.yml"
+  stories:
   - story: A test conversation with a form
     steps:
     - user: |
-       hi
+        hi
       intent: greet
     - action: utter_greet
     - user: |
@@ -82,41 +86,41 @@ Here are some examples:
     - action: restaurant_form
     - active_loop: restaurant_form
     - user: |
-       [afghan](cuisine) food
+        [afghan](cuisine) food
       intent: inform
     - action: restaurant_form
     - active_loop: null
     - action: utter_slots_values
     - user: |
-        thanks 
+        thanks
       intent: thankyou
     - action: utter_no_worries
   ```
 
   </TabItem>
   <TabItem value="formsunhappypath">
-  
-  ```yaml-rasa
-  e2e_tests:
+
+  ```yaml-rasa title="tests/test_conversations.yml"
+  stories:
   - story: A test conversation with unexpected input during a form
     steps:
     - user: |
-       hi
+        hi
       intent: greet
     - action: utter_greet
     - user: |
-       im looking for a restaurant
+        im looking for a restaurant
       intent: request_restaurant
     - action: restaurant_form
     - active_loop: restaurant_form
     - user: |
-       can you share your boss with me? 
+        can you share your boss with me?
     - action: utter_chitchat
     - action: restaurant_form
     - active_loop: null
     - action: utter_slots_values
     - user: |
-        thanks 
+        thanks
       intent: thankyou
     - action: utter_no_worries
   ```
@@ -131,27 +135,19 @@ You can test your assistant against them by running:
 rasa test
 ```
 
-:::note
-[Custom Actions](./actions.mdx#custom-actions) are **not executed as part of end-to-end tests.** If your custom
-actions append any events to the tracker, this has to be reflected in your end-to-end
+:::info Custom Actions
+[Custom Actions](./custom-actions.mdx) are **not executed as part of end-to-end tests.** If your custom
+actions append any events to the conversation, this has to be reflected in your end-to-end
 tests (e.g. by adding `slot` events to your end-to-end story).
 
-To test your custom actions, write unit tests for them and include these
-tests in your CI/CD pipeline.
+If you want to test the code of your custom actions, you should write unit tests
+for them and include these tests in your CI/CD pipeline.
 
 :::
 
 If you have any questions or problems, please share them with us in the dedicated
 [testing section on our forum](https://forum.rasa.com/tags/testing) !
 
-:::note
-Make sure your model file in `models` is a combined `core`
-and `nlu` model. If it does not contain an NLU model, Core will use
-the default `RegexInterpreter`.
-
-:::
-
-
 ## Evaluating an NLU Model
 
 A standard technique in machine learning is to keep some data separate as a *test set*.
@@ -162,10 +158,11 @@ into train and test sets using:
 rasa data split nlu
 ```
 
-If you've done this, you can see how well your NLU model predicts the test cases using this command:
+If you've done this, you can see how well your NLU model predicts the
+test cases:
 
 ```bash
-rasa test nlu -u train_test_split/test_data.yml --model models/nlu-20180323-145833.tar.gz
+rasa test nlu --nlu train_test_split/test_data.yml
 ```
 
 If you don't want to create a separate test set, you can
@@ -173,13 +170,11 @@ still estimate how well your model generalises using cross-validation.
 To do this, add the flag `--cross-validation`:
 
 ```bash
-rasa test nlu -u data/nlu.ml --config config.yml --cross-validation
+rasa test nlu --nlu data/nlu.yml --cross-validation
 ```
 
-The full list of options for the script is:
-
-```text [rasa test nlu --help]
-```
+You can find the full list of options in the
+[CLI documentation on rasa test](command-line-interface.mdx#rasa-test).
 
 ### Comparing NLU Pipelines
 
diff --git a/docs/docs/training-data-format.mdx b/docs/docs/training-data-format.mdx
index 74476a4484a9..619b9eed5499 100644
--- a/docs/docs/training-data-format.mdx
+++ b/docs/docs/training-data-format.mdx
@@ -57,6 +57,18 @@ Here's a short example which keeps all training data in a single file:
 ```yaml-rasa
 version: "2.0"
 
+nlu:
+- intent: greet
+  examples: |
+    - Hey
+    - Hi
+    - hey there [Sara](name)
+
+- intent: faq/language
+  examples: |
+    - What language do you speak?
+    - Do you only handle english?
+
 stories:
 - story: greet and faq
   steps:
@@ -71,21 +83,13 @@ rules:
   - intent: greet
   - action: utter_greet
 
-nlu:
-- intent: greet
-  examples: |
-    - Hey
-    - Hi
-    - hey there [Sara](name)
-
-- intent: faq/language
-  examples: |
-    - What language do you speak?
-    - Do you only handle english?
+```
 
-e2e_tests:
+If you want to specify your test stories, you need to put them into a separate file:
+```yaml-rasa title="tests/test_conversations.yml"
+stories:
 - story: greet and ask language
-- steps: 
+  steps:
   - user: |
       hey
     intent: greet
@@ -95,7 +99,7 @@ e2e_tests:
     intent: faq/language
   - action: respond_faq
 ```
-
+They use the same format as the story training data.
 
 ## NLU Training Data
 
@@ -103,7 +107,7 @@ e2e_tests:
 **intent**, i.e. what the user is trying to convey or accomplish with their
 message. Training examples can also include **entities**. Entities are structured
 pieces of information that can be extracted from a user's message. You can also
-add extra information such as regular expressions and lookup tables to your 
+add extra information such as regular expressions and lookup tables to your
 training data to help the model identify intents and entities correctly.
 
 NLU training data is defined under the `nlu` key. Items that can be added under this key are:
@@ -122,7 +126,7 @@ nlu:
 - [Synonyms](#synonyms)
 
 ```yaml-rasa
-nlu: 
+nlu:
 - synonym: credit
   examples: |
     - credit card account
@@ -169,7 +173,7 @@ However, it's also possible to use an extended format if you have a custom NLU c
 ```yaml-rasa
 nlu:
 - intent: greet
-  examples: 
+  examples:
   - text: |
       hi
     metadata:
@@ -178,14 +182,14 @@ nlu:
       hey there!
 ```
 
-The `metadata` key can contain arbitrary key-value data that stays with an example and is accessible by the components in the NLU pipeline. In the example above, the sentiment of 
+The `metadata` key can contain arbitrary key-value data that stays with an example and is accessible by the components in the NLU pipeline. In the example above, the sentiment of
 the example could be used by a custom component in the pipeline for sentiment analysis.
 
 ### Entities
 
 [Entities](glossary.mdx#entity) are structured pieces of information that can be extracted from a user's message. For entity extraction to work, you need to either specify training data to train an ML model or you need to define [regular expressions](#regular-expressions-for-entity-extraction) to extract entities using the [`RegexEntityExtractor`](components/entity-extractors.mdx#regexentityextractor) based on a character pattern.
 
-Entities are annotated in training examples with minimally the entity's name. 
+Entities are annotated in training examples with minimally the entity's name.
 In addition to the entity name, you can annotate an entity with synonyms, roles, or groups.
 
 In training examples, entity annotation would look like this:
@@ -200,7 +204,7 @@ nlu:
 
 ```
 
-The full possible syntax for annotating an entity is: 
+The full possible syntax for annotating an entity is:
 
 ```text
 [<entity-text>]{"entity": "<entity name>", "role": "<role name>", "group": "<group name>", "value": "<entity synonym>"}
@@ -243,7 +247,7 @@ pipeline in your configuration file contains the
 should define synonyms when there are multiple ways users refer to the same
 thing.
 
-#### Example 
+#### Example
 
 Let's say you had an entity `account`, and you expect the
 value "credit". Your users also refer to their "credit" account as "credit
@@ -264,16 +268,16 @@ recognize these as entities and replace them with `credit`.
 
 ### Regular Expressions
 
-You can use regular expressions to improve intent classification and 
+You can use regular expressions to improve intent classification and
 entity extraction in combination with the [`RegexFeaturizer`](components/featurizers.mdx#regexfeaturizer) and [`RegexEntityExtractor`](components/entity-extractors.mdx#regexentityextractor) components in the pipeline.
 
-#### Regular Expressions for Intent Classification 
+#### Regular Expressions for Intent Classification
 
-You can use regular expressions to improve intent classification by including the `RegexFeaturizer` component in your pipeline. When using the `RegexFeaturizer`, a regex does not act as a rule for classifying an intent. It only provides a feature that the intent classifier will use 
+You can use regular expressions to improve intent classification by including the `RegexFeaturizer` component in your pipeline. When using the `RegexFeaturizer`, a regex does not act as a rule for classifying an intent. It only provides a feature that the intent classifier will use
 to learn patterns for intent classification.
-Currently, all intent classifiers make use of available regex features. 
+Currently, all intent classifiers make use of available regex features.
 
-The name of a regex in this case is a human readable description. It can help you remember what a regex is used for, and it is the title of the corresponding pattern feature. It does not have to match any intent or entity name. A regex for greeting might look like this: 
+The name of a regex in this case is a human readable description. It can help you remember what a regex is used for, and it is the title of the corresponding pattern feature. It does not have to match any intent or entity name. A regex for greeting might look like this:
 
 ```yaml-rasa
 nlu:
@@ -303,7 +307,7 @@ If your entity has a deterministic structure (like an account number), you can u
   `DIETClassifier` component. Other entity extractors, like
   `MitieEntityExtractor` or `SpacyEntityExtractor`, won't use the generated
   features and their presence will not improve entity recognition for
-  these extractors. 
+  these extractors.
 
 2. For rule-based entity extraction using the [`RegexEntityExtractor`](components/entity-extractors.mdx#regexentityextractor) component in the pipeline.
 
@@ -326,7 +330,7 @@ nlu:
 When using the `RegexFeaturizer`, a regular expression only provides a feature
 that helps the model learn an association between intents/entities and inputs
 that fit the regular expression. In order for the model to learn this association,
-you must provide example inputs that include that regular expression! 
+you must provide example inputs that include that regular expression!
 :::
 
 
@@ -379,16 +383,16 @@ model. [**Stories**](stories.mdx) are used to train a machine learning model
 to identify patterns in conversations and generalize to unseen conversation paths.
 **[Rules](rules.mdx)** describe parts of conversations that should always
 follow the same path and are used to train the
-[RulePolicy](policies.mdx#rule-policy). 
+[RulePolicy](policies.mdx#rule-policy).
 
 
 ### Stories
 
 Stories are composed of:
-  
-  - `story`: The story's name. The name is arbitrary and not used in training; 
+
+  - `story`: The story's name. The name is arbitrary and not used in training;
     you can use it as a human-readable reference for the story.
-  - `metadata`: arbitrary and optional, not used in training, 
+  - `metadata`: arbitrary and optional, not used in training,
     you can use it to store relevant information about the story
     like e.g. the author
   - a list of `steps`: The user messages and actions that make up the story
@@ -431,7 +435,7 @@ messages the users can send to mean the same thing.
 User messages follow the format:
 
 ```yaml-rasa
-stories: 
+stories:
 - story: user message structure
   steps:
     - intent: intent_name  # Required
@@ -505,8 +509,8 @@ A [form](glossary.mdx#form) is a specific kind of custom action that contains th
 a set of required slots and ask the user for this information. You
 [define a form](forms.mdx#defining-a-form) in the `forms` section in your domain.
 Once defined, the [happy path](glossary.mdx#happy-unhappy-paths)
-for a form should be specified as a [rule](forms.mdx), but interruptions of forms or 
-other "unhappy paths" should be included in stories so that the model can 
+for a form should be specified as a [rule](forms.mdx), but interruptions of forms or
+other "unhappy paths" should be included in stories so that the model can
 generalize to unseen conversation sequences.
 As a step in a story, a form takes the following basic format:
 
@@ -527,10 +531,10 @@ The `action` step activates the form and begins looping over the required slots.
 step indicates that there is a currently active form. Much like a `slot_was_set` step,
 a `form` step doesn't **set** a form to active but indicates that it should already be activated.
 In the same way, the  `active_loop: null` step indicates that no form should be active before the subsequent
-steps are taken. 
+steps are taken.
 
-A form can be interrupted and remain active; in this case the interruption should come after the 
-`action: <form to activate>` step and be followed by the `active_loop: <active form>` step. 
+A form can be interrupted and remain active; in this case the interruption should come after the
+`action: <form to activate>` step and be followed by the `active_loop: <active form>` step.
 An interruption of a form could look like this:
 
 ```yaml-rasa
@@ -549,10 +553,10 @@ stories:
 
 #### Slots
 
-A slot event is specified under the key `slot_was_set:` with the 
+A slot event is specified under the key `slot_was_set:` with the
 slot name and optionally the slot's value.
 
-**[Slots](domain.mdx#slots)** act as the bots memory. 
+**[Slots](domain.mdx#slots)** act as the bots memory.
 Slots are **set** by entities or by custom actions and **referenced**
 by stories in `slot_was_set` steps. For example:
 
@@ -594,7 +598,7 @@ action **before** the `slot_was_set` step.
 #### Checkpoints
 
 Checkpoints are specified with the `checkpoint:` key, either at the beginning
-or the end of a story. 
+or the end of a story.
 
 
 Checkpoints are ways to connect stories together. They can be either the first
@@ -627,7 +631,7 @@ stories:
   steps:
   - checkpoint: greet_checkpoint
     # This checkpoint should only apply if slots are set to the specified value
-    slots: 
+    slots:
     - context_scenario: holiday
     - holiday_name: thanksgiving
   - intent: greet
@@ -639,7 +643,7 @@ Checkpoints can help simplify your training data and reduce redundancy in it,
 but **do not overuse them**. Using lots of checkpoints can quickly make your
 stories hard to understand. It makes sense to use them if a sequence of steps
 is repeated often in different stories, but stories without checkpoints
-are easier to read and write. 
+are easier to read and write.
 
 #### OR statement
 
@@ -693,22 +697,16 @@ Read more about writing rules [here](rules.mdx#writing-a-rule).
 
 ## Test Conversations
 
-Test conversations combine both NLU and Core training data into a end-to-end story
-for evaluation.
-
-:::info Test Only
-This format is only used for end-to-end evaluation and cannot be used for training.
-:::
-
-Test conversations are listed under the `e2e_tests` key. 
-Their format is similar to the [story](#stories) format,
-except that user message steps can include a `user` key which specifies the actual
-text and entity annotation of the user message.
+Test conversations use stories to test your bot. The tests check whether each
+message is classified correctly and whether the right actions are predicted.
 
-Here's an example of a test conversation:
+Test stories use the same format as [stories](#stories),
+except that user message steps can include a `user` key to specify the actual
+text and entity annotations of the user message. Here's an example of a
+test conversation:
 
 ```yaml-rasa
-e2e_tests:
+stories:
 - story: A basic end-to-end test
   steps:
   - user: |
@@ -724,3 +722,11 @@ e2e_tests:
     intent: inform
   - action: utter_ask_price
 ```
+
+Running the tests can be done using the CLI:
+```bash
+rasa test
+```
+
+If you want to know more about testing, head over to
+[Testing Your Assistant](testing-your-assistant.mdx).

From 2f08a5191a454cf0bc7782925a73a8e588d233f9 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 13:23:09 +0200
Subject: [PATCH 10/34] style improvements

---
 rasa/core/events/__init__.py                  | 11 +++++++++
 rasa/core/schemas/stories.yml                 |  4 ++--
 rasa/core/test.py                             | 19 ++++++++++-----
 rasa/core/trackers.py                         |  7 +++---
 .../story_reader/yaml_story_reader.py         | 22 ++++++++++++++---
 .../story_writer/yaml_story_writer.py         | 12 +++++-----
 rasa/data.py                                  | 24 +++++++++++++++++++
 rasa/server.py                                |  3 ++-
 8 files changed, 81 insertions(+), 21 deletions(-)

diff --git a/rasa/core/events/__init__.py b/rasa/core/events/__init__.py
index c063ed263714..034903f2c0a0 100644
--- a/rasa/core/events/__init__.py
+++ b/rasa/core/events/__init__.py
@@ -59,6 +59,17 @@ def deserialise_entities(entities: Union[Text, List[Any]]) -> List[Dict[Text, An
 def md_format_message(
     text: Text, intent: Optional[Text], entities: Union[Text, List[Any]]
 ) -> Text:
+    """Uses NLU parser information to generate a message with inline entity annotations.
+
+    Arguments:
+        text: text of the message
+        intent: intent of the message
+        entities: entities of the message
+
+    Returns:
+        Message with entities annotated inline, e.g.
+        `I am from [Berlin]{"entity": "city"}`.
+    """
     from rasa.nlu.training_data.formats.readerwriter import TrainingDataWriter
     from rasa.nlu.training_data import entities_parser
 
diff --git a/rasa/core/schemas/stories.yml b/rasa/core/schemas/stories.yml
index 2725712dcaac..cc1f9e5c2f98 100644
--- a/rasa/core/schemas/stories.yml
+++ b/rasa/core/schemas/stories.yml
@@ -22,7 +22,7 @@ mapping:
           sequence:
           - type: "map"
             mapping: &intent_and_entities
-              intent: &intent
+              intent:
                 type: "str"
                 required: True
                 allowempty: False
@@ -30,7 +30,7 @@ mapping:
                 type: "str"
                 required: False
                 allowempty: False
-              entities: &entities
+              entities:
                 type: "seq"
                 matching: "any"
                 sequence:
diff --git a/rasa/core/test.py b/rasa/core/test.py
index ae567dd12aea..86d347da6ebf 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -144,11 +144,12 @@ def __init__(
         self.predicted_action = predicted_action
         super().__init__(correct_action, policy, confidence, timestamp=timestamp)
 
-    def comment(self) -> Text:
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
         return f"predicted: {self.predicted_action}"
 
     def as_story_string(self) -> Text:
-        return f"{self.action_name}   <!-- {self.comment()} -->"
+        return f"{self.action_name}   <!-- {self.inline_comment()} -->"
 
 
 class EndToEndUserUtterance(UserUttered):
@@ -188,7 +189,8 @@ def __init__(self, event: UserUttered, eval_store: EvaluationStore) -> None:
             event.input_channel,
         )
 
-    def comment(self) -> Text:
+    def inline_comment(self) -> Text:
+        """A comment attached to this event. Used during dumping."""
         from rasa.core.events import md_format_message
 
         predicted_message = md_format_message(
@@ -203,7 +205,8 @@ def as_story_string(self, e2e: bool = True) -> Text:
             self.text, self.intent.get("name"), self.entities
         )
         return (
-            f"{self.intent.get('name')}: {correct_message}   <!-- {self.comment()} -->"
+            f"{self.intent.get('name')}: {correct_message}   "
+            f"<!-- {self.inline_comment()} -->"
         )
 
 
@@ -286,7 +289,9 @@ def _collect_user_uttered_predictions(
         if fail_on_prediction_errors:
             raise ValueError(
                 "NLU model predicted a wrong intent. Failed Story:"
-                " \n\n{}".format(partial_tracker.export_stories())
+                " \n\n{}".format(
+                    YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
+                )
             )
     else:
         end_to_end_user_utterance = EndToEndUserUtterance(
@@ -356,7 +361,9 @@ def _collect_action_executed_predictions(
         if fail_on_prediction_errors:
             error_msg = (
                 "Model predicted a wrong action. Failed Story: "
-                "\n\n{}".format(partial_tracker.export_stories())
+                "\n\n{}".format(
+                    YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
+                )
             )
             if FormPolicy.__name__ in policy:
                 error_msg += (
diff --git a/rasa/core/trackers.py b/rasa/core/trackers.py
index d4bc17121045..4be86085c7be 100644
--- a/rasa/core/trackers.py
+++ b/rasa/core/trackers.py
@@ -547,12 +547,13 @@ def as_story(self, include_source: bool = False) -> "Story":
         )
         return Story.from_events(self.applied_events(), story_name)
 
-    def export_stories(
-        self, e2e: bool = False, include_source: bool = False
-    ) -> Text:  # TODO: this should not be used, deprecate, use yaml instead
+    def export_stories(self, e2e: bool = False, include_source: bool = False) -> Text:
         """Dump the tracker as a story in the Rasa Core story format.
 
         Returns the dumped tracker as a string."""
+        # TODO: we need to revisit all usages of this, the caller needs to specify
+        #       the format. this likely points to areas where we are not properly
+        #       handling markdown vs yaml
         story = self.as_story(include_source)
         return story.as_story_string(flat=True, e2e=e2e)
 
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index aa13b15b6881..39630a6b283f 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -141,7 +141,15 @@ def is_yaml_story_file(cls, file_path: Text) -> bool:
         )
 
     @classmethod
-    def is_key_in_yaml(cls, file_path, *keys):
+    def is_key_in_yaml(cls, file_path: Text, *keys: Text) -> bool:
+        """Check if any of the keys is contained in the root object of the yaml file.
+
+        Arguments:
+            file_path: path to the yaml file
+            keys: keys to look for
+        Returns:
+            `True` if at least one of the keys is found, `False` otherwise.
+        """
         try:
             content = io_utils.read_yaml_file(file_path)
             return any(key in content for key in keys)
@@ -154,7 +162,15 @@ def is_key_in_yaml(cls, file_path, *keys):
             return False
 
     @classmethod
-    def _has_test_prefix(cls, file_path):
+    def _has_test_prefix(cls, file_path: Text) -> bool:
+        """Check if the filename of a file at a path has a certain prefix.
+
+        Arguments:
+            file_path: path to the file
+
+        Returns:
+            `True` if the filename starts with the prefix, `False` otherwise.
+        """
         return Path(file_path).name.startswith(DEFAULT_TEST_STORIES_FILE_PREFIX)
 
     @classmethod
@@ -315,7 +331,7 @@ def _parse_or_statement(self, step: Dict[Text, Any]) -> None:
 
         self.current_step_builder.add_user_messages(utterances)
 
-    def _user_intent_from_step(self, step):
+    def _user_intent_from_step(self, step: Dict[Text, Any]) -> Text:
         user_intent = step.get(KEY_USER_INTENT, "").strip()
 
         if not user_intent:
diff --git a/rasa/core/training/story_writer/yaml_story_writer.py b/rasa/core/training/story_writer/yaml_story_writer.py
index cff66e986b00..8c355673c9ac 100644
--- a/rasa/core/training/story_writer/yaml_story_writer.py
+++ b/rasa/core/training/story_writer/yaml_story_writer.py
@@ -166,9 +166,10 @@ def process_user_utterance(user_utterance: UserUttered) -> OrderedDict:
         result = CommentedMap()
         result[KEY_USER_INTENT] = user_utterance.intent["name"]
 
-        # TODO: this is a workaround to print predicted intents / entities...
-        if hasattr(user_utterance, "comment"):
-            result.yaml_add_eol_comment(user_utterance.comment(), KEY_USER_INTENT)
+        if hasattr(user_utterance, "inline_comment"):
+            result.yaml_add_eol_comment(
+                user_utterance.inline_comment(), KEY_USER_INTENT
+            )
 
         if YAMLStoryWriter._text_is_real_message(user_utterance):
             result[KEY_USER_MESSAGE] = LiteralScalarString(user_utterance.text)
@@ -197,9 +198,8 @@ def process_action(action: ActionExecuted) -> OrderedDict:
         result = CommentedMap()
         result[KEY_ACTION] = action.action_name
 
-        # TODO: this is a workaround to print predicted action...
-        if hasattr(action, "comment"):
-            result.yaml_add_eol_comment(action.comment(), KEY_ACTION)
+        if hasattr(action, "inline_comment"):
+            result.yaml_add_eol_comment(action.inline_comment(), KEY_ACTION)
 
         return result
 
diff --git a/rasa/data.py b/rasa/data.py
index 9c056b29813e..473f8ca4b50f 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -22,14 +22,38 @@
 
 
 def is_likely_yaml_file(file_path: Text) -> bool:
+    """Check if a file likely contains yaml.
+
+    Arguments:
+        file_path: path to the file
+
+    Returns:
+        `True` if the file likely contains data in yaml format, `False` otherwise.
+    """
     return Path(file_path).suffix in YAML_FILE_EXTENSIONS
 
 
 def is_likely_json_file(file_path: Text) -> bool:
+    """Check if a file likely contains json.
+
+    Arguments:
+        file_path: path to the file
+
+    Returns:
+        `True` if the file likely contains data in json format, `False` otherwise.
+    """
     return Path(file_path).suffix in JSON_FILE_EXTENSIONS
 
 
 def is_likely_markdown_file(file_path: Text) -> bool:
+    """Check if a file likely contains markdown.
+
+    Arguments:
+        file_path: path to the file
+
+    Returns:
+        `True` if the file likely contains data in markdown format, `False` otherwise.
+    """
     return Path(file_path).suffix in MARKDOWN_FILE_EXTENSIONS
 
 
diff --git a/rasa/server.py b/rasa/server.py
index 4afdc8b1dc8c..13f4fd5e71cc 100644
--- a/rasa/server.py
+++ b/rasa/server.py
@@ -14,6 +14,7 @@
 
 from sanic.exceptions import InvalidUsage
 
+from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 from rasa.nlu.training_data.formats import RasaYAMLReader
 import rasa
 import rasa.core.utils
@@ -583,7 +584,7 @@ async def retrieve_story(request: Request, conversation_id: Text):
                 tracker = tracker.travel_back_in_time(until_time)
 
             # dump and return tracker
-            state = tracker.export_stories(e2e=True)
+            state = YAMLStoryWriter().dumps(tracker.as_story().story_steps)
             return response.text(state)
         except Exception as e:
             logger.debug(traceback.format_exc())

From 3aae77fe57ab091aac20bb1ede72a689266ece8c Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 13:30:43 +0200
Subject: [PATCH 11/34] Update
 rasa/core/training/story_reader/markdown_story_reader.py

Co-authored-by: Ella Rohm-Ensing <erohmensing@gmail.com>
---
 rasa/core/training/story_reader/markdown_story_reader.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 992300c7c79d..202af93cebae 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -210,9 +210,9 @@ async def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> No
     def parse_e2e_message(line: Text) -> "Message":
         f"""Parses an md list item line based on the current section type.
 
-        Matches expressions of the form `<intent>:<example>. For the
-        syntax of <example> see the Rasa docs on NLU training data:
-        {DOCS_BASE_URL}/nlu/training-data-format/#markdown-format"""
+        Matches expressions of the form `<intent>:<example>`. For the
+        syntax of `<example>` see the Rasa docs on NLU training data:
+        {DOCS_BASE_URL}/training-data-format/#markdown-format"""
 
         # Match three groups:
         # 1) Potential "form" annotation

From a1f8d67ec83e1d092c08bbbdd594a8f48a61cd69 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 13:54:15 +0200
Subject: [PATCH 12/34] addressed review comments (and linter error)

---
 docs/docs/command-line-interface.mdx          |  4 ++-
 docs/docs/testing-your-assistant.mdx          |  6 ++++-
 rasa/cli/test.py                              |  4 ---
 rasa/constants.py                             |  2 +-
 rasa/core/agent.py                            | 27 +------------------
 .../story_reader/markdown_story_reader.py     |  5 ++--
 rasa/data.py                                  |  1 +
 rasa/test.py                                  |  7 +----
 8 files changed, 15 insertions(+), 41 deletions(-)

diff --git a/docs/docs/command-line-interface.mdx b/docs/docs/command-line-interface.mdx
index 7e626767b340..7a02e9b9e310 100644
--- a/docs/docs/command-line-interface.mdx
+++ b/docs/docs/command-line-interface.mdx
@@ -308,7 +308,9 @@ rasa x
 ```
 
 To be able to start Rasa X you need to have Rasa X local mode installed
-and you need to be in a Rasa project.:::note
+and you need to be in a Rasa project.
+
+:::note
 By default Rasa X runs on the port 5002. Using the argument `--rasa-x-port` allows you to change it to
 any other port.
 
diff --git a/docs/docs/testing-your-assistant.mdx b/docs/docs/testing-your-assistant.mdx
index 65ec5a83fde5..05b0b2714e7e 100644
--- a/docs/docs/testing-your-assistant.mdx
+++ b/docs/docs/testing-your-assistant.mdx
@@ -135,6 +135,10 @@ You can test your assistant against them by running:
 rasa test
 ```
 
+The command will always load all stories from any story files whose file
+name starts with `test_`, e.g. `test_conversations.yml`. Your story test
+file names should always start with `test_` for this detection to work.
+
 :::info Custom Actions
 [Custom Actions](./custom-actions.mdx) are **not executed as part of end-to-end tests.** If your custom
 actions append any events to the conversation, this has to be reflected in your end-to-end
@@ -170,7 +174,7 @@ still estimate how well your model generalises using cross-validation.
 To do this, add the flag `--cross-validation`:
 
 ```bash
-rasa test nlu --nlu data/nlu.ml --cross-validation
+rasa test nlu --nlu data/nlu.yml --cross-validation
 ```
 
 You can find the full list of options in the
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index a9b108f52416..d4618b4c4ea3 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -63,9 +63,6 @@ def run_core_test(args: argparse.Namespace) -> None:
     from rasa import data
     from rasa.test import test_core_models_in_directory, test_core, test_core_models
 
-    endpoints = cli_utils.get_validated_path(
-        args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
-    )
     stories = cli_utils.get_validated_path(args.stories, "stories", DEFAULT_DATA_PATH)
     stories = data.get_test_directory(stories)
     output = args.out or DEFAULT_RESULTS_PATH
@@ -87,7 +84,6 @@ def run_core_test(args: argparse.Namespace) -> None:
             test_core(
                 model=model_path,
                 stories=stories,
-                endpoints=endpoints,
                 output=output,
                 additional_arguments=vars(args),
             )
diff --git a/rasa/constants.py b/rasa/constants.py
index feb264920795..af77ee26afc7 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -51,7 +51,7 @@
 
 DOCS_BASE_URL_RASA_X = "https://rasa.com/docs/rasa-x"
 
-LEGACY_DOCS_BASE_URL = "http://legacy-docs.rasa.com"
+LEGACY_DOCS_BASE_URL = "https://legacy-docs-v1.rasa.com"
 
 CONFIG_KEYS_CORE = ["policies"]
 CONFIG_KEYS_NLU = ["language", "pipeline"]
diff --git a/rasa/core/agent.py b/rasa/core/agent.py
index f7efa5e4b465..28e9d7df157f 100644
--- a/rasa/core/agent.py
+++ b/rasa/core/agent.py
@@ -7,15 +7,12 @@
 from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union
 
 import aiohttp
-from sanic import Sanic
 
 import rasa
 import rasa.utils.io
 import rasa.core.utils
 from rasa.constants import (
     DEFAULT_DOMAIN_PATH,
-    LEGACY_DOCS_BASE_URL,
-    ENV_SANIC_BACKLOG,
     DEFAULT_CORE_SUBDIRECTORY_NAME,
 )
 from rasa.core import constants, jobs, training
@@ -45,7 +42,7 @@
     get_model,
 )
 from rasa.nlu.utils import is_url
-from rasa.utils.common import raise_warning, update_sanic_log_level
+from rasa.utils.common import raise_warning
 from rasa.utils.endpoints import EndpointConfig
 
 logger = logging.getLogger(__name__)
@@ -707,28 +704,6 @@ def train(
         if not self.is_core_ready():
             raise AgentNotReady("Can't train without a policy ensemble.")
 
-        # deprecation tests
-        if kwargs.get("featurizer"):
-            raise Exception(
-                "Passing `featurizer` "
-                "to `agent.train(...)` is not supported anymore. "
-                "Pass appropriate featurizer directly "
-                "to the policy configuration instead. More info "
-                "{}/core/migrations.html".format(LEGACY_DOCS_BASE_URL)
-            )
-        if (
-            kwargs.get("epochs")
-            or kwargs.get("max_history")
-            or kwargs.get("batch_size")
-        ):
-            raise Exception(
-                "Passing policy configuration parameters "
-                "to `agent.train(...)` is not supported "
-                "anymore. Specify parameters directly in the "
-                "policy configuration instead. More info "
-                "{}/core/migrations.html".format(LEGACY_DOCS_BASE_URL)
-            )
-
         if isinstance(training_trackers, str):
             # the user most likely passed in a file name to load training
             # data from
diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 992300c7c79d..c9b71e09772f 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -15,6 +15,7 @@
     DOCS_BASE_URL,
     DOCS_URL_DOMAINS,
     DOCS_URL_STORIES,
+    LEGACY_DOCS_BASE_URL,
 )
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
 from rasa.core.events import UserUttered
@@ -225,10 +226,10 @@ def parse_e2e_message(line: Text) -> "Message":
 
         if not match:
             raise ValueError(
-                "Encountered invalid end-to-end format for message "
+                "Encountered invalid test story format for message "
                 "`{}`. Please visit the documentation page on "
                 "end-to-end testing at {}/user-guide/testing-your-assistant/"
-                "#end-to-end-testing/".format(line, DOCS_BASE_URL)
+                "#end-to-end-testing/".format(line, LEGACY_DOCS_BASE_URL)
             )
         from rasa.nlu.training_data import entities_parser
 
diff --git a/rasa/data.py b/rasa/data.py
index 473f8ca4b50f..95dd541a3449 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -125,6 +125,7 @@ def get_data_files(
 
     Args:
         paths: List of paths to training files or folders containing them.
+        filter_property: property to use when filtering the paths, e.g. `is_nlu_file`.
 
     Returns:
         paths of training data files.
diff --git a/rasa/test.py b/rasa/test.py
index ead4d731600f..bdeee1ff796c 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -96,31 +96,26 @@ def test(
     model: Text,
     stories: Text,
     nlu_data: Text,
-    endpoints: Optional[Text] = None,
     output: Text = DEFAULT_RESULTS_PATH,
     additional_arguments: Optional[Dict] = None,
 ):
     if additional_arguments is None:
         additional_arguments = {}
 
-    test_core(model, stories, endpoints, output, additional_arguments)
+    test_core(model, stories, output, additional_arguments)
     test_nlu(model, nlu_data, output, additional_arguments)
 
 
 def test_core(
     model: Optional[Text] = None,
     stories: Optional[Text] = None,
-    endpoints: Optional[Text] = None,
     output: Text = DEFAULT_RESULTS_PATH,
     additional_arguments: Optional[Dict] = None,
 ) -> None:
-    import rasa.core.utils as core_utils
     import rasa.model
     from rasa.core.interpreter import RegexInterpreter
     from rasa.core.agent import Agent
 
-    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)
-
     if additional_arguments is None:
         additional_arguments = {}
 

From e231f061587a1e8517c40bdef88133a529052114 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 13:57:14 +0200
Subject: [PATCH 13/34] added changelog item

---
 changelog/6457.improvement.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/6457.improvement.md

diff --git a/changelog/6457.improvement.md b/changelog/6457.improvement.md
new file mode 100644
index 000000000000..821a5d33ec8d
--- /dev/null
+++ b/changelog/6457.improvement.md
@@ -0,0 +1 @@
+Support for test stories written in yaml format.

From 6c85cc1474fb5c5b2f1298b57e71a2493be09a0d Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 14:23:30 +0200
Subject: [PATCH 14/34] fixed failing tests

---
 tests/test_test.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/tests/test_test.py b/tests/test_test.py
index 126ad90836d4..40bd9dc97879 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -135,27 +135,6 @@ def test_get_label_set(targets, exclude_label, expected):
     assert set(expected) == set(actual)
 
 
-async def test_interpreter_passed_to_agent(
-    monkeypatch: MonkeyPatch, trained_rasa_model: Text
-):
-    from rasa.test import test_core
-
-    # Patching is bit more complicated as we have a module `train` and function
-    # with the same name 😬
-    monkeypatch.setattr(
-        sys.modules["rasa.test"], "_test_core", asyncio.coroutine(lambda *_, **__: True)
-    )
-
-    agent_load = Mock()
-    monkeypatch.setattr(Agent, "load", agent_load)
-
-    test_core(trained_rasa_model)
-
-    agent_load.assert_called_once()
-    _, _, kwargs = agent_load.mock_calls[0]
-    assert isinstance(kwargs["interpreter"], RasaNLUInterpreter)
-
-
 async def test_e2e_warning_if_no_nlu_model(
     monkeypatch: MonkeyPatch, trained_core_model: Text, capsys: CaptureFixture
 ):
@@ -167,13 +146,6 @@ async def test_e2e_warning_if_no_nlu_model(
         sys.modules["rasa.test"], "_test_core", asyncio.coroutine(lambda *_, **__: True)
     )
 
-    agent_load = Mock()
-    monkeypatch.setattr(Agent, "load", agent_load)
-
     test_core(trained_core_model, additional_arguments={"e2e": True})
 
     assert "No NLU model found. Using default" in capsys.readouterr().out
-
-    agent_load.assert_called_once()
-    _, _, kwargs = agent_load.mock_calls[0]
-    assert isinstance(kwargs["interpreter"], RegexInterpreter)

From 0bad210f785241a7cfa16b8f239897c3b1ff8697 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 23:39:40 +0200
Subject: [PATCH 15/34] added tests

---
 rasa/core/test.py                             | 30 +++++-----
 .../nlu/training_data/formats/readerwriter.py | 28 +++++-----
 tests/core/test_events.py                     | 41 ++++++++++++++
 .../story_reader/test_yaml_story_reader.py    | 26 ++++++++-
 .../story_writer/test_yaml_story_writer.py    | 53 ++++++++++++++++++
 .../nlu/training_data/test_entities_parser.py | 23 ++++++++
 tests/test_data.py                            | 42 ++++++++++++++
 tests/test_test.py                            | 56 +++++++++++++++++++
 8 files changed, 270 insertions(+), 29 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 86d347da6ebf..ad0c235712f2 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -139,14 +139,20 @@ class WronglyPredictedAction(ActionExecuted):
     type_name = "wrong_action"
 
     def __init__(
-        self, correct_action, predicted_action, policy, confidence, timestamp=None
+        self,
+        action_name_target: Text,
+        action_name_prediction: Text,
+        policy: Optional[Text] = None,
+        confidence: Optional[float] = None,
+        timestamp: Optional[float] = None,
+        metadata: Optional[Dict] = None,
     ) -> None:
-        self.predicted_action = predicted_action
-        super().__init__(correct_action, policy, confidence, timestamp=timestamp)
+        self.action_name_prediction = action_name_prediction
+        super().__init__(action_name_target, policy, confidence, timestamp, metadata)
 
     def inline_comment(self) -> Text:
         """A comment attached to this event. Used during dumping."""
-        return f"predicted: {self.predicted_action}"
+        return f"predicted: {self.action_name_prediction}"
 
     def as_story_string(self) -> Text:
         return f"{self.action_name}   <!-- {self.inline_comment()} -->"
@@ -547,16 +553,10 @@ async def _collect_story_predictions(
     )
 
 
-def _log_stories(
-    trackers: List[DialogueStateTracker], filename: Text, out_directory: Text
-) -> None:
+def _log_stories(trackers: List[DialogueStateTracker], file_path: Text) -> None:
     """Write given stories to the given file."""
-    if not out_directory:
-        return
 
-    with open(
-        os.path.join(out_directory, filename), "w", encoding=DEFAULT_ENCODING
-    ) as f:
+    with open(file_path, "w", encoding=DEFAULT_ENCODING) as f:
         if not trackers:
             f.write("# None of the test stories failed - all good!")
         else:
@@ -644,11 +644,13 @@ async def test(
 
     if errors:
         _log_stories(
-            story_evaluation.failed_stories, FAILED_STORIES_FILE, out_directory
+            story_evaluation.failed_stories,
+            os.path.join(out_directory, FAILED_STORIES_FILE),
         )
     if successes:
         _log_stories(
-            story_evaluation.successful_stories, SUCCESSFUL_STORIES_FILE, out_directory
+            story_evaluation.successful_stories,
+            os.path.join(out_directory, SUCCESSFUL_STORIES_FILE),
         )
 
     return {
diff --git a/rasa/nlu/training_data/formats/readerwriter.py b/rasa/nlu/training_data/formats/readerwriter.py
index d8763bfc676c..1816b7e24bb7 100644
--- a/rasa/nlu/training_data/formats/readerwriter.py
+++ b/rasa/nlu/training_data/formats/readerwriter.py
@@ -117,20 +117,20 @@ def generate_entity(text: Text, entity: Dict[Text, Any]) -> Text:
 
         if use_short_syntax:
             return f"[{entity_text}]({entity_type})"
-
-        entity_dict = OrderedDict(
-            [
-                (ENTITY_ATTRIBUTE_TYPE, entity_type),
-                (ENTITY_ATTRIBUTE_ROLE, entity_role),
-                (ENTITY_ATTRIBUTE_GROUP, entity_group),
-                (ENTITY_ATTRIBUTE_VALUE, entity_value),
-            ]
-        )
-        entity_dict = OrderedDict(
-            [(k, v) for k, v in entity_dict.items() if v is not None]
-        )
-
-        return f"[{entity_text}]{json.dumps(entity_dict)}"
+        else:
+            entity_dict = OrderedDict(
+                [
+                    (ENTITY_ATTRIBUTE_TYPE, entity_type),
+                    (ENTITY_ATTRIBUTE_ROLE, entity_role),
+                    (ENTITY_ATTRIBUTE_GROUP, entity_group),
+                    (ENTITY_ATTRIBUTE_VALUE, entity_value),
+                ]
+            )
+            entity_dict = OrderedDict(
+                [(k, v) for k, v in entity_dict.items() if v is not None]
+            )
+
+            return f"[{entity_text}]{json.dumps(entity_dict)}"
 
 
 class JsonTrainingDataReader(TrainingDataReader):
diff --git a/tests/core/test_events.py b/tests/core/test_events.py
index dcc34455a95d..0faf88f90a73 100644
--- a/tests/core/test_events.py
+++ b/tests/core/test_events.py
@@ -26,6 +26,7 @@
     UserUtteranceReverted,
     AgentUttered,
     SessionStarted,
+    md_format_message,
 )
 
 
@@ -300,3 +301,43 @@ def test_event_default_metadata(event_class: Type[Event]):
         assert event.as_dict()["metadata"] == {}
     else:
         assert "metadata" not in event.as_dict()
+
+
+def test_md_format_message():
+    assert (
+        md_format_message("Hello there!", intent="greet", entities=[]) == "Hello there!"
+    )
+
+
+def test_md_format_message_empty():
+    assert md_format_message("", intent=None, entities=[]) == ""
+
+
+def test_md_format_message_using_short_entity_syntax():
+    formatted = md_format_message(
+        "I am from Berlin.",
+        intent="location",
+        entities=[{"start": 10, "end": 16, "entity": "city", "value": "Berlin"}],
+    )
+    assert formatted == """I am from [Berlin](city)."""
+
+
+def test_md_format_message_using_long_entity_syntax():
+    formatted = md_format_message(
+        "I am from Berlin in Germany.",
+        intent="location",
+        entities=[
+            {"start": 10, "end": 16, "entity": "city", "value": "Berlin"},
+            {
+                "start": 20,
+                "end": 27,
+                "entity": "country",
+                "value": "Germany",
+                "role": "destination",
+            },
+        ],
+    )
+    assert (
+        formatted
+        == """I am from [Berlin](city) in [Germany]{"entity": "country", "role": "destination"}."""
+    )
diff --git a/tests/core/training/story_reader/test_yaml_story_reader.py b/tests/core/training/story_reader/test_yaml_story_reader.py
index 23349bd5b105..bc0761592275 100644
--- a/tests/core/training/story_reader/test_yaml_story_reader.py
+++ b/tests/core/training/story_reader/test_yaml_story_reader.py
@@ -1,14 +1,15 @@
+from pathlib import Path
 from typing import Text, List
 
 import pytest
 
+import rasa.utils.io
 from rasa.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.core import training
 from rasa.core.actions.action import RULE_SNIPPET_ACTION_NAME
 from rasa.core.domain import Domain
 from rasa.core.training import loading
 from rasa.core.events import ActionExecuted, UserUttered, SlotSet, ActiveLoop
-from rasa.core.interpreter import RegexInterpreter
 from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
 from rasa.core.training.structures import StoryStep
 from rasa.utils import io as io_utils
@@ -301,3 +302,26 @@ async def test_active_loop_is_parsed(default_domain: Domain):
         reader.read_from_parsed_yaml(yaml_content)
 
     assert not len(record)
+
+
+def test_is_test_story_file(tmp_path: Path):
+    path = str(tmp_path / "test_stories.yml")
+    rasa.utils.io.write_yaml({"stories": []}, path)
+    assert YAMLStoryReader.is_yaml_test_stories_file(path)
+
+
+def test_is_not_test_story_file_if_it_doesnt_contain_stories(tmp_path: Path):
+    path = str(tmp_path / "test_stories.yml")
+    rasa.utils.io.write_yaml({"nlu": []}, path)
+    assert not YAMLStoryReader.is_yaml_test_stories_file(path)
+
+
+def test_is_not_test_story_file_if_empty(tmp_path: Path):
+    path = str(tmp_path / "test_stories.yml")
+    assert not YAMLStoryReader.is_yaml_test_stories_file(path)
+
+
+def test_is_not_test_story_file_without_test_prefix(tmp_path: Path):
+    path = str(tmp_path / "stories.yml")
+    rasa.utils.io.write_yaml({"stories": []}, path)
+    assert not YAMLStoryReader.is_yaml_test_stories_file(path)
diff --git a/tests/core/training/story_writer/test_yaml_story_writer.py b/tests/core/training/story_writer/test_yaml_story_writer.py
index 892a6e3a071f..c7edd3265795 100644
--- a/tests/core/training/story_writer/test_yaml_story_writer.py
+++ b/tests/core/training/story_writer/test_yaml_story_writer.py
@@ -1,10 +1,13 @@
 from pathlib import Path
+import textwrap
 from typing import Text
 
 import pytest
 
 from rasa.core.domain import Domain
+from rasa.core.events import ActionExecuted, UserUttered
 from rasa.core.interpreter import RegexInterpreter
+from rasa.core.trackers import DialogueStateTracker
 from rasa.core.training.story_reader.markdown_story_reader import MarkdownStoryReader
 from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
 from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
@@ -65,3 +68,53 @@ async def test_forms_are_skipped_with_warning(default_domain: Domain):
 
     # We skip 5 stories with the forms and warn users
     assert len(record) == 5
+
+
+def test_yaml_writer_dumps_user_messages():
+    events = [
+        UserUttered("Hello", {"name": "greet"}),
+        ActionExecuted("utter_greet"),
+    ]
+    tracker = DialogueStateTracker.from_events("default", events)
+    dump = YAMLStoryWriter().dumps(tracker.as_story().story_steps)
+
+    assert (
+        dump.strip()
+        == textwrap.dedent(
+            """
+        version: "2.0"
+        stories:
+        - story: default
+          steps:
+          - intent: greet
+            user: |-
+              Hello
+          - action: utter_greet
+
+    """
+        ).strip()
+    )
+
+
+def test_yaml_writer_avoids_dumping_not_existing_user_messages():
+    events = [
+        UserUttered("greet", {"name": "greet"}),
+        ActionExecuted("utter_greet"),
+    ]
+    tracker = DialogueStateTracker.from_events("default", events)
+    dump = YAMLStoryWriter().dumps(tracker.as_story().story_steps)
+
+    assert (
+        dump.strip()
+        == textwrap.dedent(
+            """
+        version: "2.0"
+        stories:
+        - story: default
+          steps:
+          - intent: greet
+          - action: utter_greet
+
+    """
+        ).strip()
+    )
diff --git a/tests/nlu/training_data/test_entities_parser.py b/tests/nlu/training_data/test_entities_parser.py
index c84ec01b068c..7bdaec09bfe4 100644
--- a/tests/nlu/training_data/test_entities_parser.py
+++ b/tests/nlu/training_data/test_entities_parser.py
@@ -113,3 +113,26 @@ def test_markdown_entity_regex(
 
     replaced_text = entities_parser.replace_entities(example)
     assert replaced_text == expected_text
+
+
+def test_parse_training_example():
+    message = entities_parser.parse_training_example("Hello!", intent="greet")
+    assert message.get("intent") == "greet"
+    assert message.text == "Hello!"
+
+
+def test_parse_empty_example():
+    message = entities_parser.parse_training_example("", intent=None)
+    assert message.get("intent") is None
+    assert message.text == ""
+
+
+def test_parse_training_example_with_entities():
+    message = entities_parser.parse_training_example(
+        "I am from [Berlin](city).", intent="inform"
+    )
+    assert message.get("intent") == "inform"
+    assert message.text == "I am from Berlin."
+    assert message.get("entities") == [
+        {"start": 10, "end": 16, "value": "Berlin", "entity": "city"}
+    ]
diff --git a/tests/test_data.py b/tests/test_data.py
index a6f4404a5e25..597b22461311 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -3,6 +3,8 @@
 
 from pathlib import Path
 
+import pytest
+
 from rasa.constants import DEFAULT_E2E_TESTS_PATH
 from rasa import data
 from rasa.utils.io import write_text_file
@@ -80,3 +82,43 @@ def test_domain_files_are_not_conversation_tests(tmpdir: Path):
     domain_path = parent / "domain.yml"
 
     assert not data.is_test_stories_file(str(domain_path))
+
+
+@pytest.mark.parametrize(
+    "path,is_yaml",
+    [
+        ("my_file.yaml", True),
+        ("my_file.yml", True),
+        ("/a/b/c/my_file.yml", True),
+        ("/a/b/c/my_file.ml", False),
+        ("my_file.md", False),
+    ],
+)
+def test_is_yaml_file(path, is_yaml):
+    assert data.is_likely_yaml_file(path) == is_yaml
+
+
+@pytest.mark.parametrize(
+    "path,is_md",
+    [
+        ("my_file.md", True),
+        ("/a/b/c/my_file.md", True),
+        ("/a/b/c/my_file.yml", False),
+        ("my_file.yaml", False),
+    ],
+)
+def test_is_md_file(path, is_md):
+    assert data.is_likely_markdown_file(path) == is_md
+
+
+@pytest.mark.parametrize(
+    "path,is_json",
+    [
+        ("my_file.json", True),
+        ("/a/b/c/my_file.json", True),
+        ("/a/b/c/my_file.yml", False),
+        ("my_file.md", False),
+    ],
+)
+def test_is_md_file(path, is_json):
+    assert data.is_likely_json_file(path) == is_json
diff --git a/tests/test_test.py b/tests/test_test.py
index 40bd9dc97879..187e1edd1fa0 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -1,6 +1,7 @@
 import asyncio
 import sys
 from pathlib import Path
+import textwrap
 from typing import Text
 from unittest.mock import Mock
 
@@ -8,6 +9,16 @@
 from _pytest.capture import CaptureFixture
 from _pytest.monkeypatch import MonkeyPatch
 
+import rasa.utils.io
+from rasa.core.actions.action import ActionListen
+from rasa.core.events import ActionExecuted, UserUttered
+from rasa.core.test import (
+    EvaluationStore,
+    WronglyClassifiedUserUtterance,
+    WronglyPredictedAction,
+)
+from rasa.core.trackers import DialogueStateTracker
+from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 import rasa.model
 import rasa.cli.utils
 from rasa.core.agent import Agent
@@ -149,3 +160,48 @@ async def test_e2e_warning_if_no_nlu_model(
     test_core(trained_core_model, additional_arguments={"e2e": True})
 
     assert "No NLU model found. Using default" in capsys.readouterr().out
+
+
+def test_write_classification_errors():
+    evaluation = EvaluationStore(
+        action_predictions=["utter_goodbye"],
+        action_targets=["utter_greet"],
+        intent_predictions=["goodbye"],
+        intent_targets=["greet"],
+        entity_predictions=None,
+        entity_targets=None,
+    )
+    events = [
+        WronglyClassifiedUserUtterance(
+            UserUttered("Hello", {"name": "goodbye"}), evaluation
+        ),
+        WronglyPredictedAction("utter_greet", "utter_goodbye"),
+    ]
+    tracker = DialogueStateTracker.from_events("default", events)
+    dump = YAMLStoryWriter().dumps(tracker.as_story().story_steps)
+    assert (
+        dump.strip()
+        == textwrap.dedent(
+            """
+        version: "2.0"
+        stories:
+        - story: default
+          steps:
+          - intent: greet  # predicted: goodbye: Hello 
+            user: |-
+              Hello
+          - action: utter_greet  # predicted: utter_goodbye
+
+    """
+        ).strip()
+    )
+
+
+def test_log_failed_stories(tmp_path: Path):
+    path = str(tmp_path / "stories.yml")
+    rasa.core.test._log_stories([], path)
+
+    dump = rasa.utils.io.read_file(path)
+
+    assert dump.startswith("#")
+    assert len(dump.split("\n")) == 1

From daf63bbe270f8472d82f69f6b0e17eb2588942d7 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Tue, 25 Aug 2020 23:50:58 +0200
Subject: [PATCH 16/34] fixed linting

---
 tests/test_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_test.py b/tests/test_test.py
index 187e1edd1fa0..c8b907c71266 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -187,7 +187,7 @@ def test_write_classification_errors():
         stories:
         - story: default
           steps:
-          - intent: greet  # predicted: goodbye: Hello 
+          - intent: greet  # predicted: goodbye: Hello
             user: |-
               Hello
           - action: utter_greet  # predicted: utter_goodbye

From 7b6de19cfa796d049f062280f3c95ee05db0501a Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 09:24:25 +0200
Subject: [PATCH 17/34] fixed classification test

---
 rasa/core/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index ad0c235712f2..0839e3f74868 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -202,7 +202,7 @@ def inline_comment(self) -> Text:
         predicted_message = md_format_message(
             self.text, self.predicted_intent, self.predicted_entities
         )
-        return f"predicted: {self.predicted_intent}: {predicted_message} "
+        return f"predicted: {self.predicted_intent}: {predicted_message}"
 
     def as_story_string(self, e2e: bool = True) -> Text:
         from rasa.core.events import md_format_message

From 25499c1617cc7d6b00b1cdcfdf8f627d637411e1 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 10:22:34 +0200
Subject: [PATCH 18/34] fixed some more style errors deepsource found

---
 rasa/core/agent.py                            | 15 ++++++++-------
 rasa/core/interpreter.py                      | 12 ++++++++----
 rasa/core/test.py                             |  2 +-
 rasa/core/training/interactive.py             |  1 -
 .../story_reader/markdown_story_reader.py     | 16 ++++++----------
 .../story_reader/yaml_story_reader.py         |  4 +---
 .../story_writer/yaml_story_writer.py         | 15 ++++-----------
 rasa/data.py                                  |  3 ++-
 rasa/importers/importer.py                    | 19 +++++++++----------
 rasa/importers/rasa.py                        |  1 -
 rasa/nlu/test.py                              |  8 +++++---
 rasa/nlu/training_data/entities_parser.py     |  1 -
 rasa/utils/plotting.py                        | 10 +++++-----
 tests/core/test_dsl.py                        |  0
 tests/core/test_visualization.py              |  2 --
 .../story_reader/test_yaml_story_reader.py    |  5 ++---
 .../story_writer/test_yaml_story_writer.py    |  1 -
 tests/test_data.py                            |  2 +-
 tests/test_test.py                            |  5 +----
 19 files changed, 53 insertions(+), 69 deletions(-)
 delete mode 100644 tests/core/test_dsl.py

diff --git a/rasa/core/agent.py b/rasa/core/agent.py
index 28e9d7df157f..e7fa6d40c332 100644
--- a/rasa/core/agent.py
+++ b/rasa/core/agent.py
@@ -7,6 +7,7 @@
 from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union
 
 import aiohttp
+from aiohttp import ClientError
 
 import rasa
 import rasa.utils.io
@@ -134,7 +135,8 @@ def _load_and_set_updated_model(
         )
 
         logger.debug("Finished updating agent to new model.")
-    except Exception as e:
+    except Exception as e:  # skipcq: PYL-W0703
+        # TODO: this exception shouldn't be that broad, we need to be more specific
         logger.exception(
             f"Failed to update model. The previous model will stay loaded instead. "
             f"Error: {e}"
@@ -235,7 +237,7 @@ async def _run_model_pulling_worker(
         await _update_model_from_server(model_server, agent)
     except CancelledError:
         logger.warning("Stopping model pulling (cancelled).")
-    except Exception:
+    except ClientError:
         logger.exception(
             "An exception was raised while fetching a model. Continuing anyways..."
         )
@@ -400,9 +402,9 @@ def load(
         try:
             if not model_path:
                 raise ModelNotFound("No path specified.")
-            elif not os.path.exists(model_path):
+            if not os.path.exists(model_path):
                 raise ModelNotFound(f"No file or directory at '{model_path}'.")
-            elif os.path.isfile(model_path):
+            if os.path.isfile(model_path):
                 model_path = get_model(model_path)
         except ModelNotFound:
             raise ValueError(
@@ -501,9 +503,8 @@ async def handle_message(
                 "not supported anymore. Rather use `agent.handle_text(...)`."
             )
 
-        def noop(_):
+        def noop(_: Any) -> None:
             logger.info("Ignoring message as there is no agent to handle it.")
-            return None
 
         if not self.is_ready():
             return noop(message)
@@ -615,7 +616,7 @@ def toggle_memoization(self, activate: bool) -> None:
 
         for p in self.policy_ensemble.policies:
             # explicitly ignore inheritance (e.g. augmented memoization policy)
-            if type(p) == MemoizationPolicy:
+            if type(p) is MemoizationPolicy:
                 p.toggle(activate)
 
     def _max_history(self) -> int:
diff --git a/rasa/core/interpreter.py b/rasa/core/interpreter.py
index 925cfdfc9d61..ea686d1324e5 100644
--- a/rasa/core/interpreter.py
+++ b/rasa/core/interpreter.py
@@ -1,3 +1,5 @@
+from json import JSONDecodeError
+
 import aiohttp
 
 import json
@@ -86,11 +88,11 @@ def _parse_parameters(
             if isinstance(parsed_entities, dict):
                 return RegexInterpreter._create_entities(parsed_entities, sidx, eidx)
             else:
-                raise Exception(
+                raise ValueError(
                     f"Parsed value isn't a json object "
                     f"(instead parser found '{type(parsed_entities)}')"
                 )
-        except Exception as e:
+        except (JSONDecodeError, ValueError) as e:
             raise_warning(
                 f"Failed to parse arguments in line "
                 f"'{user_input}'. Failed to decode parameters "
@@ -108,7 +110,7 @@ def _parse_confidence(confidence_str: Text) -> float:
 
         try:
             return float(confidence_str.strip()[1:])
-        except Exception as e:
+        except ValueError as e:
             raise_warning(
                 f"Invalid to parse confidence value in line "
                 f"'{confidence_str}'. Make sure the intent confidence is an "
@@ -241,7 +243,9 @@ async def _rasa_http_parse(
                             f"http. Error: {response_text}"
                         )
                         return None
-        except Exception:
+        except Exception:  # skipcq: PYL-W0703
+            # need to catch all possible exceptions when doing http requests
+            # (timeouts, value errors, parser errors, ...)
             logger.exception(f"Failed to parse text '{text}' using rasa NLU over http.")
             return None
 
diff --git a/rasa/core/test.py b/rasa/core/test.py
index 0839e3f74868..54a00f491cb9 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -475,7 +475,7 @@ def _in_training_data_fraction(action_list: List[Dict[Text, Any]]) -> float:
         if a["policy"] and not SimplePolicyEnsemble.is_not_memo_policy(a["policy"])
     ]
 
-    return len(in_training_data) / len(action_list) if len(action_list) else 0
+    return len(in_training_data) / len(action_list) if action_list else 0
 
 
 async def _collect_story_predictions(
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 86808bd35d13..50838729c816 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -6,7 +6,6 @@
 import uuid
 from functools import partial
 from multiprocessing import Process
-from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union, Set
 
 import numpy as np
diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 9f0a248bd729..3aa3458df5b4 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -1,12 +1,10 @@
-import asyncio
 import json
 import logging
 import os
 import re
-from pathlib import PurePath, Path
-from typing import Dict, Optional, Text, List, Any, Union
+from pathlib import Path
+from typing import Dict, Text, List, Any, Union
 
-from rasa import data
 import rasa.data
 from rasa.nlu.training_data import Message
 import rasa.utils.io as io_utils
@@ -23,7 +21,6 @@
 from rasa.core.interpreter import RegexInterpreter
 from rasa.core.training.story_reader.story_reader import StoryReader
 from rasa.core.training.structures import StoryStep, FORM_PREFIX
-from rasa.data import MARKDOWN_FILE_EXTENSIONS
 from rasa.nlu.constants import INTENT_NAME_KEY
 from rasa.utils.common import raise_warning
 
@@ -209,11 +206,10 @@ async def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> No
 
     @staticmethod
     def parse_e2e_message(line: Text) -> "Message":
-        f"""Parses an md list item line based on the current section type.
+        """Parses an md list item line based on the current section type.
 
         Matches expressions of the form `<intent>:<example>`. For the
-        syntax of `<example>` see the Rasa docs on NLU training data:
-        {DOCS_BASE_URL}/training-data-format/#markdown-format"""
+        syntax of `<example>` see the Rasa docs on NLU training data."""
 
         # Match three groups:
         # 1) Potential "form" annotation
@@ -273,7 +269,7 @@ def is_markdown_story_file(file_path: Union[Text, Path]) -> bool:
             `True` in case the file is a Core Markdown training data or rule data file,
             `False` otherwise.
         """
-        if not data.is_likely_markdown_file(file_path) or rasa.data.is_nlu_file(
+        if not rasa.data.is_likely_markdown_file(file_path) or rasa.data.is_nlu_file(
             file_path
         ):
             return False
@@ -306,7 +302,7 @@ def is_markdown_test_stories_file(file_path: Union[Text, Path]) -> bool:
         Returns:
             `True` if it's a file containing test stories, otherwise `False`.
         """
-        if not data.is_likely_markdown_file(file_path):
+        if not rasa.data.is_likely_markdown_file(file_path):
             return False
 
         dirname = os.path.dirname(file_path)
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index 39630a6b283f..964844d0940d 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -9,11 +9,9 @@
 import rasa.utils.common as common_utils
 import rasa.utils.io as io_utils
 from rasa.constants import (
-    DEFAULT_E2E_TESTS_PATH,
     DEFAULT_TEST_STORIES_FILE_PREFIX,
     DOCS_URL_STORIES,
     DOCS_URL_RULES,
-    DOCS_URL_TEST_CONVERSATIONS,
 )
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
 from rasa.core.actions.action import RULE_SNIPPET_ACTION_NAME
@@ -66,7 +64,7 @@ def from_reader(cls, reader: "YAMLStoryReader") -> "YAMLStoryReader":
         return cls(
             reader.domain,
             reader.template_variables,
-            reader.use_e2e,  # TODO: I don't think we actually need this
+            reader.use_e2e,
             reader.source_name,
             reader.unfold_or_utterances,
         )
diff --git a/rasa/core/training/story_writer/yaml_story_writer.py b/rasa/core/training/story_writer/yaml_story_writer.py
index 8c355673c9ac..bba815b83127 100644
--- a/rasa/core/training/story_writer/yaml_story_writer.py
+++ b/rasa/core/training/story_writer/yaml_story_writer.py
@@ -2,26 +2,21 @@
 from pathlib import Path
 
 from ruamel import yaml
-import ruamel.yaml as ruamel_yaml
 from typing import Any, Dict, List, Text, Union, Optional
 
 from ruamel.yaml.comments import CommentedMap
-
-from rasa import data
-from rasa.utils.common import raise_warning
 from ruamel.yaml.scalarstring import (
     DoubleQuotedScalarString,
     LiteralScalarString,
-    PlainScalarString,
-    ScalarString,
 )
 
+from rasa.utils.common import raise_warning
+
 from rasa.constants import LATEST_TRAINING_DATA_FORMAT_VERSION, DOCS_URL_STORIES
 from rasa.core.events import UserUttered, ActionExecuted, SlotSet, ActiveLoop
 from rasa.core.training.story_reader.yaml_story_reader import (
     KEY_STORIES,
     KEY_STORY_NAME,
-    KEY_TEST_CONVERSATIONS,
     KEY_USER_INTENT,
     KEY_ENTITIES,
     KEY_ACTION,
@@ -48,14 +43,12 @@ def dumps(self, story_steps: List[StoryStep]) -> Text:
         Returns:
             String with story steps in the YAML format.
         """
-        stream = ruamel_yaml.StringIO()
+        stream = yaml.StringIO()
         self.dump(stream, story_steps)
         return stream.getvalue()
 
     def dump(
-        self,
-        target: Union[Text, Path, ruamel_yaml.StringIO],
-        story_steps: List[StoryStep],
+        self, target: Union[Text, Path, yaml.StringIO], story_steps: List[StoryStep],
     ) -> None:
         """Writes Story steps into a target file/stream.
 
diff --git a/rasa/data.py b/rasa/data.py
index 95dd541a3449..0305473be36d 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -52,7 +52,8 @@ def is_likely_markdown_file(file_path: Text) -> bool:
             file_path: path to the file
 
         Returns:
-            `True` if the file likely contains data in markdown format, `False` otherwise.
+            `True` if the file likely contains data in markdown format,
+            `False` otherwise.
         """
     return Path(file_path).suffix in MARKDOWN_FILE_EXTENSIONS
 
diff --git a/rasa/importers/importer.py b/rasa/importers/importer.py
index ab6f2af8030c..02650a49eb43 100644
--- a/rasa/importers/importer.py
+++ b/rasa/importers/importer.py
@@ -4,7 +4,6 @@
 import logging
 
 from rasa.core.domain import Domain
-from rasa.core.interpreter import RegexInterpreter, NaturalLanguageInterpreter
 from rasa.core.training.structures import StoryGraph
 from rasa.nlu.training_data import TrainingData
 import rasa.utils.io as io_utils
@@ -20,7 +19,7 @@ async def get_domain(self) -> Domain:
         """Retrieves the domain of the bot.
 
         Returns:
-            Loaded ``Domain``.
+            Loaded `Domain`.
         """
         raise NotImplementedError()
 
@@ -39,7 +38,7 @@ async def get_stories(
             exclusion_percentage: Amount of training data that should be excluded.
 
         Returns:
-            ``StoryGraph`` containing all loaded stories.
+            `StoryGraph` containing all loaded stories.
         """
 
         raise NotImplementedError()
@@ -60,7 +59,7 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
             language: Can be used to only load training data for a certain language.
 
         Returns:
-            Loaded NLU ``TrainingData``.
+            Loaded NLU `TrainingData`.
         """
 
         raise NotImplementedError()
@@ -71,7 +70,7 @@ def load_from_config(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a ``TrainingDataImporter`` instance from a configuration file."""
+        """Loads a `TrainingDataImporter` instance from a configuration file."""
 
         config = io_utils.read_config_file(config_path)
         return TrainingDataImporter.load_from_dict(
@@ -84,7 +83,7 @@ def load_core_importer_from_config(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a ``TrainingDataImporter`` instance from a configuration file that
+        """Loads a `TrainingDataImporter` instance from a configuration file that
            only reads Core training data.
         """
 
@@ -100,7 +99,7 @@ def load_nlu_importer_from_config(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a ``TrainingDataImporter`` instance from a configuration file that
+        """Loads a `TrainingDataImporter` instance from a configuration file that
            only reads NLU training data.
         """
 
@@ -117,7 +116,7 @@ def load_from_dict(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a ``TrainingDataImporter`` instance from a dictionary."""
+        """Loads a `TrainingDataImporter` instance from a dictionary."""
 
         from rasa.importers.rasa import RasaFileImporter
 
@@ -219,8 +218,8 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
 
 
 class CombinedDataImporter(TrainingDataImporter):
-    """A ``TrainingDataImporter`` that supports using multiple ``TrainingDataImporter``s as
-        if they were a single instance.
+    """A `TrainingDataImporter` that supports using
+        multiple `TrainingDataImporter` instances as if they were a single instance.
     """
 
     def __init__(self, importers: List[TrainingDataImporter]):
diff --git a/rasa/importers/rasa.py b/rasa/importers/rasa.py
index a105b85525cb..17a524c09e75 100644
--- a/rasa/importers/rasa.py
+++ b/rasa/importers/rasa.py
@@ -3,7 +3,6 @@
 
 from rasa import data
 from rasa.core.domain import Domain, InvalidDomain
-from rasa.core.interpreter import NaturalLanguageInterpreter, RegexInterpreter
 from rasa.core.training.structures import StoryGraph
 from rasa.importers import utils, autoconfig
 from rasa.importers.importer import TrainingDataImporter
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index 36c38d353fc6..9c93fee2ba0d 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -353,7 +353,7 @@ def plot_entity_confidences(
         for target, prediction, confidence in zip(
             merged_targets, merged_predictions, merged_confidences
         )
-        if prediction != NO_ENTITY and target != prediction
+        if prediction not in (NO_ENTITY, target)
     ]
 
     plot_utils.plot_histogram([pos_hist, neg_hist], title, hist_filename)
@@ -1618,6 +1618,7 @@ def _contains_entity_labels(entity_results: List[EntityEvaluationResult]) -> boo
     for result in entity_results:
         if result.entity_targets or result.entity_predictions:
             return True
+    return False
 
 
 def cross_validate(
@@ -1645,7 +1646,6 @@ def cross_validate(
               corresponds to the relevant result for one fold
     """
     import rasa.nlu.config
-    from collections import defaultdict
 
     if isinstance(nlu_config, str):
         nlu_config = rasa.nlu.config.load(nlu_config)
@@ -1883,7 +1883,9 @@ def compare_nlu(
                         model_output_path,
                         fixed_model_name=model_name,
                     )
-                except Exception as e:
+                except Exception as e:  # skipcq: PYL-W0703
+                    # general exception catching needed to continue evaluating other
+                    # model configurations
                     logger.warning(f"Training model '{model_name}' failed. Error: {e}")
                     f_score_results[model_name][run].append(0.0)
                     continue
diff --git a/rasa/nlu/training_data/entities_parser.py b/rasa/nlu/training_data/entities_parser.py
index 299dbfe97f03..03ed06a6339e 100644
--- a/rasa/nlu/training_data/entities_parser.py
+++ b/rasa/nlu/training_data/entities_parser.py
@@ -170,7 +170,6 @@ def replace_entities(training_example: Text) -> Text:
 
 def parse_training_example(example: Text, intent: Optional[Text]) -> "Message":
     """Extract entities and synonyms, and convert to plain text."""
-    from rasa.nlu.training_data import Message
 
     entities = find_entities_in_training_example(example)
     plain_text = replace_entities(example)
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 26a28534d0be..5e0dc8a03d06 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -20,8 +20,8 @@ def _fix_matplotlib_backend() -> None:
         try:
             # on OSX sometimes the tkinter package is broken and can't be imported.
             # we'll try to import it and if it fails we will use a different backend
-            import tkinter
-        except ImportError or ModuleNotFoundError:
+            import tkinter  # skipcq: PYL-W0611
+        except (ImportError, ModuleNotFoundError):
             logger.debug("Setting matplotlib backend to 'agg'")
             matplotlib.use("agg")
 
@@ -29,11 +29,11 @@ def _fix_matplotlib_backend() -> None:
     elif matplotlib.get_backend() is None:  # pragma: no cover
         try:
             # If the `tkinter` package is available, we can use the `TkAgg` backend
-            import tkinter
+            import tkinter  # skipcq: PYL-W0611
 
             logger.debug("Setting matplotlib backend to 'TkAgg'")
             matplotlib.use("TkAgg")
-        except ImportError or ModuleNotFoundError:
+        except (ImportError, ModuleNotFoundError):
             logger.debug("Setting matplotlib backend to 'agg'")
             matplotlib.use("agg")
 
@@ -68,7 +68,7 @@ def plot_confusion_matrix(
     import matplotlib.pyplot as plt
     from matplotlib.colors import LogNorm
 
-    zmax = confusion_matrix.max() if len(confusion_matrix) else 1
+    zmax = confusion_matrix.max() if confusion_matrix else 1
     plt.clf()
     if not color_map:
         color_map = plt.cm.Blues
diff --git a/tests/core/test_dsl.py b/tests/core/test_dsl.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tests/core/test_visualization.py b/tests/core/test_visualization.py
index e0fa22b7e5f9..767f7ea1d366 100644
--- a/tests/core/test_visualization.py
+++ b/tests/core/test_visualization.py
@@ -85,7 +85,6 @@ def test_common_action_prefix_unequal():
 async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmpdir):
     from os.path import isfile
     from networkx.drawing import nx_pydot
-    from rasa.core.interpreter import RegexInterpreter
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
@@ -116,7 +115,6 @@ async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmp
 )
 async def test_merge_nodes(stories_file: Text, default_domain: Domain, tmpdir):
     from os.path import isfile
-    from rasa.core.interpreter import RegexInterpreter
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
diff --git a/tests/core/training/story_reader/test_yaml_story_reader.py b/tests/core/training/story_reader/test_yaml_story_reader.py
index bc0761592275..cff776c7dfc7 100644
--- a/tests/core/training/story_reader/test_yaml_story_reader.py
+++ b/tests/core/training/story_reader/test_yaml_story_reader.py
@@ -12,7 +12,6 @@
 from rasa.core.events import ActionExecuted, UserUttered, SlotSet, ActiveLoop
 from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
 from rasa.core.training.structures import StoryStep
-from rasa.utils import io as io_utils
 
 
 @pytest.fixture()
@@ -258,7 +257,7 @@ async def test_warning_if_intent_not_in_domain(default_domain: Domain):
     """
 
     reader = YAMLStoryReader(default_domain)
-    yaml_content = io_utils.read_yaml(stories)
+    yaml_content = rasa.utils.io.read_yaml(stories)
 
     with pytest.warns(UserWarning) as record:
         reader.read_from_parsed_yaml(yaml_content)
@@ -277,7 +276,7 @@ async def test_no_warning_if_intent_in_domain(default_domain: Domain):
     )
 
     reader = YAMLStoryReader(default_domain)
-    yaml_content = io_utils.read_yaml(stories)
+    yaml_content = rasa.utils.io.read_yaml(stories)
 
     with pytest.warns(None) as record:
         reader.read_from_parsed_yaml(yaml_content)
diff --git a/tests/core/training/story_writer/test_yaml_story_writer.py b/tests/core/training/story_writer/test_yaml_story_writer.py
index c7edd3265795..918f318a3389 100644
--- a/tests/core/training/story_writer/test_yaml_story_writer.py
+++ b/tests/core/training/story_writer/test_yaml_story_writer.py
@@ -6,7 +6,6 @@
 
 from rasa.core.domain import Domain
 from rasa.core.events import ActionExecuted, UserUttered
-from rasa.core.interpreter import RegexInterpreter
 from rasa.core.trackers import DialogueStateTracker
 from rasa.core.training.story_reader.markdown_story_reader import MarkdownStoryReader
 from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader
diff --git a/tests/test_data.py b/tests/test_data.py
index 597b22461311..41756b253f54 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -120,5 +120,5 @@ def test_is_md_file(path, is_md):
         ("my_file.md", False),
     ],
 )
-def test_is_md_file(path, is_json):
+def test_is_json_file(path, is_json):
     assert data.is_likely_json_file(path) == is_json
diff --git a/tests/test_test.py b/tests/test_test.py
index c8b907c71266..b6ecd481ceb5 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 import textwrap
 from typing import Text
-from unittest.mock import Mock
 
 import pytest
 from _pytest.capture import CaptureFixture
@@ -11,7 +10,7 @@
 
 import rasa.utils.io
 from rasa.core.actions.action import ActionListen
-from rasa.core.events import ActionExecuted, UserUttered
+from rasa.core.events import UserUttered
 from rasa.core.test import (
     EvaluationStore,
     WronglyClassifiedUserUtterance,
@@ -21,8 +20,6 @@
 from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 import rasa.model
 import rasa.cli.utils
-from rasa.core.agent import Agent
-from rasa.core.interpreter import RasaNLUInterpreter, RegexInterpreter
 from rasa.nlu.test import NO_ENTITY
 import rasa.core
 

From ab3cb49b4c753f49eca6241fbde49da91f05e666 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 10:38:51 +0200
Subject: [PATCH 19/34] fixed some more deepsource issues

---
 rasa/cli/data.py                              |  2 --
 rasa/cli/test.py                              |  1 -
 rasa/core/agent.py                            | 25 ++++++++-----------
 rasa/core/interpreter.py                      |  2 +-
 rasa/core/training/interactive.py             | 15 ++++++-----
 .../story_reader/markdown_story_reader.py     |  3 +--
 tests/core/test_data.py                       |  8 +++---
 .../story_reader/test_yaml_story_reader.py    |  2 +-
 tests/test_test.py                            |  1 -
 9 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/rasa/cli/data.py b/rasa/cli/data.py
index 82b721ea4572..295aab84e737 100644
--- a/rasa/cli/data.py
+++ b/rasa/cli/data.py
@@ -50,8 +50,6 @@ def add_subparser(
 def _add_data_convert_parsers(
     data_subparsers, parents: List[argparse.ArgumentParser]
 ) -> None:
-    from rasa.nlu import convert
-
     convert_parser = data_subparsers.add_parser(
         "convert",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index d4618b4c4ea3..b5341a8173e6 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -8,7 +8,6 @@
     DEFAULT_CONFIG_PATH,
     DEFAULT_DATA_PATH,
     DEFAULT_E2E_TESTS_PATH,
-    DEFAULT_ENDPOINTS_PATH,
     DEFAULT_MODELS_PATH,
     DEFAULT_RESULTS_PATH,
     CONFIG_SCHEMA_FILE,
diff --git a/rasa/core/agent.py b/rasa/core/agent.py
index e7fa6d40c332..c41a530ef743 100644
--- a/rasa/core/agent.py
+++ b/rasa/core/agent.py
@@ -1,50 +1,47 @@
+from asyncio import CancelledError
 import logging
 import os
 import shutil
 import tempfile
-import uuid
-from asyncio import CancelledError
 from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union
+import uuid
 
 import aiohttp
 from aiohttp import ClientError
 
 import rasa
-import rasa.utils.io
-import rasa.core.utils
-from rasa.constants import (
-    DEFAULT_DOMAIN_PATH,
-    DEFAULT_CORE_SUBDIRECTORY_NAME,
-)
-from rasa.core import constants, jobs, training
-from rasa.core.channels.channel import InputChannel, OutputChannel, UserMessage
+from rasa.constants import DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH
+from rasa.core import jobs, training
+from rasa.core.channels.channel import OutputChannel, UserMessage
 from rasa.core.constants import DEFAULT_REQUEST_TIMEOUT
 from rasa.core.domain import Domain
 from rasa.core.exceptions import AgentNotReady
 from rasa.core.interpreter import NaturalLanguageInterpreter, RegexInterpreter
-from rasa.core.lock_store import LockStore, InMemoryLockStore
+from rasa.core.lock_store import InMemoryLockStore, LockStore
 from rasa.core.nlg import NaturalLanguageGenerator
 from rasa.core.policies.ensemble import PolicyEnsemble, SimplePolicyEnsemble
 from rasa.core.policies.memoization import MemoizationPolicy
 from rasa.core.policies.policy import Policy
 from rasa.core.processor import MessageProcessor
 from rasa.core.tracker_store import (
+    FailSafeTrackerStore,
     InMemoryTrackerStore,
     TrackerStore,
-    FailSafeTrackerStore,
 )
 from rasa.core.trackers import DialogueStateTracker
+import rasa.core.utils
 from rasa.exceptions import ModelNotFound
 from rasa.importers.importer import TrainingDataImporter
 from rasa.model import (
-    get_model_subdirectories,
     get_latest_model,
-    unpack_model,
     get_model,
+    get_model_subdirectories,
+    unpack_model,
 )
 from rasa.nlu.utils import is_url
 from rasa.utils.common import raise_warning
 from rasa.utils.endpoints import EndpointConfig
+import rasa.utils.io
 
 logger = logging.getLogger(__name__)
 
diff --git a/rasa/core/interpreter.py b/rasa/core/interpreter.py
index ea686d1324e5..9ed45376bc83 100644
--- a/rasa/core/interpreter.py
+++ b/rasa/core/interpreter.py
@@ -243,7 +243,7 @@ async def _rasa_http_parse(
                             f"http. Error: {response_text}"
                         )
                         return None
-        except Exception:  # skipcq: YL-W0703
+        except Exception:  # skipcq: PYL-W0703
             # need to catch all possible exceptions when doing http requests
             # (timeouts, value errors, parser errors, ...)
             logger.exception(f"Failed to parse text '{text}' using rasa NLU over http.")
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 50838729c816..8241ff5b5047 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -22,7 +22,6 @@
 import rasa.cli.utils
 from questionary import Choice, Form, Question
 
-from rasa.cli import utils as cli_utils
 from rasa.core import constants, run, train, utils
 from rasa.core.actions.action import ACTION_LISTEN_NAME, default_action_names
 from rasa.core.channels.channel import UserMessage
@@ -258,7 +257,7 @@ def format_bot_output(message: BotUttered) -> Text:
 
     if data.get("buttons"):
         output += "\nButtons:"
-        choices = cli_utils.button_choices_from_message_data(
+        choices = rasa.cli.utils.button_choices_from_message_data(
             data, allow_free_text_input=True
         )
         for choice in choices:
@@ -267,13 +266,13 @@ def format_bot_output(message: BotUttered) -> Text:
     if data.get("elements"):
         output += "\nElements:"
         for idx, element in enumerate(data.get("elements")):
-            element_str = cli_utils.element_to_string(element, idx)
+            element_str = rasa.cli.utils.element_to_string(element, idx)
             output += "\n" + element_str
 
     if data.get("quick_replies"):
         output += "\nQuick replies:"
         for idx, element in enumerate(data.get("quick_replies")):
-            element_str = cli_utils.element_to_string(element, idx)
+            element_str = rasa.cli.utils.element_to_string(element, idx)
             output += "\n" + element_str
     return output
 
@@ -569,7 +568,7 @@ def _slot_history(tracker_dump: Dict[Text, Any]) -> List[Text]:
 
     slot_strings = []
     for k, s in tracker_dump.get("slots", {}).items():
-        colored_value = cli_utils.wrap_with_color(
+        colored_value = rasa.cli.utils.wrap_with_color(
             str(s), color=rasa.cli.utils.bcolors.WARNING
         )
         slot_strings.append(f"{k}: {colored_value}")
@@ -950,7 +949,7 @@ async def _predict_till_next_listen(
             "buttons", None
         ):
             response = _get_button_choice(last_event)
-            if response != cli_utils.FREE_TEXT_INPUT_PROMPT:
+            if response != rasa.cli.utils.FREE_TEXT_INPUT_PROMPT:
                 await send_message(endpoint, conversation_id, response)
 
 
@@ -958,11 +957,11 @@ def _get_button_choice(last_event: Dict[Text, Any]) -> Text:
     data = last_event["data"]
     message = last_event.get("text", "")
 
-    choices = cli_utils.button_choices_from_message_data(
+    choices = rasa.cli.utils.button_choices_from_message_data(
         data, allow_free_text_input=True
     )
     question = questionary.select(message, choices)
-    response = cli_utils.payload_from_button_question(question)
+    response = rasa.cli.utils.payload_from_button_question(question)
     return response
 
 
diff --git a/rasa/core/training/story_reader/markdown_story_reader.py b/rasa/core/training/story_reader/markdown_story_reader.py
index 3aa3458df5b4..813517b36daf 100644
--- a/rasa/core/training/story_reader/markdown_story_reader.py
+++ b/rasa/core/training/story_reader/markdown_story_reader.py
@@ -10,7 +10,6 @@
 import rasa.utils.io as io_utils
 from rasa.constants import (
     DEFAULT_E2E_TESTS_PATH,
-    DOCS_BASE_URL,
     DOCS_URL_DOMAINS,
     DOCS_URL_STORIES,
     LEGACY_DOCS_BASE_URL,
@@ -205,7 +204,7 @@ async def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> No
         self.current_step_builder.add_user_messages(parsed_messages)
 
     @staticmethod
-    def parse_e2e_message(line: Text) -> "Message":
+    def parse_e2e_message(line: Text) -> Message:
         """Parses an md list item line based on the current section type.
 
         Matches expressions of the form `<intent>:<example>`. For the
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
index 5f209838f32e..353d945dca3b 100644
--- a/tests/core/test_data.py
+++ b/tests/core/test_data.py
@@ -77,8 +77,8 @@ def test_get_core_nlu_directories(project):
 def test_get_core_nlu_directories_with_none():
     directories = data.get_core_nlu_directories(None)
 
-    assert all([directory for directory in directories])
-    assert all([not os.listdir(directory) for directory in directories])
+    assert all(directories)
+    assert all(not os.listdir(directory) for directory in directories)
 
 
 def test_same_file_names_get_resolved(tmpdir):
@@ -102,12 +102,12 @@ def test_same_file_names_get_resolved(tmpdir):
     nlu_files = os.listdir(nlu_directory)
 
     assert len(nlu_files) == 2
-    assert all([f.endswith("nlu.yml") for f in nlu_files])
+    assert all(f.endswith("nlu.yml") for f in nlu_files)
 
     stories = os.listdir(core_directory)
 
     assert len(stories) == 2
-    assert all([f.endswith("stories.md") for f in stories])
+    assert all(f.endswith("stories.md") for f in stories)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/core/training/story_reader/test_yaml_story_reader.py b/tests/core/training/story_reader/test_yaml_story_reader.py
index cff776c7dfc7..e976fa9f4b4b 100644
--- a/tests/core/training/story_reader/test_yaml_story_reader.py
+++ b/tests/core/training/story_reader/test_yaml_story_reader.py
@@ -295,7 +295,7 @@ async def test_active_loop_is_parsed(default_domain: Domain):
     )
 
     reader = YAMLStoryReader(default_domain)
-    yaml_content = io_utils.read_yaml(stories)
+    yaml_content = rasa.utils.io.read_yaml(stories)
 
     with pytest.warns(None) as record:
         reader.read_from_parsed_yaml(yaml_content)
diff --git a/tests/test_test.py b/tests/test_test.py
index b6ecd481ceb5..9e72e009e6ca 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -9,7 +9,6 @@
 from _pytest.monkeypatch import MonkeyPatch
 
 import rasa.utils.io
-from rasa.core.actions.action import ActionListen
 from rasa.core.events import UserUttered
 from rasa.core.test import (
     EvaluationStore,

From 947ada2f6b1590be56ef7d183c4956852cba3a16 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 11:00:23 +0200
Subject: [PATCH 20/34] fixed some more deepsource issues...

---
 rasa/cli/utils.py                           |  2 +-
 rasa/core/interpreter.py                    |  9 ++-------
 rasa/core/training/interactive.py           | 10 ++++------
 rasa/data.py                                | 22 ++++++++++-----------
 rasa/importers/importer.py                  | 16 +++++++++------
 rasa/nlu/training_data/formats/rasa_yaml.py |  6 +++---
 rasa/server.py                              |  6 +-----
 7 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/rasa/cli/utils.py b/rasa/cli/utils.py
index 043daa32f337..6a49f036c11c 100644
--- a/rasa/cli/utils.py
+++ b/rasa/cli/utils.py
@@ -174,7 +174,7 @@ def element_to_string(element: Dict[Text, Any], idx: int = 0) -> Text:
 
 def button_choices_from_message_data(
     message: Dict[Text, Any], allow_free_text_input: bool = True
-) -> "Question":
+) -> List[Text]:
     """Return list of choices to present to the user.
 
     If allow_free_text_input is True, an additional option is added
diff --git a/rasa/core/interpreter.py b/rasa/core/interpreter.py
index 9ed45376bc83..0236d8491aa4 100644
--- a/rasa/core/interpreter.py
+++ b/rasa/core/interpreter.py
@@ -155,14 +155,9 @@ async def parse(
     ) -> Dict[Text, Any]:
         """Parse a text message."""
 
-        return self.synchronous_parse(text, message_id, tracker)
+        return self.synchronous_parse(text)
 
-    def synchronous_parse(
-        self,
-        text: Text,
-        message_id: Optional[Text] = None,
-        tracker: Optional[DialogueStateTracker] = None,
-    ) -> Dict[Text, Any]:
+    def synchronous_parse(self, text: Text,) -> Dict[Text, Any]:
         """Parse a text message."""
 
         intent, confidence, entities = self.extract_intent_and_entities(text)
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 8241ff5b5047..6d7641b4145b 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -28,7 +28,6 @@
 from rasa.core.constants import (
     DEFAULT_SERVER_FORMAT,
     DEFAULT_SERVER_PORT,
-    DEFAULT_SERVER_URL,
     REQUESTED_SLOT,
     UTTER_PREFIX,
 )
@@ -948,9 +947,9 @@ async def _predict_till_next_listen(
         if last_event.get("event") == BotUttered.type_name and last_event["data"].get(
             "buttons", None
         ):
-            response = _get_button_choice(last_event)
-            if response != rasa.cli.utils.FREE_TEXT_INPUT_PROMPT:
-                await send_message(endpoint, conversation_id, response)
+            user_selection = _get_button_choice(last_event)
+            if user_selection != rasa.cli.utils.FREE_TEXT_INPUT_PROMPT:
+                await send_message(endpoint, conversation_id, user_selection)
 
 
 def _get_button_choice(last_event: Dict[Text, Any]) -> Text:
@@ -961,8 +960,7 @@ def _get_button_choice(last_event: Dict[Text, Any]) -> Text:
         data, allow_free_text_input=True
     )
     question = questionary.select(message, choices)
-    response = rasa.cli.utils.payload_from_button_question(question)
-    return response
+    return rasa.cli.utils.payload_from_button_question(question)
 
 
 async def _correct_wrong_nlu(
diff --git a/rasa/data.py b/rasa/data.py
index 0305473be36d..87309e7cd32e 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -36,25 +36,25 @@ def is_likely_yaml_file(file_path: Text) -> bool:
 def is_likely_json_file(file_path: Text) -> bool:
     """Check if a file likely contains json.
 
-        Arguments:
-            file_path: path to the file
+    Arguments:
+        file_path: path to the file
 
-        Returns:
-            `True` if the file likely contains data in json format, `False` otherwise.
-        """
+    Returns:
+        `True` if the file likely contains data in json format, `False` otherwise.
+    """
     return Path(file_path).suffix in JSON_FILE_EXTENSIONS
 
 
 def is_likely_markdown_file(file_path: Text) -> bool:
     """Check if a file likely contains markdown.
 
-        Arguments:
-            file_path: path to the file
+    Arguments:
+        file_path: path to the file
 
-        Returns:
-            `True` if the file likely contains data in markdown format,
-            `False` otherwise.
-        """
+    Returns:
+        `True` if the file likely contains data in markdown format,
+        `False` otherwise.
+    """
     return Path(file_path).suffix in MARKDOWN_FILE_EXTENSIONS
 
 
diff --git a/rasa/importers/importer.py b/rasa/importers/importer.py
index 02650a49eb43..83d7fd3be45b 100644
--- a/rasa/importers/importer.py
+++ b/rasa/importers/importer.py
@@ -83,8 +83,9 @@ def load_core_importer_from_config(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a `TrainingDataImporter` instance from a configuration file that
-           only reads Core training data.
+        """Loads core `TrainingDataImporter` instance.
+
+        Instance loaded from configuration file will only read Core training data.
         """
 
         importer = TrainingDataImporter.load_from_config(
@@ -99,8 +100,9 @@ def load_nlu_importer_from_config(
         domain_path: Optional[Text] = None,
         training_data_paths: Optional[List[Text]] = None,
     ) -> "TrainingDataImporter":
-        """Loads a `TrainingDataImporter` instance from a configuration file that
-           only reads NLU training data.
+        """Loads nlu `TrainingDataImporter` instance.
+
+        Instance loaded from configuration file will only read NLU training data.
         """
 
         importer = TrainingDataImporter.load_from_config(
@@ -218,8 +220,10 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
 
 
 class CombinedDataImporter(TrainingDataImporter):
-    """A `TrainingDataImporter` that supports using
-        multiple `TrainingDataImporter` instances as if they were a single instance.
+    """A `TrainingDataImporter` that combines multiple importers.
+
+    Uses multiple `TrainingDataImporter` instances
+    to load the data as if they were a single instance.
     """
 
     def __init__(self, importers: List[TrainingDataImporter]):
diff --git a/rasa/nlu/training_data/formats/rasa_yaml.py b/rasa/nlu/training_data/formats/rasa_yaml.py
index cd003c210c78..853120cb4ad8 100644
--- a/rasa/nlu/training_data/formats/rasa_yaml.py
+++ b/rasa/nlu/training_data/formats/rasa_yaml.py
@@ -150,13 +150,13 @@ def _parse_responses(self, responses_data: Dict[Text, List[Any]]) -> None:
 
         self.responses = Domain.collect_templates(responses_data)
 
-    def _parse_intent(self, data: Dict[Text, Any]) -> None:
+    def _parse_intent(self, intent_data: Dict[Text, Any]) -> None:
         from rasa.nlu.training_data import Message
         import rasa.nlu.training_data.entities_parser as entities_parser
         import rasa.nlu.training_data.synonyms_parser as synonyms_parser
         import rasa.nlu.constants as nlu_constants
 
-        intent = data.get(KEY_INTENT, "")
+        intent = intent_data.get(KEY_INTENT, "")
         if not intent:
             raise_warning(
                 f"Issue found while processing '{self.filename}': "
@@ -167,7 +167,7 @@ def _parse_intent(self, data: Dict[Text, Any]) -> None:
             )
             return
 
-        examples = data.get(KEY_INTENT_EXAMPLES, "")
+        examples = intent_data.get(KEY_INTENT_EXAMPLES, "")
         for example, entities in self._parse_training_examples(examples, intent):
 
             plain_text = entities_parser.replace_entities(example)
diff --git a/rasa/server.py b/rasa/server.py
index fdca42cc0792..158997a7d5fa 100644
--- a/rasa/server.py
+++ b/rasa/server.py
@@ -3,7 +3,6 @@
 import logging
 import multiprocessing
 import os
-from pathlib import Path
 import tempfile
 import traceback
 import typing
@@ -12,8 +11,6 @@
 from pathlib import Path
 from typing import Any, Callable, List, Optional, Text, Union, Dict
 
-from sanic.exceptions import InvalidUsage
-
 from rasa.core.training.story_writer.yaml_story_writer import YAMLStoryWriter
 from rasa.nlu.training_data.formats import RasaYAMLReader
 import rasa
@@ -30,7 +27,6 @@
     MINIMUM_COMPATIBLE_VERSION,
     DOCS_URL_TRAINING_DATA_NLU,
 )
-from rasa.core import agent
 from rasa.core.agent import Agent
 from rasa.core.brokers.broker import EventBroker
 from rasa.core.channels.channel import (
@@ -344,7 +340,7 @@ async def _load_agent(
             if not lock_store:
                 lock_store = LockStore.create(endpoints.lock_store)
 
-        loaded_agent = await agent.load_agent(
+        loaded_agent = await rasa.core.agent.load_agent(
             model_path,
             model_server,
             remote_storage,

From e76ab645a1af55f7b74a35c24f95b1cae1238458 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Wed, 26 Aug 2020 09:07:42 +0000
Subject: [PATCH 21/34] Autofix issues in 1 files

Resolved issues in the following files via DeepSource Autofix:
1. rasa/cli/utils.py
---
 rasa/cli/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/cli/utils.py b/rasa/cli/utils.py
index 6a49f036c11c..8c9c4529b1f3 100644
--- a/rasa/cli/utils.py
+++ b/rasa/cli/utils.py
@@ -88,7 +88,7 @@ def cancel_cause_not_found(
         "The path '{}' does not exist. Please make sure to {}specify it"
         " with '--{}'.".format(current, default_clause, parameter)
     )
-    exit(1)
+    sys.exit(1)
 
 
 def parse_last_positional_argument_as_model_path() -> None:

From 27d8bfa9ade35c4d8dd1d18a92aa77adf332e4f5 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 11:35:11 +0200
Subject: [PATCH 22/34] added comments for public functions

---
 rasa/cli/data.py               | 13 +++++++++++--
 rasa/core/training/__init__.py | 24 +++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/rasa/cli/data.py b/rasa/cli/data.py
index 295aab84e737..b61ebafe1bff 100644
--- a/rasa/cli/data.py
+++ b/rasa/cli/data.py
@@ -139,6 +139,11 @@ def _append_story_structure_arguments(parser: argparse.ArgumentParser) -> None:
 
 
 def split_nlu_data(args: argparse.Namespace) -> None:
+    """Load data from a file path and split the NLU data into test and train examples.
+
+    Args:
+        args: Commandline arguments
+    """
     from rasa.nlu.training_data.loading import load_data
     from rasa.nlu.training_data.util import get_file_format
 
@@ -155,8 +160,7 @@ def split_nlu_data(args: argparse.Namespace) -> None:
 
 
 def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None:
-    """
-    Validates either the story structure or the entire project.
+    """Validates either the story structure or the entire project.
 
     Args:
         args: Commandline arguments
@@ -183,6 +187,11 @@ def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None
 
 
 def validate_stories(args: argparse.Namespace) -> None:
+    """Validate that training data file content conforms to training data spec.
+
+    Args:
+        args: Commandline arguments
+    """
     validate_files(args, stories_only=True)
 
 
diff --git a/rasa/core/training/__init__.py b/rasa/core/training/__init__.py
index f6add029508c..10d73c041dea 100644
--- a/rasa/core/training/__init__.py
+++ b/rasa/core/training/__init__.py
@@ -51,9 +51,31 @@ async def load_data(
     augmentation_factor: int = 50,
     tracker_limit: Optional[int] = None,
     use_story_concatenation: bool = True,
-    debug_plots=False,
+    debug_plots: bool = False,
     exclusion_percentage: Optional[int] = None,
 ) -> List["DialogueStateTracker"]:
+    """
+    Load training data from a resource.
+
+    Args:
+        resource_name: resource to load the data from, either a path or an importer
+        domain: domain used for loading
+        remove_duplicates: should duplicated training examples be removed?
+        unique_last_num_states: number of states in a conversation that make
+            a tracker unique (this is used to identify duplicates)
+        augmentation_factor:
+            by how much should the story training data be augmented
+        tracker_limit:
+            maximum number of trackers to generate during augmentation
+        use_story_concatenation:
+            should stories be concatenated when doing data augmentation
+        debug_plots:
+            generate debug plots during loading
+        exclusion_percentage:
+            how much data to exclude
+    Returns:
+        list of loaded trackers
+    """
     from rasa.core.training.generator import TrainingDataGenerator
     from rasa.importers.importer import TrainingDataImporter
 

From d483fbfe17441de5f033cb18796e088f68edd593 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 13:57:39 +0200
Subject: [PATCH 23/34] trying to fix tests

---
 rasa/utils/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 5e0dc8a03d06..1201dfed314a 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -68,7 +68,7 @@ def plot_confusion_matrix(
     import matplotlib.pyplot as plt
     from matplotlib.colors import LogNorm
 
-    zmax = confusion_matrix.max() if confusion_matrix else 1
+    zmax = confusion_matrix.max() if len(confusion_matrix) > 0 else 1
     plt.clf()
     if not color_map:
         color_map = plt.cm.Blues

From 2b73ba96500c07fb75194594a0fd912649a34181 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 16:45:46 +0200
Subject: [PATCH 24/34] fixed server tests

---
 rasa/core/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/core/test.py b/rasa/core/test.py
index 54a00f491cb9..e655002ad329 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -642,12 +642,12 @@ async def test(
             out_directory,
         )
 
-    if errors:
+    if errors and out_directory:
         _log_stories(
             story_evaluation.failed_stories,
             os.path.join(out_directory, FAILED_STORIES_FILE),
         )
-    if successes:
+    if successes and out_directory:
         _log_stories(
             story_evaluation.successful_stories,
             os.path.join(out_directory, SUCCESSFUL_STORIES_FILE),

From de9be86645466bf5f9f537740e9477f581ff46ec Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 17:17:53 +0200
Subject: [PATCH 25/34] Apply suggestions from code review

Co-authored-by: Alexander Khizov <degiz@users.noreply.github.com>
---
 docs/docs/testing-your-assistant.mdx | 6 +++---
 rasa/cli/data.py                     | 2 +-
 rasa/core/trackers.py                | 4 +++-
 rasa/core/training/__init__.py       | 1 +
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/docs/docs/testing-your-assistant.mdx b/docs/docs/testing-your-assistant.mdx
index 05b0b2714e7e..66fd0822b51a 100644
--- a/docs/docs/testing-your-assistant.mdx
+++ b/docs/docs/testing-your-assistant.mdx
@@ -8,7 +8,7 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
 Rasa Open Source lets you test dialogues end-to-end by running through
-test stories. The test make sure that user messages are processed correctly
+test stories. The test makes sure that user messages are processed correctly
 and the dialogue predictions are correct. In addition to end-to-end tests, you can
 also test the dialogue handling (core) and the message processing (nlu)
 separately.
@@ -135,14 +135,14 @@ You can test your assistant against them by running:
 rasa test
 ```
 
-The command will always load all stories from any story files, where there file
+The command will always load all stories from any story files, where the file
 name starts with `test_`, e.g. `test_conversations.yml`. Your story test
 file names should always start with `test_` for this detection to work.
 
 :::info Custom Actions
 [Custom Actions](./custom-actions.mdx) are **not executed as part of end-to-end tests.** If your custom
 actions append any events to the conversation, this has to be reflected in your end-to-end
-tests (e.g. by adding `slot` events to your end-to-end story).
+tests (e.g. by adding `slot_was_set` events to your end-to-end story).
 
 If you want to test the code of your custom actions, you should write unit tests
 for them and include these tests in your CI/CD pipeline.
diff --git a/rasa/cli/data.py b/rasa/cli/data.py
index b61ebafe1bff..e05fc721beb5 100644
--- a/rasa/cli/data.py
+++ b/rasa/cli/data.py
@@ -187,7 +187,7 @@ def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None
 
 
 def validate_stories(args: argparse.Namespace) -> None:
-    """Validate that training data file content conforms to training data spec.
+    """Validates that training data file content conforms to training data spec.
 
     Args:
         args: Commandline arguments
diff --git a/rasa/core/trackers.py b/rasa/core/trackers.py
index 4be86085c7be..28456f7da847 100644
--- a/rasa/core/trackers.py
+++ b/rasa/core/trackers.py
@@ -550,7 +550,9 @@ def as_story(self, include_source: bool = False) -> "Story":
     def export_stories(self, e2e: bool = False, include_source: bool = False) -> Text:
         """Dump the tracker as a story in the Rasa Core story format.
 
-        Returns the dumped tracker as a string."""
+        Returns: 
+            The dumped tracker as a string.
+        """
         # TODO: we need to revisit all usages of this, the caller needs to specify
         #       the format. this likely points to areas where we are not properly
         #       handling markdown vs yaml
diff --git a/rasa/core/training/__init__.py b/rasa/core/training/__init__.py
index 10d73c041dea..3d43493c33bf 100644
--- a/rasa/core/training/__init__.py
+++ b/rasa/core/training/__init__.py
@@ -73,6 +73,7 @@ async def load_data(
             generate debug plots during loading
         exclusion_percentage:
             how much data to exclude
+
     Returns:
         list of loaded trackers
     """

From ecaa5fcd64aa6db816b2eb55ce3f08c0f671319e Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 17:18:42 +0200
Subject: [PATCH 26/34] addressed review comments

---
 docs/docs/command-line-interface.mdx                 | 5 +++--
 rasa/constants.py                                    | 2 +-
 rasa/core/schemas/stories.yml                        | 1 -
 rasa/core/training/story_reader/yaml_story_reader.py | 4 ++--
 rasa/data.py                                         | 8 ++++----
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/docs/command-line-interface.mdx b/docs/docs/command-line-interface.mdx
index 7a02e9b9e310..e0058259a377 100644
--- a/docs/docs/command-line-interface.mdx
+++ b/docs/docs/command-line-interface.mdx
@@ -243,8 +243,8 @@ create a split of your NLU data, run:
 rasa data split nlu
 ```
 
-You can specify the training data, the fraction, and the output directory using t
-he following arguments:
+You can specify the training data, the fraction, and the output directory using
+the following arguments:
 
 ```text [rasa data split nlu --help]
 ```
@@ -269,6 +269,7 @@ You cam convert NLU data from
 - Markdown
 
 to
+- YAML or
 - JSON or
 - Markdown.
 
diff --git a/rasa/constants.py b/rasa/constants.py
index 02914bb06a3a..d77838489468 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -8,7 +8,6 @@
 DEFAULT_MODELS_PATH = "models"
 DEFAULT_DATA_PATH = "data"
 DEFAULT_E2E_TESTS_PATH = "tests"
-DEFAULT_TEST_STORIES_FILE_PREFIX = "test_"
 DEFAULT_RESULTS_PATH = "results"
 DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results"
 DEFAULT_CORE_SUBDIRECTORY_NAME = "core"
@@ -16,6 +15,7 @@
 DEFAULT_REQUEST_TIMEOUT = 60 * 5  # 5 minutes
 DEFAULT_RESPONSE_TIMEOUT = 60 * 60  # 1 hour
 
+TEST_STORIES_FILE_PREFIX = "test_"
 TEST_DATA_FILE = "test.md"
 TRAIN_DATA_FILE = "train.md"
 NLG_DATA_FILE = "responses.md"
diff --git a/rasa/core/schemas/stories.yml b/rasa/core/schemas/stories.yml
index cc1f9e5c2f98..345484ca1e4a 100644
--- a/rasa/core/schemas/stories.yml
+++ b/rasa/core/schemas/stories.yml
@@ -28,7 +28,6 @@ mapping:
                 allowempty: False
               user:
                 type: "str"
-                required: False
                 allowempty: False
               entities:
                 type: "seq"
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index 964844d0940d..b8f492b55de8 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -9,7 +9,7 @@
 import rasa.utils.common as common_utils
 import rasa.utils.io as io_utils
 from rasa.constants import (
-    DEFAULT_TEST_STORIES_FILE_PREFIX,
+    TEST_STORIES_FILE_PREFIX,
     DOCS_URL_STORIES,
     DOCS_URL_RULES,
 )
@@ -169,7 +169,7 @@ def _has_test_prefix(cls, file_path: Text) -> bool:
         Returns:
             `True` if the filename starts with the prefix, `False` otherwise.
         """
-        return Path(file_path).name.startswith(DEFAULT_TEST_STORIES_FILE_PREFIX)
+        return Path(file_path).name.startswith(TEST_STORIES_FILE_PREFIX)
 
     @classmethod
     def is_yaml_test_stories_file(cls, file_path: Union[Text, Path]) -> bool:
diff --git a/rasa/data.py b/rasa/data.py
index 87309e7cd32e..e76e17c2eb2b 100644
--- a/rasa/data.py
+++ b/rasa/data.py
@@ -120,13 +120,13 @@ def get_core_nlu_directories(
 
 
 def get_data_files(
-    paths: Optional[Union[Text, List[Text]]], filter_property: Callable[[Text], bool]
+    paths: Optional[Union[Text, List[Text]]], filter_predicate: Callable[[Text], bool]
 ) -> List[Text]:
     """Recursively collects all training files from a list of paths.
 
     Args:
         paths: List of paths to training files or folders containing them.
-        filter_property: property to use when filtering the paths, e.g. `is_nlu_file`.
+        filter_predicate: predicate to use when filtering the paths, e.g. `is_nlu_file`.
 
     Returns:
         paths of training data files.
@@ -144,10 +144,10 @@ def get_data_files(
             continue
 
         if _is_valid_filetype(path):
-            if filter_property(path):
+            if filter_predicate(path):
                 data_files.add(os.path.abspath(path))
         else:
-            new_data_files = _find_data_files_in_directory(path, filter_property)
+            new_data_files = _find_data_files_in_directory(path, filter_predicate)
             data_files.update(new_data_files)
 
     return sorted(data_files)

From 58ec1d3ba60584ba8902b051bfe28b90975fc01d Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 22:39:24 +0200
Subject: [PATCH 27/34] renamed files to stay in test stories naming convention

---
 docs/docs/chitchat-faqs.mdx                      | 10 +++++-----
 docs/docs/command-line-interface.mdx             |  2 +-
 docs/docs/jupyter-notebooks.mdx                  |  2 +-
 docs/docs/setting-up-ci-cd.mdx                   |  2 +-
 docs/docs/testing-your-assistant.mdx             | 16 ++++++++--------
 docs/docs/training-data-format.mdx               |  2 +-
 .../{test_conversations.yml => test_stories.yml} |  0
 .../{test_conversations.yml => test_stories.yml} |  0
 rasa/constants.py                                |  2 +-
 rasa/core/test.py                                |  6 +++---
 .../training/story_reader/yaml_story_reader.py   |  1 -
 rasa/importers/autoconfig.py                     |  6 ++++++
 tests/importers/test_multi_project.py            | 12 ++++++------
 tests/test_data.py                               |  2 +-
 14 files changed, 34 insertions(+), 29 deletions(-)
 rename examples/formbot/tests/{test_conversations.yml => test_stories.yml} (100%)
 rename rasa/cli/initial_project/tests/{test_conversations.yml => test_stories.yml} (100%)

diff --git a/docs/docs/chitchat-faqs.mdx b/docs/docs/chitchat-faqs.mdx
index 5c802343479e..0cb89973bbe2 100644
--- a/docs/docs/chitchat-faqs.mdx
+++ b/docs/docs/chitchat-faqs.mdx
@@ -182,7 +182,7 @@ the dialogue handling is tested as well as the message handling.
 The initial project already contains test conversations, you can replace
 them with some test conversations for your assistant:
 
-```yaml-rasa title="tests/test_conversations.yml"
+```yaml-rasa title="tests/test_stories.yml"
 stories:
 - story: greet and goodybe
   steps:
@@ -226,7 +226,7 @@ This test file contains three separate test stories. You can
 test your bot on all of them using `rasa test`:
 
 ```bash
-rasa test --stories tests/test_conversations.yml
+rasa test --stories tests/test_stories.yml
 ```
 
 The test command will produce a directory named `results`. It will contain a file
@@ -236,7 +236,7 @@ CI/CD pipeline, the test option `--fail-on-prediction-errors` can be used to thr
 an exception that stops the pipeline:
 
 ```bash
-rasa test --stories tests/test_conversations.yml --fail-on-prediction-errors
+rasa test --stories tests/test_stories.yml --fail-on-prediction-errors
 ```
 
 ## Handling FAQs using a Response Selector
@@ -388,7 +388,7 @@ rasa shell
 
 At this stage it makes sense to add a few test cases for our conversations:
 
-```yaml-rasa title="tests/test_conversations.yml"
+```yaml-rasa title="tests/test_stories.yml"
 stories:
 - story: ask channels
   steps:
@@ -427,4 +427,4 @@ Here's a **minimal checklist of files you need to modify** to build a basic FAQ
 
 * `data/stories.yml`: Add a simple story for FAQs
 
-* `tests/test_conversations.yml`: Add E2E test stories for your FAQs
+* `tests/test_stories.yml`: Add E2E test stories for your FAQs
diff --git a/docs/docs/command-line-interface.mdx b/docs/docs/command-line-interface.mdx
index e0058259a377..4ece80398364 100644
--- a/docs/docs/command-line-interface.mdx
+++ b/docs/docs/command-line-interface.mdx
@@ -49,7 +49,7 @@ This creates the following files:
 ├── models
 │   └── <timestamp>.tar.gz
 └── tests
-   └── test_conversations.yml
+   └── test_stories.yml
 ```
 
 The `rasa init` command will ask you if you want to train an initial model using this data.
diff --git a/docs/docs/jupyter-notebooks.mdx b/docs/docs/jupyter-notebooks.mdx
index 3d421f004c5f..2b5d20c09a5f 100644
--- a/docs/docs/jupyter-notebooks.mdx
+++ b/docs/docs/jupyter-notebooks.mdx
@@ -109,5 +109,5 @@ else:
 if os.path.isdir("results"):
       print("\n")
       print("Core Errors:")
-      print(open("results/failed_conversations.yml").read())
+      print(open("results/failed_test_stories.yml").read())
 ```
diff --git a/docs/docs/setting-up-ci-cd.mdx b/docs/docs/setting-up-ci-cd.mdx
index 696d82fc68ba..4dbb1f8e9294 100644
--- a/docs/docs/setting-up-ci-cd.mdx
+++ b/docs/docs/setting-up-ci-cd.mdx
@@ -93,7 +93,7 @@ important as you start introducing more complicated stories from user
 conversations.
 
 ```bash
-rasa test --stories tests/test_conversations.yml --fail-on-prediction-errors
+rasa test --stories tests/test_stories.yml --fail-on-prediction-errors
 ```
 
 The `--fail-on-prediction-errors` flag ensures the test will fail if any test
diff --git a/docs/docs/testing-your-assistant.mdx b/docs/docs/testing-your-assistant.mdx
index 66fd0822b51a..3ba4df183f73 100644
--- a/docs/docs/testing-your-assistant.mdx
+++ b/docs/docs/testing-your-assistant.mdx
@@ -30,7 +30,7 @@ Here are some examples:
 <Tabs values={[{"label": "Basics", "value": "basics"}, {"label": "Custom Actions", "value": "customactions"}, {"label": "Forms Happy Path", "value": "formshappypath"}, {"label": "Forms Unhappy Path", "value": "formsunhappypath"}]} defaultValue="basics">
   <TabItem value="basics">
 
-  ```yaml-rasa title="tests/test_conversations.yml"
+  ```yaml-rasa title="tests/test_stories.yml"
   stories:
   - story: A basic end-to-end test
     steps:
@@ -51,7 +51,7 @@ Here are some examples:
   </TabItem>
   <TabItem value="customactions">
 
-  ```yaml-rasa title="tests/test_conversations.yml"
+  ```yaml-rasa title="tests/test_stories.yml"
   stories:
   - story: A test where a custom action returns events
     steps:
@@ -72,7 +72,7 @@ Here are some examples:
   </TabItem>
   <TabItem value="formshappypath">
 
-  ```yaml-rasa title="tests/test_conversations.yml"
+  ```yaml-rasa title="tests/test_stories.yml"
   stories:
   - story: A test conversation with a form
     steps:
@@ -100,7 +100,7 @@ Here are some examples:
   </TabItem>
   <TabItem value="formsunhappypath">
 
-  ```yaml-rasa title="tests/test_conversations.yml"
+  ```yaml-rasa title="tests/test_stories.yml"
   stories:
   - story: A test conversation with unexpected input during a form
     steps:
@@ -128,7 +128,7 @@ Here are some examples:
   </TabItem>
 </Tabs>
 
-By default Rasa Open Source saves conversation tests to `tests/test_conversations.yml`.
+By default Rasa Open Source saves conversation tests to `tests/test_stories.yml`.
 You can test your assistant against them by running:
 
 ```bash
@@ -136,7 +136,7 @@ rasa test
 ```
 
 The command will always load all stories from any story files, where the file
-name starts with `test_`, e.g. `test_conversations.yml`. Your story test
+name starts with `test_`, e.g. `test_stories.yml`. Your story test
 file names should always start with `test_` for this detection to work.
 
 :::info Custom Actions
@@ -277,10 +277,10 @@ You can evaluate your trained model on a set of test stories
 by using the evaluate script:
 
 ```bash
-rasa test core --stories test_conversations.yml --out results
+rasa test core --stories test_stories.yml --out results
 ```
 
-This will print the failed stories to `results/failed_conversations.yml`.
+This will print the failed stories to `results/failed_test_stories.yml`.
 We count any story as failed if at least one of the actions
 was predicted incorrectly.
 
diff --git a/docs/docs/training-data-format.mdx b/docs/docs/training-data-format.mdx
index 619b9eed5499..0c66a2e0133b 100644
--- a/docs/docs/training-data-format.mdx
+++ b/docs/docs/training-data-format.mdx
@@ -86,7 +86,7 @@ rules:
 ```
 
 If you want to specify your test stories, you need to put them into a separate file:
-```yaml-rasa title="tests/test_conversations.yml"
+```yaml-rasa title="tests/test_stories.yml"
 stories:
 - story: greet and ask language
 - steps:
diff --git a/examples/formbot/tests/test_conversations.yml b/examples/formbot/tests/test_stories.yml
similarity index 100%
rename from examples/formbot/tests/test_conversations.yml
rename to examples/formbot/tests/test_stories.yml
diff --git a/rasa/cli/initial_project/tests/test_conversations.yml b/rasa/cli/initial_project/tests/test_stories.yml
similarity index 100%
rename from rasa/cli/initial_project/tests/test_conversations.yml
rename to rasa/cli/initial_project/tests/test_stories.yml
diff --git a/rasa/constants.py b/rasa/constants.py
index d77838489468..ba2365982e6d 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -39,7 +39,7 @@
 DOCS_URL_DOMAINS = DOCS_BASE_URL + "/core/domains/"
 DOCS_URL_STORIES = DOCS_BASE_URL + "/core/stories/"
 DOCS_URL_RULES = DOCS_BASE_URL + "/core/rules/"
-DOCS_URL_TEST_CONVERSATIONS = DOCS_BASE_URL + "/testing-your-assistant"
+DOCS_URL_TEST_STORIES = DOCS_BASE_URL + "/testing-your-assistant"
 DOCS_URL_ACTIONS = DOCS_BASE_URL + "/core/actions/"
 DOCS_URL_CONNECTORS = DOCS_BASE_URL + "/user-guide/connectors/"
 DOCS_URL_EVENT_BROKERS = DOCS_BASE_URL + "/api/event-brokers/"
diff --git a/rasa/core/test.py b/rasa/core/test.py
index e655002ad329..5433ccbba274 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -33,8 +33,8 @@
 
 CONFUSION_MATRIX_STORIES_FILE = "story_confusion_matrix.png"
 REPORT_STORIES_FILE = "story_report.json"
-FAILED_STORIES_FILE = "failed_conversations.yml"
-SUCCESSFUL_STORIES_FILE = "successful_conversations.yml"
+FAILED_STORIES_FILE = "failed_test_stories.yml"
+SUCCESSFUL_STORIES_FILE = "successful_test_stories.yml"
 
 
 logger = logging.getLogger(__name__)
@@ -162,7 +162,7 @@ class EndToEndUserUtterance(UserUttered):
     """End-to-end user utterance.
 
     Mostly used to print the full end-to-end user message in the
-    `failed_conversations.yml` output file."""
+    `failed_test_stories.yml` output file."""
 
     def as_story_string(self, e2e: bool = True) -> Text:
         return super().as_story_string(e2e=True)
diff --git a/rasa/core/training/story_reader/yaml_story_reader.py b/rasa/core/training/story_reader/yaml_story_reader.py
index b8f492b55de8..0177c2c4bb7c 100644
--- a/rasa/core/training/story_reader/yaml_story_reader.py
+++ b/rasa/core/training/story_reader/yaml_story_reader.py
@@ -25,7 +25,6 @@
 
 KEY_STORIES = "stories"
 KEY_STORY_NAME = "story"
-KEY_TEST_CONVERSATIONS = "test_conversations"
 KEY_RULES = "rules"
 KEY_RULE_NAME = "rule"
 KEY_STEPS = "steps"
diff --git a/rasa/importers/autoconfig.py b/rasa/importers/autoconfig.py
index 91646833c41c..99d737773813 100644
--- a/rasa/importers/autoconfig.py
+++ b/rasa/importers/autoconfig.py
@@ -126,6 +126,12 @@ def _dump_config(
         auto_configured_keys: Keys for which a commented out auto configuration section
                               needs to be added to the config file.
     """
+
+    if (
+        os.path.abspath(config_file_path)
+        == "/Users/tmbo/lastmile/bot-ai/rasa/rasa/cli/initial_project/config.yml"
+    ):
+        raise Exception("NOOOOOOO!")
     config_as_expected = _is_config_file_as_expected(
         config_file_path, missing_keys, auto_configured_keys
     )
diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py
index 9bab4b4b5554..8b0361cd32b1 100644
--- a/tests/importers/test_multi_project.py
+++ b/tests/importers/test_multi_project.py
@@ -215,10 +215,10 @@ def test_not_importing_not_relevant_additional_files(tmpdir_factory):
 
 
 @pytest.mark.parametrize(
-    "e2e_filename,e2e_story_test",
+    "test_stories_filename,test_story",
     [
         (
-            "test_conversations.yml",
+            "test_stories.yml",
             """
         stories:
         - story: story test
@@ -239,7 +239,7 @@ def test_not_importing_not_relevant_additional_files(tmpdir_factory):
     ],
 )
 async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
-    tmpdir_factory: TempdirFactory, e2e_filename: Text, e2e_story_test: Text
+    tmpdir_factory: TempdirFactory, test_stories_filename: Text, test_story: Text
 ):
     from rasa.core.training.structures import StoryGraph
     import rasa.core.training.loading as core_loading
@@ -260,9 +260,9 @@ async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
     )
 
     e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / e2e_filename
+        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / test_stories_filename
     )
-    e2e_story_test_file.write(e2e_story_test, ensure=True)
+    e2e_story_test_file.write(test_story, ensure=True)
 
     selector = MultiProjectImporter(config_path)
 
@@ -293,7 +293,7 @@ def test_not_importing_e2e_conversation_tests_in_project(
     story_file.write("""## story""", ensure=True)
 
     e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_conversations.yml"
+        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_stories.yml"
     )
     e2e_story_test_file.write("""stories:""", ensure=True)
 
diff --git a/tests/test_data.py b/tests/test_data.py
index 41756b253f54..e08bdda02a80 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -40,7 +40,7 @@ def test_default_conversation_tests_are_conversation_tests_yml(tmpdir: Path):
     parent = tmpdir / DEFAULT_E2E_TESTS_PATH
     Path(parent).mkdir(parents=True)
 
-    e2e_path = parent / "test_conversations.yml"
+    e2e_path = parent / "test_stories.yml"
     e2e_story = """stories:"""
     write_text_file(e2e_story, e2e_path)
 

From 5d98ab035dc4f36deb455a49af2f209d3a41bc5a Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Wed, 26 Aug 2020 22:47:24 +0200
Subject: [PATCH 28/34] removed trailing whitespace

---
 rasa/core/trackers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/trackers.py b/rasa/core/trackers.py
index 28456f7da847..dabb243cc25f 100644
--- a/rasa/core/trackers.py
+++ b/rasa/core/trackers.py
@@ -550,7 +550,7 @@ def as_story(self, include_source: bool = False) -> "Story":
     def export_stories(self, e2e: bool = False, include_source: bool = False) -> Text:
         """Dump the tracker as a story in the Rasa Core story format.
 
-        Returns: 
+        Returns:
             The dumped tracker as a string.
         """
         # TODO: we need to revisit all usages of this, the caller needs to specify

From a13b422ff9136786311a7a05c53d06505d75b6a5 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 01:39:51 +0200
Subject: [PATCH 29/34] fixed shitty tests

---
 .../event_brokers/file_endpoint.yml           |   3 -
 data/test_multi_domain/config.yml             |   9 +-
 rasa/core/test.py                             |   2 +-
 rasa/importers/autoconfig.py                  |   5 -
 rasa/nlu/test.py                              |  12 +-
 rasa/server.py                                |   6 +-
 tests/cli/test_rasa_export.py                 |   6 +-
 tests/cli/test_rasa_test.py                   |  15 +-
 tests/cli/test_utils.py                       |  21 +--
 tests/core/conftest.py                        |   2 +-
 tests/core/test_broker.py                     |  43 +++--
 tests/core/test_data.py                       |  16 +-
 tests/core/test_domain.py                     |  11 +-
 tests/core/test_lock_store.py                 |   8 +-
 tests/core/test_model.py                      |  27 +--
 tests/core/test_policies.py                   |  55 +++---
 tests/core/test_trackers.py                   |  17 +-
 tests/core/test_visualization.py              |  11 +-
 .../story_reader/test_common_story_reader.py  |   4 +-
 .../test_markdown_story_reader.py             |  19 +-
 .../test_example_bots_training_data.py        |  25 ++-
 tests/importers/test_multi_project.py         | 173 +++++++++---------
 tests/nlu/classifiers/test_diet_classifier.py |  14 +-
 tests/nlu/conftest.py                         |  30 ---
 tests/nlu/test_evaluation.py                  |  51 ++++--
 tests/nlu/test_persistor.py                   |   4 +-
 tests/nlu/tokenizers/test_jieba_tokenizer.py  |   5 +-
 tests/test_server.py                          |  25 ++-
 tests/test_train.py                           |  45 +++--
 29 files changed, 352 insertions(+), 312 deletions(-)
 delete mode 100644 data/test_endpoints/event_brokers/file_endpoint.yml

diff --git a/data/test_endpoints/event_brokers/file_endpoint.yml b/data/test_endpoints/event_brokers/file_endpoint.yml
deleted file mode 100644
index 13c7002f397e..000000000000
--- a/data/test_endpoints/event_brokers/file_endpoint.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-event_broker:
-  path: "rasa_event.log"
-  type: file
diff --git a/data/test_multi_domain/config.yml b/data/test_multi_domain/config.yml
index 61e0b5b7133b..dba37fe301bd 100644
--- a/data/test_multi_domain/config.yml
+++ b/data/test_multi_domain/config.yml
@@ -1,17 +1,10 @@
 language: en
 
 pipeline:
-  - name: SpacyNLP
-  - name: SpacyTokenizer
-  - name: SpacyFeaturizer
-  - name: RegexFeaturizer
-  - name: CRFEntityExtractor
-  - name: EntitySynonymMapper
-  - name: SklearnIntentClassifier
+  - name: "KeywordIntentClassifier"
 
 policies:
   - name: MemoizationPolicy
-  - name: TEDPolicy
 
 importers:
   - name: MultiProjectImporter
diff --git a/rasa/core/test.py b/rasa/core/test.py
index 5433ccbba274..92cda8dc2c04 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -635,7 +635,7 @@ async def test(
         include_report=False,
     )
 
-    if not disable_plotting:
+    if not disable_plotting and out_directory:
         _plot_story_evaluation(
             evaluation_store.action_targets,
             evaluation_store.action_predictions,
diff --git a/rasa/importers/autoconfig.py b/rasa/importers/autoconfig.py
index 99d737773813..7033c14fc35c 100644
--- a/rasa/importers/autoconfig.py
+++ b/rasa/importers/autoconfig.py
@@ -127,11 +127,6 @@ def _dump_config(
                               needs to be added to the config file.
     """
 
-    if (
-        os.path.abspath(config_file_path)
-        == "/Users/tmbo/lastmile/bot-ai/rasa/rasa/cli/initial_project/config.yml"
-    ):
-        raise Exception("NOOOOOOO!")
     config_as_expected = _is_config_file_as_expected(
         config_file_path, missing_keys, auto_configured_keys
     )
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index 9c93fee2ba0d..639146313dca 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -608,17 +608,13 @@ def evaluate_intents(
         if isinstance(report, str):
             log_evaluation_table(report, precision, f1, accuracy)
 
-    if successes:
-        successes_filename = "intent_successes.json"
-        if output_directory:
-            successes_filename = os.path.join(output_directory, successes_filename)
+    if successes and output_directory:
+        successes_filename = os.path.join(output_directory, "intent_successes.json")
         # save classified samples to file for debugging
         write_intent_successes(intent_results, successes_filename)
 
-    if errors:
-        errors_filename = "intent_errors.json"
-        if output_directory:
-            errors_filename = os.path.join(output_directory, errors_filename)
+    if errors and output_directory:
+        errors_filename = os.path.join(output_directory, "intent_errors.json")
         # log and save misclassified samples to file for debugging
         write_intent_errors(intent_results, errors_filename)
 
diff --git a/rasa/server.py b/rasa/server.py
index fc1d7a08297b..ffc4d9e7b0b1 100644
--- a/rasa/server.py
+++ b/rasa/server.py
@@ -827,7 +827,9 @@ async def evaluate_stories(request: Request) -> HTTPResponse:
         use_e2e = rasa.utils.endpoints.bool_arg(request, "e2e", default=False)
 
         try:
-            evaluation = await test(test_data, app.agent, e2e=use_e2e)
+            evaluation = await test(
+                test_data, app.agent, e2e=use_e2e, disable_plotting=True
+            )
             return response.json(evaluation)
         except Exception as e:
             logger.error(traceback.format_exc())
@@ -869,7 +871,7 @@ async def evaluate_intents(request: Request) -> HTTPResponse:
         _, nlu_model = model.get_model_subdirectories(model_directory)
 
         try:
-            evaluation = run_evaluation(data_path, nlu_model)
+            evaluation = run_evaluation(data_path, nlu_model, disable_plotting=True)
             return response.json(evaluation)
         except Exception as e:
             logger.error(traceback.format_exc())
diff --git a/tests/cli/test_rasa_export.py b/tests/cli/test_rasa_export.py
index 2a5e0380a919..e2f2f56b0b24 100644
--- a/tests/cli/test_rasa_export.py
+++ b/tests/cli/test_rasa_export.py
@@ -65,7 +65,11 @@ def test_validate_timestamp_options_with_invalid_timestamps():
 def test_get_event_broker_and_tracker_store_from_endpoint_config(tmp_path: Path):
     # write valid config to file
     endpoints_path = write_endpoint_config_to_yaml(
-        tmp_path, {"event_broker": {"type": "sql"}, "tracker_store": {"type": "sql"}}
+        tmp_path,
+        {
+            "event_broker": {"type": "sql", "db": str(tmp_path / "rasa.db")},
+            "tracker_store": {"type": "sql"},
+        },
     )
 
     available_endpoints = rasa_core_utils.read_endpoints_from_path(endpoints_path)
diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index 394475806f7e..5884d6a94246 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -21,6 +21,14 @@ def test_test_core_no_plot(run_in_simple_project: Callable[..., RunResult]):
 
 
 def test_test(run_in_simple_project_with_model: Callable[..., RunResult]):
+    write_yaml(
+        {
+            "pipeline": "KeywordIntentClassifier",
+            "policies": [{"name": "MemoizationPolicy"}],
+        },
+        "config2.yml",
+    )
+
     run_in_simple_project_with_model("test")
 
     assert os.path.exists("results")
@@ -61,14 +69,15 @@ def test_test_nlu_cross_validation(run_in_simple_project: Callable[..., RunResul
 
 
 def test_test_nlu_comparison(run_in_simple_project: Callable[..., RunResult]):
-    copyfile("config.yml", "config-1.yml")
+    write_yaml({"pipeline": "KeywordIntentClassifier"}, "config.yml")
+    write_yaml({"pipeline": "KeywordIntentClassifier"}, "config2.yml")
 
     run_in_simple_project(
         "test",
         "nlu",
         "--config",
         "config.yml",
-        "config-1.yml",
+        "config2.yml",
         "--run",
         "2",
         "--percentages",
@@ -123,8 +132,6 @@ def test_test_core_comparison_after_train(
         "--percentages",
         "25",
         "75",
-        "--augmentation",
-        "5",
         "--out",
         "comparison_models",
     )
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index 6257da93f10f..239c27e6b1fa 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -72,30 +72,29 @@ def test_validate_invalid_path():
         get_validated_path("test test test", "out", "default")
 
 
-def test_validate_valid_path():
-    tempdir = tempfile.mkdtemp()
-
-    assert get_validated_path(tempdir, "out", "default") == tempdir
+def test_validate_valid_path(tmp_path: pathlib.Path):
+    assert get_validated_path(str(tmp_path), "out", "default") == str(tmp_path)
 
 
 def test_validate_if_none_is_valid():
     assert get_validated_path(None, "out", "default", True) is None
 
 
-def test_validate_with_none_if_default_is_valid(caplog: LogCaptureFixture):
-    tempdir = tempfile.mkdtemp()
-
+def test_validate_with_none_if_default_is_valid(
+    caplog: LogCaptureFixture, tmp_path: pathlib.Path
+):
     with caplog.at_level(logging.WARNING, rasa.cli.utils.logger.name):
-        assert get_validated_path(None, "out", tempdir) == tempdir
+        assert get_validated_path(None, "out", str(tmp_path)) == str(tmp_path)
 
     assert caplog.records == []
 
 
-def test_validate_with_invalid_directory_if_default_is_valid():
-    tempdir = tempfile.mkdtemp()
+def test_validate_with_invalid_directory_if_default_is_valid(tmp_path: pathlib.Path):
     invalid_directory = "gcfhvjkb"
     with pytest.warns(UserWarning) as record:
-        assert get_validated_path(invalid_directory, "out", tempdir) == tempdir
+        assert get_validated_path(invalid_directory, "out", str(tmp_path)) == str(
+            tmp_path
+        )
     assert len(record) == 1
     assert "does not seem to exist" in record[0].message.args[0]
 
diff --git a/tests/core/conftest.py b/tests/core/conftest.py
index b3e08caaa91f..dba0e8f308e0 100644
--- a/tests/core/conftest.py
+++ b/tests/core/conftest.py
@@ -226,7 +226,7 @@ def project() -> Text:
 
 
 @pytest.fixture
-async def form_bot_agent(trained_async, tmpdir_factory) -> Agent:
+async def form_bot_agent(trained_async) -> Agent:
     zipped_model = await trained_async(
         domain="examples/formbot/domain.yml",
         config="examples/formbot/config.yml",
diff --git a/tests/core/test_broker.py b/tests/core/test_broker.py
index a3008c3803b1..71bbd3b976f4 100644
--- a/tests/core/test_broker.py
+++ b/tests/core/test_broker.py
@@ -1,6 +1,7 @@
 import json
 import logging
 from pathlib import Path
+import textwrap
 
 from typing import Union, Text, List, Optional, Type
 
@@ -9,6 +10,7 @@
 
 from _pytest.monkeypatch import MonkeyPatch
 
+import rasa.utils.io
 from rasa.core.brokers.broker import EventBroker
 from rasa.core.brokers.file import FileEventBroker
 from rasa.core.brokers.kafka import KafkaEventBroker
@@ -117,14 +119,21 @@ def test_sql_broker_logs_to_sql_db():
     assert events_types == ["user", "slot", "restart"]
 
 
-def test_file_broker_from_config():
-    cfg = read_endpoint_config(
-        "data/test_endpoints/event_brokers/file_endpoint.yml", "event_broker"
+def test_file_broker_from_config(tmp_path: Path):
+    endpoint_config = textwrap.dedent(
+        f"""
+        event_broker:
+          path: "{tmp_path / 'rasa_test_event.log'}"
+          type: "file"
+    """
     )
+    rasa.utils.io.write_text_file(endpoint_config, tmp_path / "endpoint.yml")
+
+    cfg = read_endpoint_config(str(tmp_path / "endpoint.yml"), "event_broker")
     actual = EventBroker.create(cfg)
 
     assert isinstance(actual, FileEventBroker)
-    assert actual.path == "rasa_event.log"
+    assert actual.path.endswith("rasa_test_event.log")
 
 
 def test_file_broker_logs_to_file(tmp_path: Path):
@@ -166,8 +175,13 @@ def test_file_broker_properly_logs_newlines(tmp_path):
     assert recovered == [event_with_newline]
 
 
-def test_load_custom_broker_name():
-    config = EndpointConfig(**{"type": "rasa.core.brokers.file.FileEventBroker"})
+def test_load_custom_broker_name(tmp_path: Path):
+    config = EndpointConfig(
+        **{
+            "type": "rasa.core.brokers.file.FileEventBroker",
+            "path": str(tmp_path / "rasa_event.log"),
+        }
+    )
     assert EventBroker.create(config)
 
 
@@ -209,12 +223,15 @@ def test_no_pika_logs_if_no_debug_mode(caplog: LogCaptureFixture):
 
 
 def test_pika_logs_in_debug_mode(caplog: LogCaptureFixture, monkeypatch: MonkeyPatch):
-    from rasa.core.brokers import pika
+    from rasa.core.brokers.pika import _pika_log_level
 
-    with caplog.at_level(logging.DEBUG):
-        with pytest.raises(Exception):
-            pika.initialise_pika_connection(
-                "localhost", "user", "password", connection_attempts=1
-            )
+    pika_level = logging.getLogger("pika").level
 
-    assert len(caplog.records) > 0
+    with caplog.at_level(logging.INFO):
+        with _pika_log_level(logging.CRITICAL):
+            assert logging.getLogger("pika").level == logging.CRITICAL
+
+    with caplog.at_level(logging.DEBUG):
+        with _pika_log_level(logging.CRITICAL):
+            # level should not change
+            assert logging.getLogger("pika").level == pika_level
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
index 353d945dca3b..783b3c110b0b 100644
--- a/tests/core/test_data.py
+++ b/tests/core/test_data.py
@@ -81,23 +81,23 @@ def test_get_core_nlu_directories_with_none():
     assert all(not os.listdir(directory) for directory in directories)
 
 
-def test_same_file_names_get_resolved(tmpdir):
+def test_same_file_names_get_resolved(tmp_path):
     # makes sure the resolution properly handles if there are two files with
     # with the same name in different directories
 
-    tmpdir.join("one").mkdir()
-    tmpdir.join("two").mkdir()
-    data_dir_one = os.path.join(tmpdir.join("one").join("stories.md").strpath)
-    data_dir_two = os.path.join(tmpdir.join("two").join("stories.md").strpath)
+    (tmp_path / "one").mkdir()
+    (tmp_path / "two").mkdir()
+    data_dir_one = str(tmp_path / "one" / "stories.md")
+    data_dir_two = str(tmp_path / "two" / "stories.md")
     shutil.copy2(DEFAULT_STORIES_FILE, data_dir_one)
     shutil.copy2(DEFAULT_STORIES_FILE, data_dir_two)
 
-    nlu_dir_one = os.path.join(tmpdir.join("one").join("nlu.yml").strpath)
-    nlu_dir_two = os.path.join(tmpdir.join("two").join("nlu.yml").strpath)
+    nlu_dir_one = str(tmp_path / "one" / "nlu.yml")
+    nlu_dir_two = str(tmp_path / "two" / "nlu.yml")
     shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_one)
     shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_two)
 
-    core_directory, nlu_directory = data.get_core_nlu_directories([tmpdir.strpath])
+    core_directory, nlu_directory = data.get_core_nlu_directories([str(tmp_path)])
 
     nlu_files = os.listdir(nlu_directory)
 
diff --git a/tests/core/test_domain.py b/tests/core/test_domain.py
index 987497f1ee70..74ba595f5c23 100644
--- a/tests/core/test_domain.py
+++ b/tests/core/test_domain.py
@@ -505,17 +505,16 @@ def test_collect_intent_properties(intents, entities, intent_properties):
     assert Domain.collect_intent_properties(intents, entities) == intent_properties
 
 
-def test_load_domain_from_directory_tree(tmpdir_factory: TempdirFactory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_load_domain_from_directory_tree(tmp_path: Path):
     root_domain = {"actions": ["utter_root", "utter_root2"]}
-    utils.dump_obj_as_yaml_to_file(root / "domain_pt1.yml", root_domain)
+    utils.dump_obj_as_yaml_to_file(tmp_path / "domain_pt1.yml", root_domain)
 
-    subdirectory_1 = root / "Skill 1"
+    subdirectory_1 = tmp_path / "Skill 1"
     subdirectory_1.mkdir()
     skill_1_domain = {"actions": ["utter_skill_1"]}
     utils.dump_obj_as_yaml_to_file(subdirectory_1 / "domain_pt2.yml", skill_1_domain)
 
-    subdirectory_2 = root / "Skill 2"
+    subdirectory_2 = tmp_path / "Skill 2"
     subdirectory_2.mkdir()
     skill_2_domain = {"actions": ["utter_skill_2"]}
     utils.dump_obj_as_yaml_to_file(subdirectory_2 / "domain_pt3.yml", skill_2_domain)
@@ -528,7 +527,7 @@ def test_load_domain_from_directory_tree(tmpdir_factory: TempdirFactory):
         subsubdirectory / "domain_pt4.yaml", skill_2_1_domain
     )
 
-    actual = Domain.load(str(root))
+    actual = Domain.load(str(tmp_path))
     expected = [
         "utter_root",
         "utter_root2",
diff --git a/tests/core/test_lock_store.py b/tests/core/test_lock_store.py
index 38b767170aa7..804e04d063b0 100644
--- a/tests/core/test_lock_store.py
+++ b/tests/core/test_lock_store.py
@@ -1,5 +1,6 @@
 import asyncio
 import os
+from pathlib import Path
 
 import numpy as np
 import pytest
@@ -148,15 +149,14 @@ async def test_multiple_conversation_ids(default_agent: Agent):
     assert processed_ids == conversation_ids
 
 
-async def test_message_order(tmpdir_factory: TempdirFactory, default_agent: Agent):
+async def test_message_order(tmp_path: Path, default_agent: Agent):
     start_time = time.time()
     n_messages = 10
     lock_wait = 0.1
 
     # let's write the incoming order of messages and the order of results to temp files
-    temp_path = tmpdir_factory.mktemp("message_order")
-    results_file = temp_path / "results_file"
-    incoming_order_file = temp_path / "incoming_order_file"
+    results_file = tmp_path / "results_file"
+    incoming_order_file = tmp_path / "incoming_order_file"
 
     # We need to mock `Agent.handle_message()` so we can introduce an
     # artificial holdup (`wait_time_in_seconds`). In the mocked method, we'll
diff --git a/tests/core/test_model.py b/tests/core/test_model.py
index 13485fc80974..addcdd5ff70c 100644
--- a/tests/core/test_model.py
+++ b/tests/core/test_model.py
@@ -80,7 +80,7 @@ def test_get_model_exception(model_path):
 
 
 def test_get_model_from_directory_with_subdirectories(
-    trained_rasa_model, tmpdir_factory: TempdirFactory
+    trained_rasa_model: Text, tmp_path: Path
 ):
     unpacked = get_model(trained_rasa_model)
     unpacked_core, unpacked_nlu = get_model_subdirectories(unpacked)
@@ -88,9 +88,8 @@ def test_get_model_from_directory_with_subdirectories(
     assert unpacked_core
     assert unpacked_nlu
 
-    directory = tmpdir_factory.mktemp("empty_model_dir").strpath
     with pytest.raises(ModelNotFound):
-        get_model_subdirectories(directory)
+        get_model_subdirectories(str(tmp_path))  # temp path should be empty
 
 
 def test_get_model_from_directory_nlu_only(trained_rasa_model):
@@ -235,7 +234,9 @@ async def test_create_fingerprint_from_invalid_paths(project, project_files):
 
 
 @pytest.mark.parametrize("use_fingerprint", [True, False])
-async def test_rasa_packaging(trained_rasa_model, project, use_fingerprint):
+async def test_rasa_packaging(
+    trained_rasa_model: Text, project: Text, use_fingerprint: bool, tmp_path: Path
+):
     unpacked_model_path = get_model(trained_rasa_model)
 
     os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
@@ -244,8 +245,7 @@ async def test_rasa_packaging(trained_rasa_model, project, use_fingerprint):
     else:
         fingerprint = None
 
-    tempdir = tempfile.mkdtemp()
-    output_path = os.path.join(tempdir, "test.tar.gz")
+    output_path = str(tmp_path / "test.tar.gz")
 
     create_package_rasa(unpacked_model_path, output_path, fingerprint)
 
@@ -314,23 +314,26 @@ async def test_rasa_packaging(trained_rasa_model, project, use_fingerprint):
         },
     ],
 )
-def test_should_retrain(trained_rasa_model: Text, fingerprint: Fingerprint):
-    old_model = set_fingerprint(trained_rasa_model, fingerprint["old"])
+def test_should_retrain(
+    trained_rasa_model: Text, fingerprint: Fingerprint, tmp_path: Path
+):
+    old_model = set_fingerprint(trained_rasa_model, fingerprint["old"], tmp_path)
 
-    retrain = should_retrain(fingerprint["new"], old_model, tempfile.mkdtemp())
+    retrain = should_retrain(fingerprint["new"], old_model, str(tmp_path))
 
     assert retrain.should_retrain_core() == fingerprint["retrain_core"]
     assert retrain.should_retrain_nlg() == fingerprint["retrain_nlg"]
     assert retrain.should_retrain_nlu() == fingerprint["retrain_nlu"]
 
 
-def set_fingerprint(trained_rasa_model: Text, fingerprint: Fingerprint) -> Text:
+def set_fingerprint(
+    trained_rasa_model: Text, fingerprint: Fingerprint, tmp_path: Path
+) -> Text:
     unpacked_model_path = get_model(trained_rasa_model)
 
     os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
 
-    tempdir = tempfile.mkdtemp()
-    output_path = os.path.join(tempdir, "test.tar.gz")
+    output_path = str(tmp_path / "test.tar.gz")
 
     create_package_rasa(unpacked_model_path, output_path, fingerprint)
 
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index 9232677cbb40..a96dab7da207 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Type
 from unittest.mock import Mock, patch
 
@@ -105,23 +106,25 @@ async def trained_policy(self, featurizer, priority):
         policy.train(training_trackers, default_domain, RegexInterpreter())
         return policy
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
         assert trained_policy.featurizer.max_history == self.max_history
         assert isinstance(
             trained_policy.featurizer.state_featurizer, BinarySingleStateFeaturizer
         )
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
         assert loaded.featurizer.max_history == self.max_history
         assert isinstance(
             loaded.featurizer.state_featurizer, BinarySingleStateFeaturizer
         )
 
-    async def test_persist_and_load(self, trained_policy, default_domain, tmpdir):
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+    async def test_persist_and_load(
+        self, trained_policy: Policy, default_domain: Domain, tmp_path: Path
+    ):
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         trackers = await train_trackers(default_domain, augmentation_factor=20)
 
         for tracker in trackers:
@@ -147,10 +150,10 @@ def test_prediction_on_empty_tracker(self, trained_policy, default_domain):
     @pytest.mark.filterwarnings(
         "ignore:.*without a trained model present.*:UserWarning"
     )
-    def test_persist_and_load_empty_policy(self, tmpdir):
+    def test_persist_and_load_empty_policy(self, tmp_path: Path):
         empty_policy = self.create_policy(None, None)
-        empty_policy.persist(tmpdir.strpath)
-        loaded = empty_policy.__class__.load(tmpdir.strpath)
+        empty_policy.persist(str(tmp_path))
+        loaded = empty_policy.__class__.load(str(tmp_path))
         assert loaded is not None
 
     @staticmethod
@@ -418,14 +421,14 @@ def create_policy(self, featurizer, priority):
         p = TEDPolicy(priority=priority)
         return p
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert isinstance(trained_policy.featurizer, FullDialogueTrackerFeaturizer)
         assert isinstance(
             trained_policy.featurizer.state_featurizer,
             LabelTokenizerSingleStateFeaturizer,
         )
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert isinstance(loaded.featurizer, FullDialogueTrackerFeaturizer)
         assert isinstance(
             loaded.featurizer.state_featurizer, LabelTokenizerSingleStateFeaturizer
@@ -440,15 +443,15 @@ def create_policy(self, featurizer, priority):
         p = TEDPolicy(priority=priority, max_history=self.max_history)
         return p
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
         assert trained_policy.featurizer.max_history == self.max_history
         assert isinstance(
             trained_policy.featurizer.state_featurizer,
             LabelTokenizerSingleStateFeaturizer,
         )
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
         assert loaded.featurizer.max_history == self.max_history
         assert isinstance(
@@ -495,15 +498,17 @@ def create_policy(self, featurizer, priority):
         p = MemoizationPolicy(priority=priority, max_history=max_history)
         return p
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
         assert trained_policy.featurizer.state_featurizer is None
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
         assert loaded.featurizer.state_featurizer is None
 
-    async def test_memorise(self, trained_policy, default_domain):
+    async def test_memorise(
+        self, trained_policy: MemoizationPolicy, default_domain: Domain
+    ):
         trackers = await train_trackers(default_domain, augmentation_factor=20)
         trained_policy.train(trackers, default_domain, RegexInterpreter())
         lookup_with_augmentation = trained_policy.lookup
@@ -639,10 +644,10 @@ def create_policy(self, featurizer, priority):
         p = MappingPolicy()
         return p
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert trained_policy.featurizer is None
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert loaded.featurizer is None
 
     @pytest.fixture(scope="module")
@@ -720,10 +725,10 @@ def create_policy(self, featurizer, priority):
         p = FallbackPolicy(priority=priority)
         return p
 
-    def test_featurizer(self, trained_policy, tmpdir):
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
         assert trained_policy.featurizer is None
-        trained_policy.persist(tmpdir.strpath)
-        loaded = trained_policy.__class__.load(tmpdir.strpath)
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
         assert loaded.featurizer is None
 
     @pytest.mark.parametrize(
diff --git a/tests/core/test_trackers.py b/tests/core/test_trackers.py
index 78888eec0745..99e1db5095ec 100644
--- a/tests/core/test_trackers.py
+++ b/tests/core/test_trackers.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 import tempfile
 from typing import List, Text, Dict, Any, Type
 
@@ -129,14 +130,14 @@ def test_tracker_store(store, pair):
     assert restored == tracker
 
 
-async def test_tracker_write_to_story(tmpdir, moodbot_domain: Domain):
+async def test_tracker_write_to_story(tmp_path: Path, moodbot_domain: Domain):
     tracker = tracker_from_dialogue_file(
         "data/test_dialogues/moodbot.json", moodbot_domain
     )
-    p = tmpdir.join("export.md")
-    tracker.export_stories_to_file(p.strpath)
+    p = tmp_path / "export.md"
+    tracker.export_stories_to_file(str(p))
     trackers = await training.load_data(
-        p.strpath,
+        str(p),
         moodbot_domain,
         use_story_concatenation=False,
         tracker_limit=1000,
@@ -465,17 +466,17 @@ def test_traveling_back_in_time(default_domain: Domain):
     assert len(list(tracker.generate_all_prior_trackers())) == 2
 
 
-async def test_dump_and_restore_as_json(default_agent, tmpdir_factory):
+async def test_dump_and_restore_as_json(default_agent: Agent, tmp_path: Path):
     trackers = await default_agent.load_data(DEFAULT_STORIES_FILE)
 
     for tracker in trackers:
-        out_path = tmpdir_factory.mktemp("tracker").join("dumped_tracker.json")
+        out_path = tmp_path / "dumped_tracker.json"
 
         dumped = tracker.current_state(EventVerbosity.AFTER_RESTART)
-        rasa.utils.io.dump_obj_as_json_to_file(out_path.strpath, dumped)
+        rasa.utils.io.dump_obj_as_json_to_file(str(out_path), dumped)
 
         restored_tracker = restore.load_tracker_from_json(
-            out_path.strpath, default_agent.domain
+            str(out_path), default_agent.domain
         )
 
         assert restored_tracker == tracker
diff --git a/tests/core/test_visualization.py b/tests/core/test_visualization.py
index 767f7ea1d366..3d30b0635014 100644
--- a/tests/core/test_visualization.py
+++ b/tests/core/test_visualization.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Text
 
 import pytest
@@ -82,7 +83,9 @@ def test_common_action_prefix_unequal():
     "stories_file",
     ["data/test_stories/stories.md", "data/test_yaml_stories/stories.yml"],
 )
-async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmpdir):
+async def test_graph_persistence(
+    stories_file: Text, default_domain: Domain, tmp_path: Path
+):
     from os.path import isfile
     from networkx.drawing import nx_pydot
     import rasa.core.training.loading as core_loading
@@ -90,7 +93,7 @@ async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmp
     story_steps = await core_loading.load_data_from_resource(
         stories_file, default_domain
     )
-    out_file = tmpdir.join("graph.html").strpath
+    out_file = str(tmp_path / "graph.html")
     generated_graph = await visualization.visualize_stories(
         story_steps,
         default_domain,
@@ -113,14 +116,14 @@ async def test_graph_persistence(stories_file: Text, default_domain: Domain, tmp
     "stories_file",
     ["data/test_stories/stories.md", "data/test_yaml_stories/stories.yml"],
 )
-async def test_merge_nodes(stories_file: Text, default_domain: Domain, tmpdir):
+async def test_merge_nodes(stories_file: Text, default_domain: Domain, tmp_path: Path):
     from os.path import isfile
     import rasa.core.training.loading as core_loading
 
     story_steps = await core_loading.load_data_from_resource(
         stories_file, default_domain
     )
-    out_file = tmpdir.join("graph.html").strpath
+    out_file = str(tmp_path / "graph.html")
     await visualization.visualize_stories(
         story_steps,
         default_domain,
diff --git a/tests/core/training/story_reader/test_common_story_reader.py b/tests/core/training/story_reader/test_common_story_reader.py
index f1d86cb5fde6..c9339be1e24c 100644
--- a/tests/core/training/story_reader/test_common_story_reader.py
+++ b/tests/core/training/story_reader/test_common_story_reader.py
@@ -173,13 +173,13 @@ async def test_generate_training_data_original_and_augmented_trackers(
     ],
 )
 async def test_visualize_training_data_graph(
-    stories_file: Text, tmpdir, default_domain: Domain
+    stories_file: Text, tmp_path: Path, default_domain: Domain
 ):
     graph = await training.extract_story_graph(stories_file, default_domain)
 
     graph = graph.with_cycles_removed()
 
-    out_path = tmpdir.join("graph.html").strpath
+    out_path = str(tmp_path / "graph.html")
 
     # this will be the plotted networkx graph
     G = graph.visualize(out_path)
diff --git a/tests/core/training/story_reader/test_markdown_story_reader.py b/tests/core/training/story_reader/test_markdown_story_reader.py
index 8229acd9a510..90d0f63d8a32 100644
--- a/tests/core/training/story_reader/test_markdown_story_reader.py
+++ b/tests/core/training/story_reader/test_markdown_story_reader.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Dict, Text
 
 import pytest
@@ -20,15 +21,17 @@
 from rasa.core.training.structures import Story
 
 
-async def test_persist_and_read_test_story_graph(tmpdir, default_domain: Domain):
+async def test_persist_and_read_test_story_graph(
+    tmp_path: Path, default_domain: Domain
+):
     graph = await training.extract_story_graph(
         "data/test_stories/stories.md", default_domain
     )
-    out_path = tmpdir.join("persisted_story.md")
-    rasa.utils.io.write_text_file(graph.as_story_string(), out_path.strpath)
+    out_path = tmp_path / "persisted_story.md"
+    rasa.utils.io.write_text_file(graph.as_story_string(), str(out_path))
 
     recovered_trackers = await training.load_data(
-        out_path.strpath,
+        str(out_path),
         default_domain,
         use_story_concatenation=False,
         tracker_limit=1000,
@@ -49,15 +52,15 @@ async def test_persist_and_read_test_story_graph(tmpdir, default_domain: Domain)
         existing_stories.discard(story_str)
 
 
-async def test_persist_and_read_test_story(tmpdir, default_domain: Domain):
+async def test_persist_and_read_test_story(tmp_path: Path, default_domain: Domain):
     graph = await training.extract_story_graph(
         "data/test_stories/stories.md", default_domain
     )
-    out_path = tmpdir.join("persisted_story.md")
-    Story(graph.story_steps).dump_to_file(out_path.strpath)
+    out_path = tmp_path / "persisted_story.md"
+    Story(graph.story_steps).dump_to_file(str(out_path))
 
     recovered_trackers = await training.load_data(
-        out_path.strpath,
+        str(out_path),
         default_domain,
         use_story_concatenation=False,
         tracker_limit=1000,
diff --git a/tests/examples/test_example_bots_training_data.py b/tests/examples/test_example_bots_training_data.py
index fee0156c0358..4ba07428ab2d 100644
--- a/tests/examples/test_example_bots_training_data.py
+++ b/tests/examples/test_example_bots_training_data.py
@@ -1,7 +1,9 @@
+from pathlib import Path
 from typing import Text
 
 import pytest
 
+from rasa.cli import scaffold
 from rasa.importers.importer import TrainingDataImporter
 
 
@@ -33,11 +35,6 @@
             "examples/rules/domain.yml",
             "examples/rules/data",
         ),
-        (
-            "rasa/cli/initial_project/config.yml",
-            "rasa/cli/initial_project/domain.yml",
-            "rasa/cli/initial_project/data",
-        ),
     ],
 )
 async def test_example_bot_training_data_not_raises(
@@ -53,3 +50,21 @@ async def test_example_bot_training_data_not_raises(
         await importer.get_stories()
 
     assert not len(record)
+
+
+async def test_example_bot_training_on_initial_project(tmp_path: Path):
+    # This project is tested separately on a scaffolded copy: testing it in
+    # place would let configuration suggestions modify the initial files.
+    scaffold.create_initial_project(str(tmp_path))
+
+    importer = TrainingDataImporter.load_from_config(
+        str(tmp_path / "config.yml"),
+        str(tmp_path / "domain.yml"),
+        str(tmp_path / "data"),
+    )
+
+    with pytest.warns(None) as record:
+        await importer.get_nlu_data()
+        await importer.get_stories()
+
+    assert not len(record)
diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py
index 8b0361cd32b1..bdfe83f863fc 100644
--- a/tests/importers/test_multi_project.py
+++ b/tests/importers/test_multi_project.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Dict, Text
 
 import pytest
@@ -10,25 +11,25 @@
     DEFAULT_E2E_TESTS_PATH,
 )
 from rasa.nlu.training_data.formats import RasaReader
+import rasa.utils.io
 from rasa import model
 from rasa.core import utils
 from rasa.core.domain import Domain
 from rasa.importers.multi_project import MultiProjectImporter
 
 
-def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_load_imports_from_directory_tree(tmp_path: Path):
     root_imports = {"imports": ["Project A"]}
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", root_imports)
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", root_imports)
 
-    project_a_directory = root / "Project A"
+    project_a_directory = tmp_path / "Project A"
     project_a_directory.mkdir()
     project_a_imports = {"imports": ["../Project B"]}
     utils.dump_obj_as_yaml_to_file(
         project_a_directory / "config.yml", project_a_imports
     )
 
-    project_b_directory = root / "Project B"
+    project_b_directory = tmp_path / "Project B"
     project_b_directory.mkdir()
     project_b_imports = {"some other": ["../Project C"]}
     utils.dump_obj_as_yaml_to_file(
@@ -44,7 +45,7 @@ def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory):
     )
 
     # should not be imported
-    subdirectory_3 = root / "Project C"
+    subdirectory_3 = tmp_path / "Project C"
     subdirectory_3.mkdir()
 
     expected = [
@@ -52,48 +53,43 @@ def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory):
         os.path.join(str(project_b_directory)),
     ]
 
-    actual = MultiProjectImporter(str(root / "config.yml"))
+    actual = MultiProjectImporter(str(tmp_path / "config.yml"))
 
     assert actual._imports == expected
 
 
-def test_load_imports_without_imports(tmpdir_factory: TempdirFactory):
+def test_load_imports_without_imports(tmp_path: Path):
     empty_config = {}
-    root = tmpdir_factory.mktemp("Parent Bot")
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", empty_config)
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", empty_config)
 
-    project_a_directory = root / "Project A"
+    project_a_directory = tmp_path / "Project A"
     project_a_directory.mkdir()
     utils.dump_obj_as_yaml_to_file(project_a_directory / "config.yml", empty_config)
 
-    project_b_directory = root / "Project B"
+    project_b_directory = tmp_path / "Project B"
     project_b_directory.mkdir()
     utils.dump_obj_as_yaml_to_file(project_b_directory / "config.yml", empty_config)
 
-    actual = MultiProjectImporter(str(root / "config.yml"))
+    actual = MultiProjectImporter(str(tmp_path / "config.yml"))
 
-    assert actual.is_imported(str(root / "Project C"))
+    assert actual.is_imported(str(tmp_path / "Project C"))
 
 
 @pytest.mark.parametrize("input_dict", [{}, {"imports": None}])
-def test_load_from_none(input_dict: Dict, tmpdir_factory: TempdirFactory):
-    root = tmpdir_factory.mktemp("Parent Bot")
-    config_path = root / "config.yml"
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", input_dict)
+def test_load_from_none(input_dict: Dict, tmp_path: Path):
+    config_path = tmp_path / "config.yml"
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", input_dict)
 
     actual = MultiProjectImporter(str(config_path))
 
     assert actual._imports == list()
 
 
-def test_load_if_subproject_is_more_specific_than_parent(
-    tmpdir_factory: TempdirFactory,
-):
-    root = tmpdir_factory.mktemp("Parent Bot")
-    config_path = str(root / "config.yml")
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", {})
+def test_load_if_subproject_is_more_specific_than_parent(tmp_path: Path,):
+    config_path = str(tmp_path / "config.yml")
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", {})
 
-    project_a_directory = root / "Project A"
+    project_a_directory = tmp_path / "Project A"
     project_a_directory.mkdir()
     project_a_imports = {"imports": ["Project B"]}
     utils.dump_obj_as_yaml_to_file(
@@ -108,10 +104,11 @@ def test_load_if_subproject_is_more_specific_than_parent(
 @pytest.mark.parametrize(
     "input_path", ["A/A/A/B", "A/A/A", "A/B/A/A", "A/A/A/B/C/D/E.type"]
 )
-def test_in_imports(input_path: Text, tmpdir_factory: TempdirFactory):
-    root = tmpdir_factory.mktemp("Parent Bot")
-    config_path = str(root / "config.yml")
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", {"imports": ["A/A/A", "A/B/A"]})
+def test_in_imports(input_path: Text, tmp_path: Path):
+    config_path = str(tmp_path / "config.yml")
+    utils.dump_obj_as_yaml_to_file(
+        tmp_path / "config.yml", {"imports": ["A/A/A", "A/B/A"]}
+    )
 
     importer = MultiProjectImporter(config_path, project_directory=os.getcwd())
 
@@ -119,52 +116,51 @@ def test_in_imports(input_path: Text, tmpdir_factory: TempdirFactory):
 
 
 @pytest.mark.parametrize("input_path", ["A/C", "A/A/B", "A/B"])
-def test_not_in_imports(input_path: Text, tmpdir_factory: TempdirFactory):
-    root = tmpdir_factory.mktemp("Parent Bot")
-    config_path = str(root / "config.yml")
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", {"imports": ["A/A/A", "A/B/A"]})
+def test_not_in_imports(input_path: Text, tmp_path: Path):
+    config_path = str(tmp_path / "config.yml")
+    utils.dump_obj_as_yaml_to_file(
+        tmp_path / "config.yml", {"imports": ["A/A/A", "A/B/A"]}
+    )
     importer = MultiProjectImporter(config_path, project_directory=os.getcwd())
 
     assert not importer.is_imported(input_path)
 
 
-def test_cyclic_imports(tmpdir_factory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_cyclic_imports(tmp_path: Path):
     project_imports = {"imports": ["Project A"]}
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", project_imports)
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", project_imports)
 
-    project_a_directory = root / "Project A"
+    project_a_directory = tmp_path / "Project A"
     project_a_directory.mkdir()
     project_a_imports = {"imports": ["../Project B"]}
     utils.dump_obj_as_yaml_to_file(
         project_a_directory / "config.yml", project_a_imports
     )
 
-    project_b_directory = root / "Project B"
+    project_b_directory = tmp_path / "Project B"
     project_b_directory.mkdir()
     project_b_imports = {"imports": ["../Project A"]}
     utils.dump_obj_as_yaml_to_file(
         project_b_directory / "config.yml", project_b_imports
     )
 
-    actual = MultiProjectImporter(str(root / "config.yml"))
+    actual = MultiProjectImporter(str(tmp_path / "config.yml"))
 
     assert actual._imports == [str(project_a_directory), str(project_b_directory)]
 
 
-def test_import_outside_project_directory(tmpdir_factory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_import_outside_project_directory(tmp_path: Path):
     project_imports = {"imports": ["Project A"]}
-    utils.dump_obj_as_yaml_to_file(root / "config.yml", project_imports)
+    utils.dump_obj_as_yaml_to_file(tmp_path / "config.yml", project_imports)
 
-    project_a_directory = root / "Project A"
+    project_a_directory = tmp_path / "Project A"
     project_a_directory.mkdir()
     project_a_imports = {"imports": ["../Project B"]}
     utils.dump_obj_as_yaml_to_file(
         project_a_directory / "config.yml", project_a_imports
     )
 
-    project_b_directory = root / "Project B"
+    project_b_directory = tmp_path / "Project B"
     project_b_directory.mkdir()
     project_b_imports = {"imports": ["../Project C"]}
     utils.dump_obj_as_yaml_to_file(
@@ -173,42 +169,46 @@ def test_import_outside_project_directory(tmpdir_factory):
 
     actual = MultiProjectImporter(str(project_a_directory / "config.yml"))
 
-    assert actual._imports == [str(project_b_directory), str(root / "Project C")]
+    assert actual._imports == [str(project_b_directory), str(tmp_path / "Project C")]
 
 
-def test_importing_additional_files(tmpdir_factory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_importing_additional_files(tmp_path: Path):
     config = {"imports": ["bots/Bot A"]}
-    config_path = str(root / "config.yml")
+    config_path = str(tmp_path / "config.yml")
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
-    additional_file = root / "directory" / "file.md"
+    additional_file = tmp_path / "directory" / "file.md"
+    additional_file.parent.mkdir()
 
     # create intermediate directories and fake files
-    additional_file.write("""## story""", ensure=True)
+    rasa.utils.io.write_text_file("""## story""", additional_file)
     selector = MultiProjectImporter(
-        config_path, training_data_paths=[str(root / "directory"), str(additional_file)]
+        config_path,
+        training_data_paths=[str(tmp_path / "directory"), str(additional_file)],
     )
 
     assert selector.is_imported(str(additional_file))
     assert str(additional_file) in selector._story_paths
 
 
-def test_not_importing_not_relevant_additional_files(tmpdir_factory):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_not_importing_not_relevant_additional_files(tmp_path: Path):
     config = {"imports": ["bots/Bot A"]}
-    config_path = str(root / "config.yml")
+    config_path = str(tmp_path / "config.yml")
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
-    additional_file = root / "directory" / "file.yml"
+    additional_file = tmp_path / "directory" / "file.yml"
+    additional_file.parent.mkdir()
+
     selector = MultiProjectImporter(
-        config_path, training_data_paths=[str(root / "data"), str(additional_file)]
+        config_path, training_data_paths=[str(tmp_path / "data"), str(additional_file)]
     )
 
-    not_relevant_file1 = root / "data" / "another directory" / "file.yml"
-    not_relevant_file1.write({}, ensure=True)
-    not_relevant_file2 = root / "directory" / "another_file.yml"
-    not_relevant_file2.write({}, ensure=True)
+    not_relevant_file1 = tmp_path / "data" / "another directory" / "file.yml"
+    not_relevant_file1.parent.mkdir(parents=True)  # "data" does not exist yet
+    rasa.utils.io.write_text_file("", not_relevant_file1)
+    not_relevant_file2 = tmp_path / "directory" / "another_file.yml"
+    not_relevant_file2.parent.mkdir(exist_ok=True)  # exists for additional_file
+    rasa.utils.io.write_text_file("", not_relevant_file2)
 
     assert not selector.is_imported(str(not_relevant_file1))
     assert not selector.is_imported(str(not_relevant_file2))
@@ -239,35 +239,37 @@ def test_not_importing_not_relevant_additional_files(tmpdir_factory):
     ],
 )
 async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
-    tmpdir_factory: TempdirFactory, test_stories_filename: Text, test_story: Text
+    tmp_path: Path, test_stories_filename: Text, test_story: Text
 ):
     from rasa.core.training.structures import StoryGraph
     import rasa.core.training.loading as core_loading
 
-    root = tmpdir_factory.mktemp("Parent Bot")
     config = {"imports": ["bots/Bot A"]}
-    config_path = str(root / "config.yml")
+    config_path = str(tmp_path / "config.yml")
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
-    story_file = root / "bots" / "Bot A" / "data" / "stories.md"
-    story_file.write(
+    story_file = tmp_path / "bots" / "Bot A" / "data" / "stories.md"
+    story_file.parent.mkdir(parents=True)  # "bots/Bot A" does not exist yet
+    rasa.utils.io.write_text_file(
         """
         ## story
         * greet
             - utter_greet
         """,
-        ensure=True,
+        story_file,
     )
 
-    e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / test_stories_filename
+    story_test_file = (
+        tmp_path / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / test_stories_filename
     )
-    e2e_story_test_file.write(test_story, ensure=True)
+    # Path has no py.path-style `write`; create the folder, then write once
+    story_test_file.parent.mkdir(parents=True)
+    rasa.utils.io.write_text_file(test_story, story_test_file)
 
     selector = MultiProjectImporter(config_path)
 
     story_steps = await core_loading.load_data_from_resource(
-        resource=str(e2e_story_test_file),
+        resource=str(story_test_file),
         domain=Domain.empty(),
         template_variables=None,
         use_e2e=True,
@@ -281,37 +283,36 @@ async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
     assert expected.as_story_string() == actual.as_story_string()
 
 
-def test_not_importing_e2e_conversation_tests_in_project(
-    tmpdir_factory: TempdirFactory,
-):
-    root = tmpdir_factory.mktemp("Parent Bot")
+def test_not_importing_e2e_conversation_tests_in_project(tmp_path: Path,):
     config = {"imports": ["bots/Bot A"]}
-    config_path = str(root / "config.yml")
+    config_path = str(tmp_path / "config.yml")
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
-    story_file = root / "bots" / "Bot A" / "data" / "stories.md"
-    story_file.write("""## story""", ensure=True)
+    story_file = tmp_path / "bots" / "Bot A" / "data" / "stories.md"
+    story_file.parent.mkdir(parents=True)  # "bots/Bot A" does not exist yet
+    rasa.utils.io.write_text_file("""## story""", story_file)
 
-    e2e_story_test_file = (
-        root / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_stories.yml"
+    story_test_file = (
+        tmp_path / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_stories.yml"
     )
-    e2e_story_test_file.write("""stories:""", ensure=True)
+    story_test_file.parent.mkdir()
+    rasa.utils.io.write_text_file("""stories:""", story_test_file)
 
     selector = MultiProjectImporter(config_path)
 
     # Conversation tests should not be included in story paths
     assert [str(story_file)] == selector._story_paths
-    assert [str(e2e_story_test_file)] == selector._e2e_story_paths
+    assert [str(story_test_file)] == selector._e2e_story_paths
 
 
-def test_single_additional_file(tmpdir_factory):
-    root = tmpdir_factory.mktemp("Parent Bot")
-    config_path = str(root / "config.yml")
+def test_single_additional_file(tmp_path: Path):
+    config_path = str(tmp_path / "config.yml")
     empty_config = {}
     utils.dump_obj_as_yaml_to_file(config_path, empty_config)
 
-    additional_file = root / "directory" / "file.yml"
-    additional_file.write({}, ensure=True)
+    additional_file = tmp_path / "directory" / "file.yml"
+    additional_file.parent.mkdir()
+    rasa.utils.io.write_yaml({}, additional_file)
 
     selector = MultiProjectImporter(
         config_path, training_data_paths=str(additional_file)
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index cd059da3095e..cf17d3e6ffea 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import numpy as np
 import pytest
 from unittest.mock import Mock
@@ -100,13 +102,13 @@ def test_check_labels_features_exist(messages, expected):
 
 
 async def _train_persist_load_with_different_settings(
-    pipeline, component_builder, tmpdir
+    pipeline, component_builder, tmp_path
 ):
     _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})
 
     (trainer, trained, persisted_path) = await train(
         _config,
-        path=tmpdir.strpath,
+        path=str(tmp_path),
         data="data/examples/rasa/demo-rasa-multi-intent.md",
         component_builder=component_builder,
     )
@@ -150,7 +152,7 @@ async def test_train_persist_load_with_different_settings(component_builder, tmp
     )
 
 
-async def test_raise_error_on_incorrect_pipeline(component_builder, tmpdir):
+async def test_raise_error_on_incorrect_pipeline(component_builder, tmp_path: Path):
     _config = RasaNLUModelConfig(
         {
             "pipeline": [
@@ -164,7 +166,7 @@ async def test_raise_error_on_incorrect_pipeline(component_builder, tmpdir):
     with pytest.raises(Exception) as e:
         await train(
             _config,
-            path=tmpdir.strpath,
+            path=str(tmp_path),
             data=DEFAULT_DATA_PATH,
             component_builder=component_builder,
         )
@@ -216,7 +218,7 @@ def as_pipeline(*components):
 )
 async def test_softmax_normalization(
     component_builder,
-    tmpdir,
+    tmp_path,
     classifier_params,
     data_path,
     output_length,
@@ -231,7 +233,7 @@ async def test_softmax_normalization(
     _config = RasaNLUModelConfig({"pipeline": pipeline})
     (trained_model, _, persisted_path) = await train(
         _config,
-        path=tmpdir.strpath,
+        path=str(tmp_path),
         data=data_path,
         component_builder=component_builder,
     )
diff --git a/tests/nlu/conftest.py b/tests/nlu/conftest.py
index cdd86c43c1ad..11ec0e1506fc 100644
--- a/tests/nlu/conftest.py
+++ b/tests/nlu/conftest.py
@@ -38,36 +38,6 @@ def blank_config() -> RasaNLUModelConfig:
     return RasaNLUModelConfig({"language": "en", "pipeline": []})
 
 
-@pytest.fixture(scope="session")
-def config_path() -> Text:
-    return write_file_config(
-        {
-            "language": "en",
-            "pipeline": [
-                {"name": "WhitespaceTokenizer"},
-                {"name": "CRFEntityExtractor", EPOCHS: 1, RANDOM_SEED: 42},
-                {"name": "CountVectorsFeaturizer"},
-                {"name": "DIETClassifier", EPOCHS: 1, RANDOM_SEED: 42},
-            ],
-        }
-    ).name
-
-
-@pytest.fixture(scope="session")
-def config_path_duplicate() -> Text:
-    return write_file_config(
-        {
-            "language": "en",
-            "pipeline": [
-                {"name": "WhitespaceTokenizer"},
-                {"name": "CRFEntityExtractor", EPOCHS: 1, RANDOM_SEED: 42},
-                {"name": "CountVectorsFeaturizer"},
-                {"name": "DIETClassifier", EPOCHS: 1, RANDOM_SEED: 42},
-            ],
-        }
-    ).name
-
-
 @pytest.fixture()
 def pretrained_embeddings_spacy_config() -> RasaNLUModelConfig:
     return RasaNLUModelConfig(
diff --git a/tests/nlu/test_evaluation.py b/tests/nlu/test_evaluation.py
index 8d0d607e2ece..313e299f829e 100644
--- a/tests/nlu/test_evaluation.py
+++ b/tests/nlu/test_evaluation.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from sanic.request import Request
 from typing import Text, Iterator, List, Dict, Any
 
@@ -51,12 +53,12 @@
 from tests.nlu.conftest import DEFAULT_DATA_PATH
 from rasa.nlu.selectors.response_selector import ResponseSelector
 from rasa.nlu.test import is_response_selector_present
-from rasa.utils.tensorflow.constants import EPOCHS, ENTITY_RECOGNITION
-
+from rasa.utils.tensorflow.constants import EPOCHS, ENTITY_RECOGNITION, RANDOM_SEED
 
 # https://github.com/pytest-dev/pytest-asyncio/issues/68
 # this event_loop is used by pytest-asyncio, and redefining it
 # is currently the only way of changing the scope of this fixture
+from tests.nlu.utilities import write_file_config
 
 
 @pytest.yield_fixture(scope="session")
@@ -439,9 +441,10 @@ def test_response_selector_present():
     assert not is_response_selector_present(interpreter_without_response_selector)
 
 
-def test_intent_evaluation_report(tmpdir_factory):
-    path = tmpdir_factory.mktemp("evaluation").strpath
-    report_folder = os.path.join(path, "reports")
+def test_intent_evaluation_report(tmp_path: Path):
+    path = tmp_path / "evaluation"
+    path.mkdir()
+    report_folder = str(path / "reports")
     report_filename = os.path.join(report_folder, "intent_report.json")
 
     rasa.utils.io.create_directory(report_folder)
@@ -486,8 +489,9 @@ def test_intent_evaluation_report(tmpdir_factory):
     assert os.path.exists(os.path.join(report_folder, "intent_successes.json"))
 
 
-def test_intent_evaluation_report_large(tmpdir_factory: TempdirFactory):
-    path = tmpdir_factory.mktemp("evaluation")
+def test_intent_evaluation_report_large(tmp_path: Path):
+    path = tmp_path / "evaluation"
+    path.mkdir()
     report_folder = path / "reports"
     report_filename = report_folder / "intent_report.json"
 
@@ -509,7 +513,7 @@ def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:
 
     evaluate_intents(
         intent_results,
-        report_folder,
+        str(report_folder),
         successes=False,
         errors=False,
         disable_plotting=True,
@@ -541,9 +545,10 @@ def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:
     assert report["C"]["confused_with"] == c_confused_with
 
 
-def test_response_evaluation_report(tmpdir_factory):
-    path = tmpdir_factory.mktemp("evaluation").strpath
-    report_folder = os.path.join(path, "reports")
+def test_response_evaluation_report(tmp_path: Path):
+    path = tmp_path / "evaluation"
+    path.mkdir()
+    report_folder = str(path / "reports")
     report_filename = os.path.join(report_folder, "response_selection_report.json")
 
     rasa.utils.io.create_directory(report_folder)
@@ -629,7 +634,7 @@ def test_get_entity_extractors(components, expected_extractors):
     assert extractors == expected_extractors
 
 
-def test_entity_evaluation_report(tmpdir_factory):
+def test_entity_evaluation_report(tmp_path):
     class EntityExtractorA(EntityExtractor):
 
         provides = ["entities"]
@@ -646,8 +651,9 @@ def __init__(self, component_config=None) -> None:
 
             super().__init__(component_config)
 
-    path = tmpdir_factory.mktemp("evaluation").strpath
-    report_folder = os.path.join(path, "reports")
+    path = tmp_path / "evaluation"
+    path.mkdir()
+    report_folder = str(path / "reports")
 
     report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
     report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")
@@ -845,12 +851,23 @@ def test_label_replacement():
     assert substitute_labels(original_labels, "O", "no_entity") == target_labels
 
 
-def test_nlu_comparison(tmpdir, config_path, config_path_duplicate):
+def test_nlu_comparison(tmp_path: Path):
+    config = {
+        "language": "en",
+        "pipeline": [
+            {"name": "WhitespaceTokenizer"},
+            {"name": "KeywordIntentClassifier"},
+            {"name": "RegexEntityExtractor"},
+        ],
+    }
     # the configs need to be at a different path, otherwise the results are
     # combined on the same dictionary key and cannot be plotted properly
-    configs = [config_path, config_path_duplicate]
+    configs = [
+        write_file_config(config).name,
+        write_file_config(config).name,
+    ]
 
-    output = tmpdir.strpath
+    output = str(tmp_path)
     compare_nlu_models(
         configs, DEFAULT_DATA_PATH, output, runs=2, exclusion_percentages=[50, 80]
     )
diff --git a/tests/nlu/test_persistor.py b/tests/nlu/test_persistor.py
index 25ce5cc76161..6e4016a91878 100644
--- a/tests/nlu/test_persistor.py
+++ b/tests/nlu/test_persistor.py
@@ -13,7 +13,7 @@ class Object:
 
 
 # noinspection PyPep8Naming
-async def test_list_method_method_in_AWS_persistor(component_builder, tmpdir):
+async def test_list_method_method_in_AWS_persistor(component_builder, tmp_path):
     with mock_s3():
         # artificially create a persisted model
         _config = RasaNLUModelConfig(
@@ -26,7 +26,7 @@ async def test_list_method_method_in_AWS_persistor(component_builder, tmpdir):
         (trained, _, persisted_path) = await train(
             _config,
             data="data/test/demo-rasa-small.json",
-            path=tmpdir.strpath,
+            path=str(tmp_path),
             storage="aws",
             component_builder=component_builder,
         )
diff --git a/tests/nlu/tokenizers/test_jieba_tokenizer.py b/tests/nlu/tokenizers/test_jieba_tokenizer.py
index 426215541587..45894b10d2fe 100644
--- a/tests/nlu/tokenizers/test_jieba_tokenizer.py
+++ b/tests/nlu/tokenizers/test_jieba_tokenizer.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from unittest.mock import patch
 
 from rasa.nlu.tokenizers.jieba_tokenizer import JiebaTokenizer
@@ -33,8 +34,8 @@ def test_jieba(text, expected_tokens, expected_indices):
     assert [t.end for t in tokens] == [i[1] for i in expected_indices]
 
 
-def test_jieba_load_dictionary(tmpdir_factory):
-    dictionary_path = tmpdir_factory.mktemp("jieba_custom_dictionary").strpath
+def test_jieba_load_dictionary(tmp_path: Path):
+    dictionary_path = str(tmp_path)
 
     component_config = {"dictionary_path": dictionary_path}
 
diff --git a/tests/test_server.py b/tests/test_server.py
index 2c77d79e4ff8..c9536bad5b09 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -370,6 +370,7 @@ def test_train_stack_success(
     default_stories_file: Text,
     default_stack_config: Text,
     default_nlu_data: Text,
+    tmp_path: Path,
 ):
     with ExitStack() as stack:
         domain_file = stack.enter_context(open(default_domain_path))
@@ -390,8 +391,7 @@ def test_train_stack_success(
     assert response.headers["filename"] is not None
 
     # save model to temporary file
-    tempdir = tempfile.mkdtemp()
-    model_path = os.path.join(tempdir, "model.tar.gz")
+    model_path = str(tmp_path / "model.tar.gz")
     with open(model_path, "wb") as f:
         f.write(response.body)
 
@@ -405,6 +405,7 @@ def test_train_nlu_success(
     default_stack_config: Text,
     default_nlu_data: Text,
     default_domain_path: Text,
+    tmp_path: Path,
 ):
     domain_data = rasa_utils.io.read_yaml_file(default_domain_path)
     config_data = rasa_utils.io.read_yaml_file(default_stack_config)
@@ -426,8 +427,7 @@ def test_train_nlu_success(
     assert response.status == 200
 
     # save model to temporary file
-    tempdir = tempfile.mkdtemp()
-    model_path = os.path.join(tempdir, "model.tar.gz")
+    model_path = str(tmp_path / "model.tar.gz")
     with open(model_path, "wb") as f:
         f.write(response.body)
 
@@ -441,6 +441,7 @@ def test_train_core_success(
     default_stack_config: Text,
     default_stories_file: Text,
     default_domain_path: Text,
+    tmp_path: Path,
 ):
     with ExitStack() as stack:
         domain_file = stack.enter_context(open(default_domain_path))
@@ -457,8 +458,7 @@ def test_train_core_success(
     assert response.status == 200
 
     # save model to temporary file
-    tempdir = tempfile.mkdtemp()
-    model_path = os.path.join(tempdir, "model.tar.gz")
+    model_path = str(tmp_path / "model.tar.gz")
     with open(model_path, "wb") as f:
         f.write(response.body)
 
@@ -468,7 +468,7 @@ def test_train_core_success(
 
 
 def test_train_with_retrieval_events_success(
-    rasa_app: SanicTestClient, default_stack_config: Text
+    rasa_app: SanicTestClient, default_stack_config: Text, tmp_path: Path
 ):
     with ExitStack() as stack:
         domain_file = stack.enter_context(
@@ -493,13 +493,12 @@ def test_train_with_retrieval_events_success(
 
     _, response = rasa_app.post("/model/train", json=payload)
     assert response.status == 200
-    assert_trained_model(response.body)
+    assert_trained_model(response.body, tmp_path)
 
 
-def assert_trained_model(response_body: bytes) -> None:
+def assert_trained_model(response_body: bytes, tmp_path: Path) -> None:
     # save model to temporary file
-    tempdir = tempfile.mkdtemp()
-    model_path = os.path.join(tempdir, "model.tar.gz")
+    model_path = str(tmp_path / "model.tar.gz")
     with open(model_path, "wb") as f:
         f.write(response_body)
 
@@ -534,7 +533,7 @@ def test_deprecation_warnings_json_payload(payload: Dict):
         rasa.server._validate_json_training_payload(payload)
 
 
-def test_train_with_yaml(rasa_app: SanicTestClient):
+def test_train_with_yaml(rasa_app: SanicTestClient, tmp_path: Path):
     training_data = """
 stories:
 - story: My story
@@ -580,7 +579,7 @@ def test_train_with_yaml(rasa_app: SanicTestClient):
     )
 
     assert response.status == 200
-    assert_trained_model(response.body)
+    assert_trained_model(response.body, tmp_path)
 
 
 def test_train_with_invalid_yaml(rasa_app: SanicTestClient):
diff --git a/tests/test_train.py b/tests/test_train.py
index 73fd2582301d..bd772e40c0c4 100644
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -74,15 +74,18 @@ def count_temp_rasa_files(directory: Text) -> int:
 
 
 def test_train_temp_files(
-    tmp_path: Text,
+    tmp_path: Path,
     monkeypatch: MonkeyPatch,
     default_domain_path: Text,
     default_stories_file: Text,
     default_stack_config: Text,
     default_nlu_data: Text,
 ):
-    monkeypatch.setattr(tempfile, "tempdir", tmp_path)
-    output = "test_train_temp_files_models"
+    (tmp_path / "training").mkdir()
+    (tmp_path / "models").mkdir()
+
+    monkeypatch.setattr(tempfile, "tempdir", tmp_path / "training")
+    output = str(tmp_path / "models")
 
     train(
         default_domain_path,
@@ -108,36 +111,40 @@ def test_train_temp_files(
 
 
 def test_train_core_temp_files(
-    tmp_path: Text,
+    tmp_path: Path,
     monkeypatch: MonkeyPatch,
     default_domain_path: Text,
     default_stories_file: Text,
     default_stack_config: Text,
 ):
-    monkeypatch.setattr(tempfile, "tempdir", tmp_path)
+    (tmp_path / "training").mkdir()
+    (tmp_path / "models").mkdir()
+
+    monkeypatch.setattr(tempfile, "tempdir", tmp_path / "training")
 
     train_core(
         default_domain_path,
         default_stack_config,
         default_stories_file,
-        output="test_train_core_temp_files_models",
+        output=str(tmp_path / "models"),
     )
 
     assert count_temp_rasa_files(tempfile.tempdir) == 0
 
 
 def test_train_nlu_temp_files(
-    tmp_path: Text,
+    tmp_path: Path,
     monkeypatch: MonkeyPatch,
     default_stack_config: Text,
     default_nlu_data: Text,
 ):
-    monkeypatch.setattr(tempfile, "tempdir", tmp_path)
+    (tmp_path / "training").mkdir()
+    (tmp_path / "models").mkdir()
+
+    monkeypatch.setattr(tempfile, "tempdir", tmp_path / "training")
 
     train_nlu(
-        default_stack_config,
-        default_nlu_data,
-        output="test_train_nlu_temp_files_models",
+        default_stack_config, default_nlu_data, output=str(tmp_path / "models"),
     )
 
     assert count_temp_rasa_files(tempfile.tempdir) == 0
@@ -150,12 +157,13 @@ def test_train_nlu_wrong_format_error_message(
     default_stack_config: Text,
     incorrect_nlu_data: Text,
 ):
-    monkeypatch.setattr(tempfile, "tempdir", tmp_path)
+    (tmp_path / "training").mkdir()
+    (tmp_path / "models").mkdir()
+
+    monkeypatch.setattr(tempfile, "tempdir", tmp_path / "training")
 
     train_nlu(
-        default_stack_config,
-        incorrect_nlu_data,
-        output="test_train_nlu_temp_files_models",
+        default_stack_config, incorrect_nlu_data, output=str(tmp_path / "models"),
     )
 
     captured = capsys.readouterr()
@@ -168,9 +176,12 @@ def test_train_nlu_no_nlu_file_error_message(
     monkeypatch: MonkeyPatch,
     default_stack_config: Text,
 ):
-    monkeypatch.setattr(tempfile, "tempdir", tmp_path)
+    (tmp_path / "training").mkdir()
+    (tmp_path / "models").mkdir()
+
+    monkeypatch.setattr(tempfile, "tempdir", tmp_path / "training")
 
-    train_nlu(default_stack_config, "", output="test_train_nlu_temp_files_models")
+    train_nlu(default_stack_config, "", output=str(tmp_path / "models"))
 
     captured = capsys.readouterr()
     assert "No NLU data given" in captured.out

From e9f419a377f779cfd9eff1742dc780565d713ee9 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 01:51:51 +0200
Subject: [PATCH 30/34] added default argument

---
 rasa/core/training/interactive.py               | 2 +-
 rasa/nlu/training_data/entities_parser.py       | 2 +-
 tests/nlu/training_data/test_entities_parser.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
index 6d7641b4145b..3ac7560369ee 100644
--- a/rasa/core/training/interactive.py
+++ b/rasa/core/training/interactive.py
@@ -1225,7 +1225,7 @@ async def _correct_entities(
     )
 
     annotation = await _ask_questions(question, conversation_id, endpoint)
-    parse_annotated = entities_parser.parse_training_example(annotation, intent=None)
+    parse_annotated = entities_parser.parse_training_example(annotation)
 
     corrected_entities = _merge_annotated_and_original_entities(
         parse_annotated, parse_original
diff --git a/rasa/nlu/training_data/entities_parser.py b/rasa/nlu/training_data/entities_parser.py
index 03ed06a6339e..02b3422a70d4 100644
--- a/rasa/nlu/training_data/entities_parser.py
+++ b/rasa/nlu/training_data/entities_parser.py
@@ -168,7 +168,7 @@ def replace_entities(training_example: Text) -> Text:
     )
 
 
-def parse_training_example(example: Text, intent: Optional[Text]) -> "Message":
+def parse_training_example(example: Text, intent: Optional[Text] = None) -> "Message":
     """Extract entities and synonyms, and convert to plain text."""
 
     entities = find_entities_in_training_example(example)
diff --git a/tests/nlu/training_data/test_entities_parser.py b/tests/nlu/training_data/test_entities_parser.py
index 7bdaec09bfe4..6573e5d5f416 100644
--- a/tests/nlu/training_data/test_entities_parser.py
+++ b/tests/nlu/training_data/test_entities_parser.py
@@ -122,7 +122,7 @@ def test_parse_training_example():
 
 
 def test_parse_empty_example():
-    message = entities_parser.parse_training_example("", intent=None)
+    message = entities_parser.parse_training_example("")
     assert message.get("intent") is None
     assert message.text == ""
 

From c95937b650c84649032011a40beb21d70eb8bb3c Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 01:56:45 +0200
Subject: [PATCH 31/34] Update test_multi_project.py

---
 tests/importers/test_multi_project.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py
index bdfe83f863fc..e79a43e92af9 100644
--- a/tests/importers/test_multi_project.py
+++ b/tests/importers/test_multi_project.py
@@ -204,10 +204,9 @@ def test_not_importing_not_relevant_additional_files(tmp_path: Path):
     )
 
     not_relevant_file1 = tmp_path / "data" / "another directory" / "file.yml"
-    not_relevant_file1.parent.mkdir()
+    not_relevant_file1.parent.mkdir(parents=True)
     rasa.utils.io.write_text_file("", not_relevant_file1)
     not_relevant_file2 = tmp_path / "directory" / "another_file.yml"
-    not_relevant_file2.parent.mkdir()
     rasa.utils.io.write_text_file("", not_relevant_file2)
 
     assert not selector.is_imported(str(not_relevant_file1))
@@ -249,7 +248,7 @@ async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
     story_file = tmp_path / "bots" / "Bot A" / "data" / "stories.md"
-    story_file.parent.mkdir()
+    story_file.parent.mkdir(parents=True)
     rasa.utils.io.write_text_file(
         """
         ## story
@@ -262,9 +261,8 @@ async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
     story_test_file = (
         tmp_path / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / test_stories_filename
     )
-    story_test_file.parent.mkdir()
+    story_test_file.parent.mkdir(parents=True)
     rasa.utils.io.write_text_file(test_story, story_test_file)
-    story_test_file.write(test_story, ensure=True)
 
     selector = MultiProjectImporter(config_path)
 
@@ -289,13 +287,13 @@ def test_not_importing_e2e_conversation_tests_in_project(tmp_path: Path,):
     utils.dump_obj_as_yaml_to_file(config_path, config)
 
     story_file = tmp_path / "bots" / "Bot A" / "data" / "stories.md"
-    story_file.parent.mkdir()
+    story_file.parent.mkdir(parents=True)
     rasa.utils.io.write_text_file("""## story""", story_file)
 
     story_test_file = (
         tmp_path / "bots" / "Bot A" / DEFAULT_E2E_TESTS_PATH / "test_stories.yml"
     )
-    story_test_file.parent.mkdir()
+    story_test_file.parent.mkdir(parents=True)
     rasa.utils.io.write_text_file("""stories:""", story_test_file)
 
     selector = MultiProjectImporter(config_path)

From 824d862463b6b6d6b9944ea57e626f6371182769 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 09:24:35 +0200
Subject: [PATCH 32/34] fixed windows errors

---
 tests/cli/test_rasa_export.py | 6 +++++-
 tests/core/test_broker.py     | 5 +++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/cli/test_rasa_export.py b/tests/cli/test_rasa_export.py
index e2f2f56b0b24..2b3b82d8c052 100644
--- a/tests/cli/test_rasa_export.py
+++ b/tests/cli/test_rasa_export.py
@@ -6,6 +6,7 @@
 import pytest
 from _pytest.monkeypatch import MonkeyPatch
 from _pytest.pytester import RunResult
+from ruamel.yaml.scalarstring import SingleQuotedScalarString
 
 import rasa.core.utils as rasa_core_utils
 from rasa.cli import export
@@ -67,7 +68,10 @@ def test_get_event_broker_and_tracker_store_from_endpoint_config(tmp_path: Path)
     endpoints_path = write_endpoint_config_to_yaml(
         tmp_path,
         {
-            "event_broker": {"type": "sql", "db": str(tmp_path / "rasa.db")},
+            "event_broker": {
+                "type": "sql",
+                "db": SingleQuotedScalarString(str(tmp_path / "rasa.db")),
+            },
             "tracker_store": {"type": "sql"},
         },
     )
diff --git a/tests/core/test_broker.py b/tests/core/test_broker.py
index 71bbd3b976f4..3fb1ff44c2fc 100644
--- a/tests/core/test_broker.py
+++ b/tests/core/test_broker.py
@@ -123,8 +123,9 @@ def test_file_broker_from_config(tmp_path: Path):
     endpoint_config = textwrap.dedent(
         f"""
         event_broker:
-          path: "{tmp_path / 'rasa_test_event.log'}"
-          type: "file"
+          # needs to be single quotes to avoid yaml trying to parse escape chars...
+          path: '{tmp_path / 'rasa_test_event.log'}'
+          type: 'file'
     """
     )
     rasa.utils.io.write_text_file(endpoint_config, tmp_path / "endpoint.yml")

From 691ddb26f25db996015a7e110bb17a1d09b35e00 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 11:28:19 +0200
Subject: [PATCH 33/34] trying to fix escaping issues

---
 tests/cli/test_rasa_export.py | 2 +-
 tests/core/test_broker.py     | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/cli/test_rasa_export.py b/tests/cli/test_rasa_export.py
index 2b3b82d8c052..c7a2fafef278 100644
--- a/tests/cli/test_rasa_export.py
+++ b/tests/cli/test_rasa_export.py
@@ -70,7 +70,7 @@ def test_get_event_broker_and_tracker_store_from_endpoint_config(tmp_path: Path)
         {
             "event_broker": {
                 "type": "sql",
-                "db": SingleQuotedScalarString(str(tmp_path / "rasa.db")),
+                "db": str(tmp_path / "rasa.db").replace("\\", "\\\\"),
             },
             "tracker_store": {"type": "sql"},
         },
diff --git a/tests/core/test_broker.py b/tests/core/test_broker.py
index 3fb1ff44c2fc..9450f4eaca34 100644
--- a/tests/core/test_broker.py
+++ b/tests/core/test_broker.py
@@ -120,12 +120,14 @@ def test_sql_broker_logs_to_sql_db():
 
 
 def test_file_broker_from_config(tmp_path: Path):
+    # escape backslashes (Windows paths): YAML parses them inside double-quoted strings
+    path = str(tmp_path / "rasa_test_event.log").replace("\\", "\\\\")
     endpoint_config = textwrap.dedent(
         f"""
         event_broker:
-          # needs to be single quotes to avoid yaml trying to parse escape chars...
-          path: '{tmp_path / 'rasa_test_event.log'}'
-          type: 'file'
+          
+          path: "{path}"
+          type: "file"
     """
     )
     rasa.utils.io.write_text_file(endpoint_config, tmp_path / "endpoint.yml")

From ccb5f34b980e0bfd11603789d9189bd133268012 Mon Sep 17 00:00:00 2001
From: Tom Bocklisch <tom@rasa.com>
Date: Thu, 27 Aug 2020 11:34:33 +0200
Subject: [PATCH 34/34] fixed linter

---
 tests/core/test_broker.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/core/test_broker.py b/tests/core/test_broker.py
index 9450f4eaca34..bc0ef14ae537 100644
--- a/tests/core/test_broker.py
+++ b/tests/core/test_broker.py
@@ -125,7 +125,6 @@ def test_file_broker_from_config(tmp_path: Path):
     endpoint_config = textwrap.dedent(
         f"""
         event_broker:
-          
           path: "{path}"
           type: "file"
     """