RasaHQ · wochinge · Dec 11, 2020 · Dec 9, 2020 · Dec 9, 2020 · Dec 9, 2020
diff --git a/changelog/7495.removal.md b/changelog/7495.removal.md
@@ -0,0 +1,15 @@
+Deprecate training and test data in Markdown format. This includes:
+- reading and writing of story files in Markdown format
+- reading and writing of NLU data in Markdown format
+- reading and writing of retrieval intent data in Markdown format
+
+Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0.
+
+Please convert your existing Markdown data by using the commands
+from the [migration guide](./migration-guide.mdx#rasa-21-to-rasa-22):
+
+```bash
+rasa data convert nlu -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
+rasa data convert nlg -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
+rasa data convert core -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
+```
diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx
@@ -10,6 +10,20 @@ description: |
 This page contains information about changes between major versions and
 how you can migrate from one version to another.
 
+## Rasa 2.1 to Rasa 2.2
+
+### Deprecations
+
+Training and test data in Markdown format is now deprecated. This includes:
+- reading and writing of story files in Markdown format
+- reading and writing of NLU data in Markdown format
+- reading and writing of retrieval intent data in Markdown format
+
+Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0.
+
+Please convert your existing Markdown data by using the commands
+described in the [training data files](./migration-guide.mdx#training-data-files) section.
+
 ## Rasa 2.0 to Rasa 2.1
 
 ### Deprecations
@@ -52,7 +66,7 @@ in the [forum](https://forum.rasa.com/t/rasa-open-source-2-0-is-out-now-internal
 ### Training data files
 
 As of version 2.0, the new default training data format is yaml. Markdown is still supported,
-but this will be deprecated in a future release.
+but it is deprecated and will be removed in Rasa Open Source 3.0.0.
 
 You can convert existing NLU, Stories, and NLG (i.e. `responses.md`) training data
 files in the Markdown format to the new YAML format using following commands:

diff --git a/rasa/core/training/converters/story_markdown_to_yaml_converter.py b/rasa/core/training/converters/story_markdown_to_yaml_converter.py
@@ -41,12 +41,18 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
 
         # check if source file is test stories file
         if MarkdownStoryReader.is_test_stories_file(source_path):
-            reader = MarkdownStoryReader(is_used_for_training=False, use_e2e=True)
+            reader = MarkdownStoryReader(
+                is_used_for_training=False,
+                use_e2e=True,
+                ignore_deprecation_warning=True,
+            )
             output_core_path = cls._generate_path_for_converted_test_data_file(
                 source_path, output_path
             )
         else:
-            reader = MarkdownStoryReader(is_used_for_training=False)
+            reader = MarkdownStoryReader(
+                is_used_for_training=False, ignore_deprecation_warning=True
+            )
             output_core_path = cls.generate_path_for_converted_training_data_file(
                 source_path, output_path
             )

diff --git a/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py b/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Dict, Text
+from typing import Text
 
 from rasa.shared.constants import UTTER_PREFIX
 from rasa.shared.nlu.training_data.formats import NLGMarkdownReader
@@ -31,7 +31,7 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
             source_path: Path to the training data file.
             output_path: Path to the output directory.
         """
-        reader = NLGMarkdownReader()
+        reader = NLGMarkdownReader(ignore_deprecation_warning=True)
         writer = RasaYAMLWriter()
 
         output_nlg_path = cls.generate_path_for_converted_training_data_file(

diff --git a/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py b/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py
@@ -35,7 +35,9 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
             source_path, output_path
         )
 
-        yaml_training_data = MarkdownReader().read(source_path)
+        yaml_training_data = MarkdownReader(ignore_deprecation_warning=True).read(
+            source_path
+        )
         RasaYAMLWriter().dump(output_nlu_path, yaml_training_data)
 
         for lookup_table in yaml_training_data.lookup_tables:

diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py
@@ -19,6 +19,9 @@
 DOCS_URL_TRACKER_STORES = DOCS_BASE_URL + "/tracker-stores"
 DOCS_URL_COMPONENTS = DOCS_BASE_URL + "/components"
 DOCS_URL_MIGRATION_GUIDE = DOCS_BASE_URL + "/migration-guide"
+DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION = (
+    f"{DOCS_URL_MIGRATION_GUIDE}#rasa-21-to-rasa-22"
+)
 DOCS_URL_TELEMETRY = DOCS_BASE_URL + "/telemetry/telemetry"
 DOCS_BASE_URL_RASA_X = "https://rasa.com/docs/rasa-x"
 

diff --git a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py
@@ -3,9 +3,10 @@
 import os
 import re
 from pathlib import Path
-from typing import Dict, Text, List, Any, Union, Tuple
+from typing import Dict, Text, List, Any, Union, Tuple, Optional
 
 import rasa.shared.data
+from rasa.shared.core.domain import Domain
 from rasa.shared.nlu.constants import TEXT, INTENT_NAME_KEY
 from rasa.shared.nlu.training_data.message import Message
 from rasa.shared.constants import (
@@ -14,6 +15,7 @@
     LEGACY_DOCS_BASE_URL,
     DEFAULT_E2E_TESTS_PATH,
     DOCS_URL_STORIES,
+    DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
 )
 from rasa.shared.core.events import UserUttered
 from rasa.shared.nlu.interpreter import RegexInterpreter
@@ -28,11 +30,32 @@
 
 
 class MarkdownStoryReader(StoryReader):
-    """Class that reads the core training data in a Markdown format"""
+    """Class that reads the core training data in a Markdown format."""
+
+    def __init__(
+        self,
+        domain: Optional[Domain] = None,
+        template_vars: Optional[Dict] = None,
+        use_e2e: bool = False,
+        source_name: Optional[Text] = None,
+        is_used_for_training: bool = True,
+        ignore_deprecation_warning: bool = False,
+    ) -> None:
+        """Creates reader. See parent class docstring for more information."""
+        super().__init__(
+            domain, template_vars, use_e2e, source_name, is_used_for_training
+        )
+
+        if not ignore_deprecation_warning:
+            rasa.shared.utils.io.raise_deprecation_warning(
+                "Stories in Markdown format are deprecated and will be removed in Rasa "
+                "Open Source 3.0.0. Please convert your Markdown stories to the "
+                "new YAML format.",
+                docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
+            )
 
     def read_from_file(self, filename: Union[Text, Path]) -> List[StoryStep]:
         """Given a md file reads the contained stories."""
-
         try:
             with open(
                 filename, "r", encoding=rasa.shared.utils.io.DEFAULT_ENCODING

diff --git a/rasa/shared/core/training_data/story_writer/markdown_story_writer.py b/rasa/shared/core/training_data/story_writer/markdown_story_writer.py
@@ -1,10 +1,12 @@
 from pathlib import Path
 from typing import List, Text, Union
 
+from rasa.shared.constants import DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION
 from ruamel import yaml
 
 from rasa.shared.core.training_data.story_writer.story_writer import StoryWriter
 from rasa.shared.core.training_data.structures import StoryStep
+import rasa.shared.utils.io
 
 
 class MarkdownStoryWriter(StoryWriter):
@@ -35,6 +37,7 @@ def dumps(
         story_steps: List[StoryStep],
         is_appendable: bool = False,
         is_test_story: bool = False,
+        ignore_deprecation_warning: bool = False,
     ) -> Text:
         """Turns Story steps into a markdown string.
 
@@ -45,10 +48,19 @@ def dumps(
                            the existing story file.
             is_test_story: Identifies if the stories should be exported in test stories
                            format.
+            ignore_deprecation_warning: `True` if printing the deprecation warning
+                should be suppressed.
 
         Returns:
-            String with story steps in the markdown format.
+            Story steps in the markdown format.
         """
+        if not ignore_deprecation_warning:
+            rasa.shared.utils.io.raise_deprecation_warning(
+                "Stories in Markdown format are deprecated and will be removed in Rasa "
+                "Open Source 3.0.0. Please convert your Markdown stories to the "
+                "new YAML format.",
+                docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
+            )
         return MarkdownStoryWriter._stories_to_md(
             story_steps, is_appendable, is_test_story
         )

diff --git a/rasa/shared/nlu/training_data/formats/markdown.py b/rasa/shared/nlu/training_data/formats/markdown.py
@@ -5,7 +5,10 @@
 from pathlib import Path
 from typing import Any, Text, Optional, Tuple, Dict, Union
 
-from rasa.shared.constants import LEGACY_DOCS_BASE_URL
+from rasa.shared.constants import (
+    LEGACY_DOCS_BASE_URL,
+    DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
+)
 from rasa.shared.nlu.constants import TEXT
 from rasa.shared.nlu.training_data.formats.readerwriter import (
     TrainingDataReader,
@@ -38,7 +41,8 @@
 class MarkdownReader(TrainingDataReader):
     """Reads markdown training data and creates a TrainingData object."""
 
-    def __init__(self) -> None:
+    def __init__(self, ignore_deprecation_warning: bool = False,) -> None:
+        """Creates reader. See parent class docstring for more information."""
         super().__init__()
         self.current_title = None
         self.current_section = None
@@ -47,8 +51,16 @@ def __init__(self) -> None:
         self.regex_features = []
         self.lookup_tables = []
 
+        if not ignore_deprecation_warning:
+            rasa.shared.utils.io.raise_deprecation_warning(
+                "NLU data in Markdown format is deprecated and will be removed in Rasa "
+                "Open Source 3.0.0. Please convert your Markdown NLU data to the "
+                "new YAML training data format.",
+                docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
+            )
+
     def reads(self, s: Text, **kwargs: Any) -> "TrainingData":
-        """Read markdown string and create TrainingData object"""
+        """Read markdown string and create TrainingData object."""
         s = self._strip_comments(s)
         for line in s.splitlines():
             line = decode_string(line.strip())
@@ -179,9 +191,25 @@ def is_markdown_nlu_file(filename: Union[Text, Path]) -> bool:
 
 
 class MarkdownWriter(TrainingDataWriter):
+    """Converts NLU data to Markdown."""
+
+    def __init__(self, ignore_deprecation_warning: bool = False,) -> None:
+        """Creates writer.
+
+        Args:
+            ignore_deprecation_warning: `True` if deprecation warning for Markdown
+                format should be suppressed.
+        """
+        if not ignore_deprecation_warning:
+            rasa.shared.utils.io.raise_deprecation_warning(
+                "NLU data in Markdown format is deprecated and will be removed in Rasa "
+                "Open Source 3.0.0. Please convert your Markdown NLU data to the "
+                "new YAML training data format.",
+                docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
+            )
+
     def dumps(self, training_data: "TrainingData") -> Text:
         """Transforms a TrainingData object into a markdown string."""
-
         md = ""
         md += self._generate_training_examples_md(training_data)
         md += self._generate_synonyms_md(training_data)

diff --git a/rasa/shared/nlu/training_data/formats/markdown_nlg.py b/rasa/shared/nlu/training_data/formats/markdown_nlg.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, List, Text, Union
 
 from rasa.shared.nlu.constants import TEXT
+from rasa.shared.nlu.training_data.formats import MarkdownWriter, MarkdownReader
 from rasa.shared.nlu.training_data.formats.readerwriter import (
     TrainingDataReader,
     TrainingDataWriter,
@@ -21,16 +22,16 @@
 NLG_MARKDOWN_MARKER_REGEX = re.compile(r"##\s*.*\n\*[^:]*\/.*\n\s*\t*\-.*")
 
 
-class NLGMarkdownReader(TrainingDataReader):
+class NLGMarkdownReader(MarkdownReader):
     """Reads markdown training data containing NLG stories and creates a TrainingData object."""
 
-    def __init__(self) -> None:
+    def __init__(self, ignore_deprecation_warning: bool = False) -> None:
+        """Creates reader. See parent class docstring for more information."""
         self.responses = {}
-        super(NLGMarkdownReader, self).__init__()
+        super().__init__(ignore_deprecation_warning=ignore_deprecation_warning)
 
     def reads(self, s: Text, **kwargs: Any) -> "TrainingData":
-        """Read markdown string and create TrainingData object"""
-        self.__init__()
+        """Read markdown string and create TrainingData object."""
         lines = s.splitlines()
         self.responses = self.process_lines(lines)
         return TrainingData(responses=self.responses)
@@ -101,10 +102,11 @@ def is_markdown_nlg_file(filename: Union[Text, Path]) -> bool:
         return re.search(NLG_MARKDOWN_MARKER_REGEX, content) is not None
 
 
-class NLGMarkdownWriter(TrainingDataWriter):
+class NLGMarkdownWriter(MarkdownWriter):
+    """Converts retrieval intent data to Markdown."""
+
     def dumps(self, training_data: "TrainingData") -> Text:
         """Transforms the NlG part of TrainingData object into a markdown string."""
-
         md = ""
         for intent, utterances in training_data.responses.items():
             md += "## \n"