Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate Markdown Training & Test Data #7495

Merged
merged 13 commits into from
Dec 11, 2020
15 changes: 15 additions & 0 deletions changelog/7495.removal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Deprecate training and test data in Markdown format. This includes:
- reading and writing of story files in Markdown format
- reading and writing of NLU data in Markdown format
- reading and writing of retrieval intent data in Markdown format

Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0.

Please convert your existing Markdown data by using the commands
from the [migration guide](./migration-guide.mdx#rasa-21-to-rasa-22):

```bash
rasa data convert nlu -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
rasa data convert nlg -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
rasa data convert core -f yaml --data={SOURCE_DIR} --out={TARGET_DIR}
```
16 changes: 15 additions & 1 deletion docs/docs/migration-guide.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ description: |
This page contains information about changes between major versions and
how you can migrate from one version to another.

## Rasa 2.1 to Rasa 2.2

### Deprecations

Training and test data in Markdown format is now deprecated. This includes:
- reading and writing of story files in Markdown format
- reading and writing of NLU data in Markdown format
- reading and writing of retrieval intent data in Markdown format

Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0.

Please convert your existing Markdown data by using the commands
described in the [Training data files](./migration-guide.mdx#training-data-files) section.

## Rasa 2.0 to Rasa 2.1

### Deprecations
Expand Down Expand Up @@ -52,7 +66,7 @@ in the [forum](https://forum.rasa.com/t/rasa-open-source-2-0-is-out-now-internal
### Training data files

As of version 2.0, the new default training data format is yaml. Markdown is still supported,
but this will be deprecated in a future release.
but it is now deprecated and will be removed in Rasa Open Source 3.0.0.

You can convert existing NLU, Stories, and NLG (i.e. `responses.md`) training data
files in the Markdown format to the new YAML format using the following commands:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:

# check if source file is test stories file
if MarkdownStoryReader.is_test_stories_file(source_path):
reader = MarkdownStoryReader(is_used_for_training=False, use_e2e=True)
reader = MarkdownStoryReader(
is_used_for_training=False,
use_e2e=True,
ignore_deprecation_warning=True,
)
output_core_path = cls._generate_path_for_converted_test_data_file(
source_path, output_path
)
else:
reader = MarkdownStoryReader(is_used_for_training=False)
reader = MarkdownStoryReader(
is_used_for_training=False, ignore_deprecation_warning=True
)
output_core_path = cls.generate_path_for_converted_training_data_file(
source_path, output_path
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Dict, Text
from typing import Text

from rasa.shared.constants import UTTER_PREFIX
from rasa.shared.nlu.training_data.formats import NLGMarkdownReader
Expand Down Expand Up @@ -31,7 +31,7 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
source_path: Path to the training data file.
output_path: Path to the output directory.
"""
reader = NLGMarkdownReader()
reader = NLGMarkdownReader(ignore_deprecation_warning=True)
writer = RasaYAMLWriter()

output_nlg_path = cls.generate_path_for_converted_training_data_file(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
source_path, output_path
)

yaml_training_data = MarkdownReader().read(source_path)
yaml_training_data = MarkdownReader(ignore_deprecation_warning=True).read(
source_path
)
RasaYAMLWriter().dump(output_nlu_path, yaml_training_data)

for lookup_table in yaml_training_data.lookup_tables:
Expand Down
3 changes: 3 additions & 0 deletions rasa/shared/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
DOCS_URL_TRACKER_STORES = DOCS_BASE_URL + "/tracker-stores"
DOCS_URL_COMPONENTS = DOCS_BASE_URL + "/components"
DOCS_URL_MIGRATION_GUIDE = DOCS_BASE_URL + "/migration-guide"
DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION = (
f"{DOCS_URL_MIGRATION_GUIDE}#rasa-21-to-rasa-22"
)
DOCS_URL_TELEMETRY = DOCS_BASE_URL + "/telemetry/telemetry"
DOCS_BASE_URL_RASA_X = "https://rasa.com/docs/rasa-x"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import os
import re
from pathlib import Path
from typing import Dict, Text, List, Any, Union, Tuple
from typing import Dict, Text, List, Any, Union, Tuple, Optional

import rasa.shared.data
from rasa.shared.core.domain import Domain
from rasa.shared.nlu.constants import TEXT, INTENT_NAME_KEY
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.constants import (
Expand All @@ -14,6 +15,7 @@
LEGACY_DOCS_BASE_URL,
DEFAULT_E2E_TESTS_PATH,
DOCS_URL_STORIES,
DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
)
from rasa.shared.core.events import UserUttered
from rasa.shared.nlu.interpreter import RegexInterpreter
Expand All @@ -28,11 +30,32 @@


class MarkdownStoryReader(StoryReader):
"""Class that reads the core training data in a Markdown format"""
"""Class that reads the core training data in a Markdown format."""

def __init__(
    self,
    domain: Optional[Domain] = None,
    template_vars: Optional[Dict] = None,
    use_e2e: bool = False,
    source_name: Optional[Text] = None,
    is_used_for_training: bool = True,
    ignore_deprecation_warning: bool = False,
) -> None:
    """Creates reader and warns that Markdown stories are deprecated.

    Every argument except `ignore_deprecation_warning` is passed on unchanged,
    in the same order, to the parent class initializer. See the parent class
    docstring for more information on those arguments.

    Args:
        domain: Passed on to the parent initializer.
        template_vars: Passed on to the parent initializer.
        use_e2e: Passed on to the parent initializer.
        source_name: Passed on to the parent initializer.
        is_used_for_training: Passed on to the parent initializer.
        ignore_deprecation_warning: `True` if the deprecation warning for
            the Markdown format should be suppressed.
    """
    super().__init__(
        domain, template_vars, use_e2e, source_name, is_used_for_training
    )

    # Callers that opt out of the warning are done once the parent is set up.
    if ignore_deprecation_warning:
        return

    deprecation_message = (
        "Stories in Markdown format are deprecated and will be removed in Rasa "
        "Open Source 3.0.0. Please convert your Markdown stories to the "
        "new YAML format."
    )
    rasa.shared.utils.io.raise_deprecation_warning(
        deprecation_message, docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION
    )

def read_from_file(self, filename: Union[Text, Path]) -> List[StoryStep]:
"""Given a md file reads the contained stories."""

try:
with open(
filename, "r", encoding=rasa.shared.utils.io.DEFAULT_ENCODING
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from pathlib import Path
from typing import List, Text, Union

from rasa.shared.constants import DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION
from ruamel import yaml

from rasa.shared.core.training_data.story_writer.story_writer import StoryWriter
from rasa.shared.core.training_data.structures import StoryStep
import rasa.shared.utils.io


class MarkdownStoryWriter(StoryWriter):
Expand Down Expand Up @@ -35,6 +37,7 @@ def dumps(
story_steps: List[StoryStep],
is_appendable: bool = False,
is_test_story: bool = False,
ignore_deprecation_warning: bool = False,
) -> Text:
"""Turns Story steps into a markdown string.

Expand All @@ -45,10 +48,19 @@ def dumps(
the existing story file.
is_test_story: Identifies if the stories should be exported in test stories
format.
ignore_deprecation_warning: `True` if printing the deprecation warning
should be suppressed.

Returns:
String with story steps in the markdown format.
Story steps in the markdown format.
"""
if not ignore_deprecation_warning:
rasa.shared.utils.io.raise_deprecation_warning(
"Stories in Markdown format are deprecated and will be removed in Rasa "
"Open Source 3.0.0. Please convert your Markdown stories to the "
"new YAML format.",
docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
)
return MarkdownStoryWriter._stories_to_md(
story_steps, is_appendable, is_test_story
)
Expand Down
36 changes: 32 additions & 4 deletions rasa/shared/nlu/training_data/formats/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from pathlib import Path
from typing import Any, Text, Optional, Tuple, Dict, Union

from rasa.shared.constants import LEGACY_DOCS_BASE_URL
from rasa.shared.constants import (
LEGACY_DOCS_BASE_URL,
DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
)
from rasa.shared.nlu.constants import TEXT
from rasa.shared.nlu.training_data.formats.readerwriter import (
TrainingDataReader,
Expand Down Expand Up @@ -38,7 +41,8 @@
class MarkdownReader(TrainingDataReader):
"""Reads markdown training data and creates a TrainingData object."""

def __init__(self) -> None:
def __init__(self, ignore_deprecation_warning: bool = False,) -> None:
"""Creates reader. See parent class docstring for more information."""
super().__init__()
self.current_title = None
self.current_section = None
Expand All @@ -47,8 +51,16 @@ def __init__(self) -> None:
self.regex_features = []
self.lookup_tables = []

if not ignore_deprecation_warning:
rasa.shared.utils.io.raise_deprecation_warning(
"NLU data in Markdown format is deprecated and will be removed in Rasa "
"Open Source 3.0.0. Please convert your Markdown NLU data to the "
"new YAML training data format.",
docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
)

def reads(self, s: Text, **kwargs: Any) -> "TrainingData":
"""Read markdown string and create TrainingData object"""
"""Read markdown string and create TrainingData object."""
s = self._strip_comments(s)
for line in s.splitlines():
line = decode_string(line.strip())
Expand Down Expand Up @@ -179,9 +191,25 @@ def is_markdown_nlu_file(filename: Union[Text, Path]) -> bool:


class MarkdownWriter(TrainingDataWriter):
"""Converts NLU data to Markdown."""

def __init__(self, ignore_deprecation_warning: bool = False) -> None:
    """Creates writer.

    Args:
        ignore_deprecation_warning: `True` if deprecation warning for Markdown
            format should be suppressed.
    """
    # Chain to the parent initializer, as `MarkdownReader.__init__` does, so
    # that base-class setup is never skipped for this class or its subclasses.
    super().__init__()

    if not ignore_deprecation_warning:
        rasa.shared.utils.io.raise_deprecation_warning(
            "NLU data in Markdown format is deprecated and will be removed in Rasa "
            "Open Source 3.0.0. Please convert your Markdown NLU data to the "
            "new YAML training data format.",
            docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION,
        )

def dumps(self, training_data: "TrainingData") -> Text:
"""Transforms a TrainingData object into a markdown string."""

md = ""
md += self._generate_training_examples_md(training_data)
md += self._generate_synonyms_md(training_data)
Expand Down
16 changes: 9 additions & 7 deletions rasa/shared/nlu/training_data/formats/markdown_nlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any, Dict, List, Text, Union

from rasa.shared.nlu.constants import TEXT
from rasa.shared.nlu.training_data.formats import MarkdownWriter, MarkdownReader
from rasa.shared.nlu.training_data.formats.readerwriter import (
TrainingDataReader,
TrainingDataWriter,
Expand All @@ -21,16 +22,16 @@
NLG_MARKDOWN_MARKER_REGEX = re.compile(r"##\s*.*\n\*[^:]*\/.*\n\s*\t*\-.*")


class NLGMarkdownReader(TrainingDataReader):
class NLGMarkdownReader(MarkdownReader):
"""Reads markdown training data containing NLG stories and creates a TrainingData object."""

def __init__(self) -> None:
def __init__(self, ignore_deprecation_warning: bool = False) -> None:
"""Creates reader. See parent class docstring for more information."""
self.responses = {}
super(NLGMarkdownReader, self).__init__()
super().__init__(ignore_deprecation_warning=ignore_deprecation_warning)

def reads(self, s: Text, **kwargs: Any) -> "TrainingData":
"""Read markdown string and create TrainingData object"""
self.__init__()
"""Read markdown string and create TrainingData object."""
lines = s.splitlines()
self.responses = self.process_lines(lines)
return TrainingData(responses=self.responses)
Expand Down Expand Up @@ -101,10 +102,11 @@ def is_markdown_nlg_file(filename: Union[Text, Path]) -> bool:
return re.search(NLG_MARKDOWN_MARKER_REGEX, content) is not None


class NLGMarkdownWriter(TrainingDataWriter):
class NLGMarkdownWriter(MarkdownWriter):
"""Converts retrieval intent data to Markdown."""

def dumps(self, training_data: "TrainingData") -> Text:
"""Transforms the NlG part of TrainingData object into a markdown string."""

md = ""
for intent, utterances in training_data.responses.items():
md += "## \n"
Expand Down
Loading