From 6e00a119b8314314054ffcfb7fb9300a28371f99 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 10:53:26 +0100 Subject: [PATCH 01/12] deprecate markdown story reading --- rasa/shared/constants.py | 3 +++ .../story_reader/markdown_story_reader.py | 26 ++++++++++++++++++- .../test_markdown_story_reader.py | 12 +++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index 4c240bef3de0..a17682a45382 100644 --- a/rasa/shared/constants.py +++ b/rasa/shared/constants.py @@ -19,6 +19,9 @@ DOCS_URL_TRACKER_STORES = DOCS_BASE_URL + "/tracker-stores" DOCS_URL_COMPONENTS = DOCS_BASE_URL + "/components" DOCS_URL_MIGRATION_GUIDE = DOCS_BASE_URL + "/migration-guide" +DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION = ( + f"{DOCS_URL_MIGRATION_GUIDE}#training-data-files" +) DOCS_URL_TELEMETRY = DOCS_BASE_URL + "/telemetry/telemetry" DOCS_BASE_URL_RASA_X = "https://rasa.com/docs/rasa-x" diff --git a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py index 4be37a33b61e..d8a4b933b58d 100644 --- a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py @@ -3,9 +3,10 @@ import os import re from pathlib import Path -from typing import Dict, Text, List, Any, Union, Tuple +from typing import Dict, Text, List, Any, Union, Tuple, Optional import rasa.shared.data +from rasa.shared.core.domain import Domain from rasa.shared.nlu.constants import TEXT, INTENT_NAME_KEY from rasa.shared.nlu.training_data.message import Message from rasa.shared.constants import ( @@ -14,6 +15,7 @@ LEGACY_DOCS_BASE_URL, DEFAULT_E2E_TESTS_PATH, DOCS_URL_STORIES, + DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, ) from rasa.shared.core.events import UserUttered from rasa.shared.nlu.interpreter import RegexInterpreter @@ -30,6 +32,28 @@ class 
MarkdownStoryReader(StoryReader): """Class that reads the core training data in a Markdown format""" + def __init__( + self, + domain: Optional[Domain] = None, + template_vars: Optional[Dict] = None, + use_e2e: bool = False, + source_name: Optional[Text] = None, + is_used_for_training: bool = True, + ignore_deprecation_warning: bool = False, + ) -> None: + """Creates reader. See parent class docstring for more information.""" + super().__init__( + domain, template_vars, use_e2e, source_name, is_used_for_training + ) + + if not ignore_deprecation_warning: + rasa.shared.utils.io.raise_deprecation_warning( + "Markdown Training data is deprecated and will be removed in Rasa Open " + "Source 3.0.0. Please convert your current training data to the " + "YAML training data format.", + docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, + ) + def read_from_file(self, filename: Union[Text, Path]) -> List[StoryStep]: """Given a md file reads the contained stories.""" diff --git a/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py b/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py index 2252c72b8c52..b659ecc29e4e 100644 --- a/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py +++ b/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py @@ -418,3 +418,15 @@ def test_invalid_end_to_end_format(line: Text): with pytest.raises(ValueError): # noinspection PyProtectedMember _ = reader.parse_e2e_message(line) + + +def test_markdown_deprecation(): + with pytest.warns(FutureWarning): + MarkdownStoryReader() + + +def test_skip_markdown_deprecation(): + with pytest.warns(None) as warnings: + MarkdownStoryReader(ignore_deprecation_warning=True) + + assert not warnings From 5a3ffd95dae25897f1d3378558db0983201aa0e5 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 10:54:27 +0100 Subject: [PATCH 02/12] deprecate NLU data reading / writing --- .../nlu/training_data/formats/markdown.py | 
25 +++++++++++++++-- .../nlu/training_data/formats/markdown_nlg.py | 12 ++++++--- .../training_data/formats/test_markdown.py | 24 +++++++++++++++++ .../formats/test_markdown_nlg.py | 27 ++++++++++++++++++- 4 files changed, 81 insertions(+), 7 deletions(-) diff --git a/rasa/shared/nlu/training_data/formats/markdown.py b/rasa/shared/nlu/training_data/formats/markdown.py index e40409c0d4bd..21264341c3af 100644 --- a/rasa/shared/nlu/training_data/formats/markdown.py +++ b/rasa/shared/nlu/training_data/formats/markdown.py @@ -5,7 +5,10 @@ from pathlib import Path from typing import Any, Text, Optional, Tuple, Dict, Union -from rasa.shared.constants import LEGACY_DOCS_BASE_URL +from rasa.shared.constants import ( + LEGACY_DOCS_BASE_URL, + DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, +) from rasa.shared.nlu.constants import TEXT from rasa.shared.nlu.training_data.formats.readerwriter import ( TrainingDataReader, @@ -38,7 +41,8 @@ class MarkdownReader(TrainingDataReader): """Reads markdown training data and creates a TrainingData object.""" - def __init__(self) -> None: + def __init__(self, ignore_deprecation_warning: bool = False,) -> None: + """Creates reader. See parent class docstring for more information.""" super().__init__() self.current_title = None self.current_section = None @@ -47,6 +51,14 @@ def __init__(self) -> None: self.regex_features = [] self.lookup_tables = [] + if not ignore_deprecation_warning: + rasa.shared.utils.io.raise_deprecation_warning( + "Markdown Training data is deprecated and will be removed in Rasa Open " + "Source 3.0.0. 
Please convert your current training data to the " + "YAML training data format.", + docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, + ) + def reads(self, s: Text, **kwargs: Any) -> "TrainingData": """Read markdown string and create TrainingData object""" s = self._strip_comments(s) @@ -179,6 +191,15 @@ def is_markdown_nlu_file(filename: Union[Text, Path]) -> bool: class MarkdownWriter(TrainingDataWriter): + def __init__(self, ignore_deprecation_warning: bool = False,) -> None: + if not ignore_deprecation_warning: + rasa.shared.utils.io.raise_deprecation_warning( + "Markdown Training data is deprecated and will be removed in Rasa Open " + "Source 3.0.0. Please convert your current training data to the " + "YAML training data format.", + docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, + ) + def dumps(self, training_data: "TrainingData") -> Text: """Transforms a TrainingData object into a markdown string.""" diff --git a/rasa/shared/nlu/training_data/formats/markdown_nlg.py b/rasa/shared/nlu/training_data/formats/markdown_nlg.py index b3c683538f0a..f64eb58d1fbb 100644 --- a/rasa/shared/nlu/training_data/formats/markdown_nlg.py +++ b/rasa/shared/nlu/training_data/formats/markdown_nlg.py @@ -4,6 +4,7 @@ from typing import Any, Dict, List, Text, Union from rasa.shared.nlu.constants import TEXT +from rasa.shared.nlu.training_data.formats import MarkdownWriter, MarkdownReader from rasa.shared.nlu.training_data.formats.readerwriter import ( TrainingDataReader, TrainingDataWriter, @@ -21,12 +22,15 @@ NLG_MARKDOWN_MARKER_REGEX = re.compile(r"##\s*.*\n\*[^:]*\/.*\n\s*\t*\-.*") -class NLGMarkdownReader(TrainingDataReader): +class NLGMarkdownReader(MarkdownReader): """Reads markdown training data containing NLG stories and creates a TrainingData object.""" - def __init__(self) -> None: + def __init__(self, ignore_deprecation_warning: bool = False) -> None: + """Creates reader. 
See parent class docstring for more information.""" self.responses = {} - super(NLGMarkdownReader, self).__init__() + super(NLGMarkdownReader, self).__init__( + ignore_deprecation_warning=ignore_deprecation_warning + ) def reads(self, s: Text, **kwargs: Any) -> "TrainingData": """Read markdown string and create TrainingData object""" @@ -101,7 +105,7 @@ def is_markdown_nlg_file(filename: Union[Text, Path]) -> bool: return re.search(NLG_MARKDOWN_MARKER_REGEX, content) is not None -class NLGMarkdownWriter(TrainingDataWriter): +class NLGMarkdownWriter(MarkdownWriter): def dumps(self, training_data: "TrainingData") -> Text: """Transforms the NlG part of TrainingData object into a markdown string.""" diff --git a/tests/shared/nlu/training_data/formats/test_markdown.py b/tests/shared/nlu/training_data/formats/test_markdown.py index fa8e1f069dbb..582a16bf5438 100644 --- a/tests/shared/nlu/training_data/formats/test_markdown.py +++ b/tests/shared/nlu/training_data/formats/test_markdown.py @@ -229,3 +229,27 @@ def test_dump_entities(entity: Dict[Text, Any], expected_output: Text): training_data_object = RasaReader().read_from_json(training_data_json) md_dump = MarkdownWriter().dumps(training_data_object) assert md_dump.splitlines()[1] == expected_output + + +def test_markdown_reading_deprecation(): + with pytest.warns(FutureWarning): + MarkdownReader() + + +def test_skip_markdown_reading_deprecation(): + with pytest.warns(None) as warnings: + MarkdownReader(ignore_deprecation_warning=True) + + assert not warnings + + +def test_markdown_writing_deprecation(): + with pytest.warns(FutureWarning): + MarkdownWriter() + + +def test_skip_markdown_writing_deprecation(): + with pytest.warns(None) as warnings: + MarkdownWriter(ignore_deprecation_warning=True) + + assert not warnings diff --git a/tests/shared/nlu/training_data/formats/test_markdown_nlg.py b/tests/shared/nlu/training_data/formats/test_markdown_nlg.py index 96eac17736d6..a4467b4a229f 100644 --- 
a/tests/shared/nlu/training_data/formats/test_markdown_nlg.py +++ b/tests/shared/nlu/training_data/formats/test_markdown_nlg.py @@ -1,4 +1,5 @@ -from rasa.shared.nlu.training_data.formats import NLGMarkdownReader +import pytest +from rasa.shared.nlu.training_data.formats import NLGMarkdownReader, NLGMarkdownWriter def test_markdow_nlg_read_newlines(): @@ -13,3 +14,27 @@ def test_markdow_nlg_read_newlines(): assert result.responses == { "faq/ask_something": [{"text": "Super answer in 2\nlines"}] } + + +def test_markdown_reading_deprecation(): + with pytest.warns(FutureWarning): + NLGMarkdownReader() + + +def test_skip_markdown_reading_deprecation(): + with pytest.warns(None) as warnings: + NLGMarkdownReader(ignore_deprecation_warning=True) + + assert not warnings + + +def test_markdown_writing_deprecation(): + with pytest.warns(FutureWarning): + NLGMarkdownWriter() + + +def test_skip_markdown_writing_deprecation(): + with pytest.warns(None) as warnings: + NLGMarkdownWriter(ignore_deprecation_warning=True) + + assert not warnings From 98071aaeba43439b5f097f44154f79f3287766f8 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 10:58:08 +0100 Subject: [PATCH 03/12] deprecate markdown story writing --- .../story_writer/markdown_story_writer.py | 14 +++++++++++++- .../story_reader/test_markdown_story_reader.py | 16 ++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/rasa/shared/core/training_data/story_writer/markdown_story_writer.py b/rasa/shared/core/training_data/story_writer/markdown_story_writer.py index 8a34684b3dae..d7f2e27b8a16 100644 --- a/rasa/shared/core/training_data/story_writer/markdown_story_writer.py +++ b/rasa/shared/core/training_data/story_writer/markdown_story_writer.py @@ -1,10 +1,12 @@ from pathlib import Path from typing import List, Text, Union +from rasa.shared.constants import DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION from ruamel import yaml from rasa.shared.core.training_data.story_writer.story_writer 
import StoryWriter from rasa.shared.core.training_data.structures import StoryStep +import rasa.shared.utils.io class MarkdownStoryWriter(StoryWriter): @@ -35,6 +37,7 @@ def dumps( story_steps: List[StoryStep], is_appendable: bool = False, is_test_story: bool = False, + ignore_deprecation_warning: bool = False, ) -> Text: """Turns Story steps into a markdown string. @@ -45,10 +48,19 @@ def dumps( the existing story file. is_test_story: Identifies if the stories should be exported in test stories format. + ignore_deprecation_warning: `True` if printing the deprecation warning + should be suppressed. Returns: - String with story steps in the markdown format. + Story steps in the markdown format. """ + if not ignore_deprecation_warning: + rasa.shared.utils.io.raise_deprecation_warning( + "Markdown Training data is deprecated and will be removed in Rasa Open " + "Source 3.0.0. Please convert your current training data to the " + "YAML training data format.", + docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, + ) return MarkdownStoryWriter._stories_to_md( story_steps, is_appendable, is_test_story ) diff --git a/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py b/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py index b659ecc29e4e..8160472808d9 100644 --- a/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py +++ b/tests/shared/core/training_data/story_reader/test_markdown_story_reader.py @@ -420,13 +420,25 @@ def test_invalid_end_to_end_format(line: Text): _ = reader.parse_e2e_message(line) -def test_markdown_deprecation(): +def test_markdown_reading_deprecation(): with pytest.warns(FutureWarning): MarkdownStoryReader() -def test_skip_markdown_deprecation(): +def test_skip_markdown_reading_deprecation(): with pytest.warns(None) as warnings: MarkdownStoryReader(ignore_deprecation_warning=True) assert not warnings + + +def test_markdown_writing_deprecation(): + with pytest.warns(FutureWarning): + 
MarkdownStoryWriter().dumps([]) + + +def test_skip_markdown_writing_deprecation(): + with pytest.warns(None) as warnings: + MarkdownStoryWriter.dumps([], ignore_deprecation_warning=True) + + assert not warnings From 270b1cea9362f17e020fe3290a4d93132efee567 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:03:13 +0100 Subject: [PATCH 04/12] skip md deprecation warning for story converting --- .../story_markdown_to_yaml_converter.py | 10 ++++++++-- .../test_story_markdown_to_yaml_converter.py | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/rasa/core/training/converters/story_markdown_to_yaml_converter.py b/rasa/core/training/converters/story_markdown_to_yaml_converter.py index ff51d90d4593..4b9200cd6ccb 100644 --- a/rasa/core/training/converters/story_markdown_to_yaml_converter.py +++ b/rasa/core/training/converters/story_markdown_to_yaml_converter.py @@ -41,12 +41,18 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None: # check if source file is test stories file if MarkdownStoryReader.is_test_stories_file(source_path): - reader = MarkdownStoryReader(is_used_for_training=False, use_e2e=True) + reader = MarkdownStoryReader( + is_used_for_training=False, + use_e2e=True, + ignore_deprecation_warning=True, + ) output_core_path = cls._generate_path_for_converted_test_data_file( source_path, output_path ) else: - reader = MarkdownStoryReader(is_used_for_training=False) + reader = MarkdownStoryReader( + is_used_for_training=False, ignore_deprecation_warning=True + ) output_core_path = cls.generate_path_for_converted_training_data_file( source_path, output_path ) diff --git a/tests/core/training/converters/test_story_markdown_to_yaml_converter.py b/tests/core/training/converters/test_story_markdown_to_yaml_converter.py index 04f9a30dd2a0..2487c82108d7 100644 --- a/tests/core/training/converters/test_story_markdown_to_yaml_converter.py +++ 
b/tests/core/training/converters/test_story_markdown_to_yaml_converter.py @@ -51,9 +51,12 @@ async def test_stories_are_converted(tmpdir: Path): with open(training_data_file, "w") as f: f.write(simple_story_md) - await StoryMarkdownToYamlConverter().convert_and_write( - training_data_file, converted_data_folder - ) + with pytest.warns(None) as warnings: + await StoryMarkdownToYamlConverter().convert_and_write( + training_data_file, converted_data_folder + ) + + assert not warnings assert len(os.listdir(converted_data_folder)) == 1 @@ -95,9 +98,12 @@ async def test_test_stories(tmpdir: Path): with open(test_data_file, "w") as f: f.write(simple_story_md) - await StoryMarkdownToYamlConverter().convert_and_write( - test_data_file, converted_data_folder - ) + with pytest.warns(None) as warnings: + await StoryMarkdownToYamlConverter().convert_and_write( + test_data_file, converted_data_folder + ) + + assert not warnings assert len(os.listdir(converted_data_folder)) == 1 From 97414f689bab8f04fffe6daa4125e5930c512bc6 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:03:30 +0100 Subject: [PATCH 05/12] replace `tmpdir` with `tmp_path` as `tmpdir` has weird type --- .../test_story_markdown_to_yaml_converter.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/core/training/converters/test_story_markdown_to_yaml_converter.py b/tests/core/training/converters/test_story_markdown_to_yaml_converter.py index 2487c82108d7..afd19ff64b34 100644 --- a/tests/core/training/converters/test_story_markdown_to_yaml_converter.py +++ b/tests/core/training/converters/test_story_markdown_to_yaml_converter.py @@ -32,11 +32,11 @@ def test_converter_filters_correct_files(training_data_file: Text, should_filter ) -async def test_stories_are_converted(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" +async def test_stories_are_converted(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" 
os.mkdir(converted_data_folder) - training_data_folder = tmpdir / "data/core" + training_data_folder = tmp_path / "data/core" os.makedirs(training_data_folder, exist_ok=True) training_data_file = Path(training_data_folder / "stories.md") @@ -79,11 +79,11 @@ async def test_stories_are_converted(tmpdir: Path): ) -async def test_test_stories(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" +async def test_test_stories(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" os.mkdir(converted_data_folder) - test_data_folder = tmpdir / "tests" + test_data_folder = tmp_path / "tests" os.makedirs(test_data_folder, exist_ok=True) test_data_file = Path(test_data_folder / "test_stories.md") @@ -124,11 +124,11 @@ async def test_test_stories(tmpdir: Path): ) -async def test_test_stories_conversion_response_key(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" +async def test_test_stories_conversion_response_key(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" os.mkdir(converted_data_folder) - test_data_folder = tmpdir / "tests" + test_data_folder = tmp_path / "tests" os.makedirs(test_data_folder, exist_ok=True) test_data_file = Path(test_data_folder / "test_stories.md") @@ -161,11 +161,11 @@ async def test_test_stories_conversion_response_key(tmpdir: Path): ) -async def test_stories_conversion_response_key(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" +async def test_stories_conversion_response_key(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" os.mkdir(converted_data_folder) - training_data_folder = tmpdir / "data/core" + training_data_folder = tmp_path / "data/core" os.makedirs(training_data_folder, exist_ok=True) training_data_file = Path(training_data_folder / "stories.md") From aa9c3f66aa3064b96a6bf0a3b93d6cf1dee430d0 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:06:39 +0100 Subject: [PATCH 06/12] simplify --- 
.../test_nlg_markdown_to_yaml_converter.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py b/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py index f2a47c8e7d90..781109302301 100644 --- a/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py +++ b/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py @@ -23,12 +23,12 @@ def test_converter_filters_correct_files(training_data_file: Text, should_filter assert should_filter == NLGMarkdownToYamlConverter.filter(Path(training_data_file)) -async def test_nlu_intents_are_converted(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" - os.mkdir(converted_data_folder) +async def test_nlu_intents_are_converted(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" + converted_data_folder.mkdir() - training_data_folder = tmpdir / "data" - os.makedirs(training_data_folder, exist_ok=True) + training_data_folder = tmp_path / "data" + training_data_folder.mkdir() training_data_file = Path(training_data_folder) / "responses.md" simple_nlg_md = ( @@ -42,8 +42,7 @@ async def test_nlu_intents_are_converted(tmpdir: Path): "- We're located in the world\n\n" ) - with open(training_data_file, "w") as f: - f.write(simple_nlg_md) + training_data_file.write_text(simple_nlg_md) await NLGMarkdownToYamlConverter().convert_and_write( training_data_file, converted_data_folder @@ -51,13 +50,13 @@ async def test_nlu_intents_are_converted(tmpdir: Path): assert len(os.listdir(converted_data_folder)) == 1 - with open(f"{converted_data_folder}/responses_converted.yml", "r") as f: - content = f.read() - assert content == ( - f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' - "responses:\n" - " utter_chitchat/ask_name:\n" - " - text: my name is Sara, Rasa's documentation bot!\n" - " utter_faq/ask_location:\n" - " - text: We're located in the 
world\n" - ) + converted_responses = converted_data_folder / "responses_converted.yml" + content = converted_responses.read_text() + assert content == ( + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' + "responses:\n" + " utter_chitchat/ask_name:\n" + " - text: my name is Sara, Rasa's documentation bot!\n" + " utter_faq/ask_location:\n" + " - text: We're located in the world\n" + ) From 619ae8cac0c43fe1a967e2acf2da2268f75fc149 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:14:12 +0100 Subject: [PATCH 07/12] skip markdown deprecation for converting response selector things --- .../converters/nlg_markdown_to_yaml_converter.py | 4 ++-- rasa/shared/nlu/training_data/formats/markdown_nlg.py | 5 +---- .../converters/test_nlg_markdown_to_yaml_converter.py | 9 ++++++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py b/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py index b0eb7255643e..34d122cfc863 100644 --- a/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py +++ b/rasa/nlu/training_data/converters/nlg_markdown_to_yaml_converter.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, Text +from typing import Text from rasa.shared.constants import UTTER_PREFIX from rasa.shared.nlu.training_data.formats import NLGMarkdownReader @@ -31,7 +31,7 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None: source_path: Path to the training data file. output_path: Path to the output directory. 
""" - reader = NLGMarkdownReader() + reader = NLGMarkdownReader(ignore_deprecation_warning=True) writer = RasaYAMLWriter() output_nlg_path = cls.generate_path_for_converted_training_data_file( diff --git a/rasa/shared/nlu/training_data/formats/markdown_nlg.py b/rasa/shared/nlu/training_data/formats/markdown_nlg.py index f64eb58d1fbb..22dccff66ddf 100644 --- a/rasa/shared/nlu/training_data/formats/markdown_nlg.py +++ b/rasa/shared/nlu/training_data/formats/markdown_nlg.py @@ -28,13 +28,10 @@ class NLGMarkdownReader(MarkdownReader): def __init__(self, ignore_deprecation_warning: bool = False) -> None: """Creates reader. See parent class docstring for more information.""" self.responses = {} - super(NLGMarkdownReader, self).__init__( - ignore_deprecation_warning=ignore_deprecation_warning - ) + super().__init__(ignore_deprecation_warning=ignore_deprecation_warning) def reads(self, s: Text, **kwargs: Any) -> "TrainingData": """Read markdown string and create TrainingData object""" - self.__init__() lines = s.splitlines() self.responses = self.process_lines(lines) return TrainingData(responses=self.responses) diff --git a/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py b/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py index 781109302301..f7b8b15cb1b7 100644 --- a/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py +++ b/tests/nlu/training_data/converters/test_nlg_markdown_to_yaml_converter.py @@ -44,9 +44,12 @@ async def test_nlu_intents_are_converted(tmp_path: Path): training_data_file.write_text(simple_nlg_md) - await NLGMarkdownToYamlConverter().convert_and_write( - training_data_file, converted_data_folder - ) + with pytest.warns(None) as warnings: + await NLGMarkdownToYamlConverter().convert_and_write( + training_data_file, converted_data_folder + ) + + assert not warnings assert len(os.listdir(converted_data_folder)) == 1 From c1edad83c8fa3d939cff9a7d86a6778181f2d29c Mon Sep 17 00:00:00 2001 
From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:26:43 +0100 Subject: [PATCH 08/12] simplify --- .../test_nlu_markdown_to_yaml_converter.py | 86 +++++++++---------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py b/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py index ac29d60ce6bc..e2858d5c62a6 100644 --- a/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py +++ b/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py @@ -22,12 +22,12 @@ def test_converter_filters_correct_files(training_data_file: Text, should_filter assert should_filter == NLUMarkdownToYamlConverter.filter(Path(training_data_file)) -async def test_nlu_intents_are_converted(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" - os.mkdir(converted_data_folder) +async def test_nlu_intents_are_converted(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" + converted_data_folder.mkdir() - training_data_folder = tmpdir / "data/nlu" - os.makedirs(training_data_folder, exist_ok=True) + training_data_folder = tmp_path / "data" / "nlu" + training_data_folder.mkdir(parents=True) training_data_file = Path(training_data_folder / "nlu.md") simple_nlu_md = """ @@ -36,8 +36,7 @@ async def test_nlu_intents_are_converted(tmpdir: Path): - hello """ - with open(training_data_file, "w") as f: - f.write(simple_nlu_md) + training_data_file.write_text(simple_nlu_md) await NLUMarkdownToYamlConverter().convert_and_write( training_data_file, converted_data_folder @@ -45,42 +44,41 @@ async def test_nlu_intents_are_converted(tmpdir: Path): assert len(os.listdir(converted_data_folder)) == 1 - with open(f"{converted_data_folder}/nlu_converted.yml", "r") as f: - content = f.read() - assert content == ( - f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' - "nlu:\n" - "- intent: greet\n" - " examples: |\n" - " - hey\n" - " - hello\n" - ) + 
converted_file = converted_data_folder / "nlu_converted.yml" + content = converted_file.read_text() + assert content == ( + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' + "nlu:\n" + "- intent: greet\n" + " examples: |\n" + " - hey\n" + " - hello\n" + ) -async def test_nlu_lookup_tables_are_converted(tmpdir: Path): - converted_data_folder = tmpdir / "converted_data" - os.mkdir(converted_data_folder) - training_data_folder = tmpdir / "data/nlu" - os.makedirs(training_data_folder, exist_ok=True) - training_data_file = Path(training_data_folder / "nlu.md") +async def test_nlu_lookup_tables_are_converted(tmp_path: Path): + converted_data_folder = tmp_path / "converted_data" + converted_data_folder.mkdir() - simple_nlu_md = f""" - ## lookup:products.txt - {tmpdir / "data/nlu/lookups/products.txt"} - """ - - with open(training_data_file, "w") as f: - f.write(simple_nlu_md) + training_data_folder = tmp_path / "data" / "nlu" + training_data_folder.mkdir(parents=True) + training_data_file = Path(training_data_folder / "nlu.md") lookup_data_folder = training_data_folder / "lookups" - os.makedirs(lookup_data_folder, exist_ok=True) + lookup_data_folder.mkdir() lookup_tables_file = lookup_data_folder / "products.txt" simple_lookup_table_txt = "core\n nlu\n x\n" - with open(lookup_tables_file, "w") as f: - f.write(simple_lookup_table_txt) + lookup_tables_file.write_text(simple_lookup_table_txt) + + simple_nlu_md = f""" + ## lookup:products.txt + {lookup_tables_file} + """ + + training_data_file.write_text(simple_nlu_md) await NLUMarkdownToYamlConverter().convert_and_write( training_data_file, converted_data_folder @@ -88,14 +86,14 @@ async def test_nlu_lookup_tables_are_converted(tmpdir: Path): assert len(os.listdir(converted_data_folder)) == 1 - with open(f"{converted_data_folder}/products_converted.yml", "r") as f: - content = f.read() - assert content == ( - f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' - "nlu:\n" - "- lookup: products\n" - " examples: |\n" - 
" - core\n" - " - nlu\n" - " - x\n" - ) + converted_file = converted_data_folder / "products_converted.yml" + content = converted_file.read_text() + assert content == ( + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' + "nlu:\n" + "- lookup: products\n" + " examples: |\n" + " - core\n" + " - nlu\n" + " - x\n" + ) From 9e8a0a4863ac98bc54df33946002e7ab1be9b652 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:27:37 +0100 Subject: [PATCH 09/12] skip warnings for NLU conversion --- .../converters/nlu_markdown_to_yaml_converter.py | 4 +++- .../test_nlu_markdown_to_yaml_converter.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py b/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py index 0009759f3b3e..d53affdef0f8 100644 --- a/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py +++ b/rasa/nlu/training_data/converters/nlu_markdown_to_yaml_converter.py @@ -35,7 +35,9 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None: source_path, output_path ) - yaml_training_data = MarkdownReader().read(source_path) + yaml_training_data = MarkdownReader(ignore_deprecation_warning=True).read( + source_path + ) RasaYAMLWriter().dump(output_nlu_path, yaml_training_data) for lookup_table in yaml_training_data.lookup_tables: diff --git a/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py b/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py index e2858d5c62a6..68f384284586 100644 --- a/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py +++ b/tests/nlu/training_data/converters/test_nlu_markdown_to_yaml_converter.py @@ -38,9 +38,11 @@ async def test_nlu_intents_are_converted(tmp_path: Path): training_data_file.write_text(simple_nlu_md) - await NLUMarkdownToYamlConverter().convert_and_write( - training_data_file, 
converted_data_folder - ) + with pytest.warns(None) as warnings: + await NLUMarkdownToYamlConverter().convert_and_write( + training_data_file, converted_data_folder + ) + assert not warnings assert len(os.listdir(converted_data_folder)) == 1 @@ -80,9 +82,11 @@ async def test_nlu_lookup_tables_are_converted(tmp_path: Path): training_data_file.write_text(simple_nlu_md) - await NLUMarkdownToYamlConverter().convert_and_write( - training_data_file, converted_data_folder - ) + with pytest.warns(None) as warnings: + await NLUMarkdownToYamlConverter().convert_and_write( + training_data_file, converted_data_folder + ) + assert not warnings assert len(os.listdir(converted_data_folder)) == 1 From f6925b96bf0059e2ffe48fa6b31d1975c1c2dc00 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:37:17 +0100 Subject: [PATCH 10/12] add changelog and migration guide --- changelog/7495.removal.md | 15 +++++++++++++++ docs/docs/migration-guide.mdx | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 changelog/7495.removal.md diff --git a/changelog/7495.removal.md b/changelog/7495.removal.md new file mode 100644 index 000000000000..24e8f46baa92 --- /dev/null +++ b/changelog/7495.removal.md @@ -0,0 +1,15 @@ +Deprecate training and test data in Markdown format. This includes: +- reading and writing of story files in Markdown format +- reading and writing of NLU data in Markdown format +- reading and writing of retrieval intent data in Markdown format + +Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0. 
+ +Please convert your existing Markdown data by using the commands +from the [migration guide](./migration-guide.mdx#rasa-21-to-rasa-22): + +```bash +rasa data convert nlu -f yaml --data={SOURCE_DIR} --out={TARGET_DIR} +rasa data convert nlg -f yaml --data={SOURCE_DIR} --out={TARGET_DIR} +rasa data convert core -f yaml --data={SOURCE_DIR} --out={TARGET_DIR} +``` diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index b851feb34772..d209dd2979d5 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -10,6 +10,20 @@ description: | This page contains information about changes between major versions and how you can migrate from one version to another. +## Rasa 2.1 to Rasa 2.2 + +### Deprecations + +Training and test data in Markdown format is now deprecated. This includes: +- reading and writing of story files in Markdown format +- reading and writing of NLU data in Markdown format +- reading and writing of retrieval intent data in Markdown format + +Support for Markdown data will be removed entirely in Rasa Open Source 3.0.0. + +Please convert your existing Markdown data by using the commands +described [here](./migration-guide.mdx#training-data-files). + ## Rasa 2.0 to Rasa 2.1 ### Deprecations @@ -52,7 +66,7 @@ in the [forum](https://forum.rasa.com/t/rasa-open-source-2-0-is-out-now-internal ### Training data files As of version 2.0, the new default training data format is yaml. Markdown is still supported, -but this will be deprecated in a future release. +but it is now deprecated and will be removed in Rasa Open Source 3.0.0. You can convert existing NLU, Stories, and NLG (i.e. 
`responses.md`) training data files in the Markdown format to the new YAML format using following commands: From 517426b2959d6c028d0dc782e56e5700ed422b4e Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:50:11 +0100 Subject: [PATCH 11/12] improve phrasing --- rasa/shared/constants.py | 2 +- .../story_reader/markdown_story_reader.py | 6 +++--- .../story_writer/markdown_story_writer.py | 6 +++--- rasa/shared/nlu/training_data/formats/markdown.py | 12 ++++++------ 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index a17682a45382..c19c2da7f87e 100644 --- a/rasa/shared/constants.py +++ b/rasa/shared/constants.py @@ -20,7 +20,7 @@ DOCS_URL_COMPONENTS = DOCS_BASE_URL + "/components" DOCS_URL_MIGRATION_GUIDE = DOCS_BASE_URL + "/migration-guide" DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION = ( - f"{DOCS_URL_MIGRATION_GUIDE}#training-data-files" + f"{DOCS_URL_MIGRATION_GUIDE}#rasa-21-to-rasa-22" ) DOCS_URL_TELEMETRY = DOCS_BASE_URL + "/telemetry/telemetry" DOCS_BASE_URL_RASA_X = "https://rasa.com/docs/rasa-x" diff --git a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py index d8a4b933b58d..b8bbf09ee8c5 100644 --- a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py @@ -48,9 +48,9 @@ def __init__( if not ignore_deprecation_warning: rasa.shared.utils.io.raise_deprecation_warning( - "Markdown Training data is deprecated and will be removed in Rasa Open " - "Source 3.0.0. Please convert your current training data to the " - "YAML training data format.", + "Stories in Markdown format are deprecated and will be removed in Rasa " + "Open Source 3.0.0. 
Please convert your Markdown stories to the " + "new YAML format.", docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, ) diff --git a/rasa/shared/core/training_data/story_writer/markdown_story_writer.py b/rasa/shared/core/training_data/story_writer/markdown_story_writer.py index d7f2e27b8a16..72596f900344 100644 --- a/rasa/shared/core/training_data/story_writer/markdown_story_writer.py +++ b/rasa/shared/core/training_data/story_writer/markdown_story_writer.py @@ -56,9 +56,9 @@ def dumps( """ if not ignore_deprecation_warning: rasa.shared.utils.io.raise_deprecation_warning( - "Markdown Training data is deprecated and will be removed in Rasa Open " - "Source 3.0.0. Please convert your current training data to the " - "YAML training data format.", + "Stories in Markdown format are deprecated and will be removed in Rasa " + "Open Source 3.0.0. Please convert your Markdown stories to the " + "new YAML format.", docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, ) return MarkdownStoryWriter._stories_to_md( diff --git a/rasa/shared/nlu/training_data/formats/markdown.py b/rasa/shared/nlu/training_data/formats/markdown.py index 21264341c3af..ba837e8ce499 100644 --- a/rasa/shared/nlu/training_data/formats/markdown.py +++ b/rasa/shared/nlu/training_data/formats/markdown.py @@ -53,9 +53,9 @@ def __init__(self, ignore_deprecation_warning: bool = False,) -> None: if not ignore_deprecation_warning: rasa.shared.utils.io.raise_deprecation_warning( - "Markdown Training data is deprecated and will be removed in Rasa Open " - "Source 3.0.0. Please convert your current training data to the " - "YAML training data format.", + "NLU data in Markdown format is deprecated and will be removed in Rasa " + "Open Source 3.0.0. 
Please convert your Markdown NLU data to the " + "new YAML training data format.", docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, ) @@ -194,9 +194,9 @@ class MarkdownWriter(TrainingDataWriter): def __init__(self, ignore_deprecation_warning: bool = False,) -> None: if not ignore_deprecation_warning: rasa.shared.utils.io.raise_deprecation_warning( - "Markdown Training data is deprecated and will be removed in Rasa Open " - "Source 3.0.0. Please convert your current training data to the " - "YAML training data format.", + "NLU data in Markdown format is deprecated and will be removed in Rasa " + "Open Source 3.0.0. Please convert your Markdown NLU data to the " + "new YAML training data format.", docs=DOCS_URL_MIGRATION_GUIDE_MD_DEPRECATION, ) From ea963bdb867db4bc6898fc6d2f3dcf370bb0a68b Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 9 Dec 2020 11:52:45 +0100 Subject: [PATCH 12/12] fix docstrings --- .../story_reader/markdown_story_reader.py | 3 +-- rasa/shared/nlu/training_data/formats/markdown.py | 11 +++++++++-- rasa/shared/nlu/training_data/formats/markdown_nlg.py | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py index b8bbf09ee8c5..b3bac66b3b3a 100644 --- a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py @@ -30,7 +30,7 @@ class MarkdownStoryReader(StoryReader): - """Class that reads the core training data in a Markdown format""" + """Class that reads the core training data in a Markdown format.""" def __init__( self, @@ -56,7 +56,6 @@ def __init__( def read_from_file(self, filename: Union[Text, Path]) -> List[StoryStep]: """Given a md file reads the contained stories.""" - try: with open( filename, "r", encoding=rasa.shared.utils.io.DEFAULT_ENCODING diff --git 
a/rasa/shared/nlu/training_data/formats/markdown.py b/rasa/shared/nlu/training_data/formats/markdown.py index ba837e8ce499..195202586cf7 100644 --- a/rasa/shared/nlu/training_data/formats/markdown.py +++ b/rasa/shared/nlu/training_data/formats/markdown.py @@ -60,7 +60,7 @@ def __init__(self, ignore_deprecation_warning: bool = False,) -> None: ) def reads(self, s: Text, **kwargs: Any) -> "TrainingData": - """Read markdown string and create TrainingData object""" + """Read markdown string and create TrainingData object.""" s = self._strip_comments(s) for line in s.splitlines(): line = decode_string(line.strip()) @@ -191,7 +191,15 @@ def is_markdown_nlu_file(filename: Union[Text, Path]) -> bool: class MarkdownWriter(TrainingDataWriter): + """Converts NLU data to Markdown.""" + def __init__(self, ignore_deprecation_warning: bool = False,) -> None: + """Creates writer. + + Args: + ignore_deprecation_warning: `True` if deprecation warning for Markdown + format should be suppressed. + """ if not ignore_deprecation_warning: rasa.shared.utils.io.raise_deprecation_warning( "NLU data in Markdown format is deprecated and will be removed in Rasa " @@ -202,7 +210,6 @@ def __init__(self, ignore_deprecation_warning: bool = False,) -> None: def dumps(self, training_data: "TrainingData") -> Text: """Transforms a TrainingData object into a markdown string.""" - md = "" md += self._generate_training_examples_md(training_data) md += self._generate_synonyms_md(training_data) diff --git a/rasa/shared/nlu/training_data/formats/markdown_nlg.py b/rasa/shared/nlu/training_data/formats/markdown_nlg.py index 22dccff66ddf..c75cd5c0f330 100644 --- a/rasa/shared/nlu/training_data/formats/markdown_nlg.py +++ b/rasa/shared/nlu/training_data/formats/markdown_nlg.py @@ -31,7 +31,7 @@ def __init__(self, ignore_deprecation_warning: bool = False) -> None: super().__init__(ignore_deprecation_warning=ignore_deprecation_warning) def reads(self, s: Text, **kwargs: Any) -> "TrainingData": - """Read markdown 
string and create TrainingData object""" + """Read markdown string and create TrainingData object.""" lines = s.splitlines() self.responses = self.process_lines(lines) return TrainingData(responses=self.responses) @@ -103,9 +103,10 @@ def is_markdown_nlg_file(filename: Union[Text, Path]) -> bool: class NLGMarkdownWriter(MarkdownWriter): + """Converts retrieval intent data to Markdown.""" + def dumps(self, training_data: "TrainingData") -> Text: """Transforms the NlG part of TrainingData object into a markdown string.""" - md = "" for intent, utterances in training_data.responses.items(): md += "## \n"