diff --git a/changelog/7893.improvement.md b/changelog/7893.improvement.md new file mode 100644 index 000000000000..70ce787d1315 --- /dev/null +++ b/changelog/7893.improvement.md @@ -0,0 +1 @@ +Model configuration files are now validated whether they match the expected schema. diff --git a/rasa/cli/data.py b/rasa/cli/data.py index 53b7edbdabc4..1b8672e95bfd 100644 --- a/rasa/cli/data.py +++ b/rasa/cli/data.py @@ -485,7 +485,7 @@ def _migrate_model_config(args: argparse.Namespace) -> None: def _get_configuration(path: Path) -> Dict: config = {} try: - config = rasa.shared.utils.io.read_config_file(path) + config = rasa.shared.utils.io.read_model_configuration(path) except Exception: rasa.shared.utils.cli.print_error_and_exit( f"'{path}' is not a path to a valid model configuration. " diff --git a/rasa/cli/default_config.yml b/rasa/cli/default_config.yml deleted file mode 100644 index 93b81250a29c..000000000000 --- a/rasa/cli/default_config.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: en - -pipeline: supervised_embeddings - -policies: - - name: TEDPolicy - epochs: 200 - batch_size: 50 - max_training_samples: 300 - - name: FallbackPolicy - fallback_action_name: 'action_default_fallback' - - name: MemoizationPolicy - max_history: 5 - - name: FormPolicy - - name: MappingPolicy diff --git a/rasa/core/config.py b/rasa/core/config.py index 5ac6859136d5..9aa68f5ca194 100644 --- a/rasa/core/config.py +++ b/rasa/core/config.py @@ -49,7 +49,7 @@ def load(config_file: Optional[Union[Text, Dict]]) -> List["Policy"]: config_data = {} if isinstance(config_file, str) and os.path.isfile(config_file): - config_data = rasa.shared.utils.io.read_config_file(config_file) + config_data = rasa.shared.utils.io.read_model_configuration(config_file) elif isinstance(config_file, Dict): config_data = config_file diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py index ba4653de101d..ccf55052a433 100644 --- a/rasa/nlu/config.py +++ b/rasa/nlu/config.py @@ -42,7 +42,7 @@ def load( config = DEFAULT_CONFIG_PATH if config is not None: - file_config = rasa.shared.utils.io.read_config_file(config) + file_config = rasa.shared.utils.io.read_model_configuration(config) return _load_from_dict(file_config, **kwargs) diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index 33e0a7e19e47..9fa50c4dcb66 100644 --- a/rasa/shared/constants.py +++ b/rasa/shared/constants.py @@ -31,12 +31,13 @@ PACKAGE_NAME = "rasa" NEXT_MAJOR_VERSION_FOR_DEPRECATIONS = "3.0.0" -CONFIG_SCHEMA_FILE = "shared/nlu/training_data/schemas/config.yml" +MODEL_CONFIG_SCHEMA_FILE = "shared/utils/schemas/model_config.yml" +CONFIG_SCHEMA_FILE = "shared/utils/schemas/config.yml" RESPONSES_SCHEMA_FILE = "shared/nlu/training_data/schemas/responses.yml" SCHEMA_EXTENSIONS_FILE = "shared/utils/pykwalify_extensions.py" LATEST_TRAINING_DATA_FORMAT_VERSION = "2.0" -DOMAIN_SCHEMA_FILE = "utils/schemas/domain.yml" +DOMAIN_SCHEMA_FILE = "shared/utils/schemas/domain.yml" DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES = 60 DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION = True diff --git a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py index e1cb446e8971..000896146f73 100644 --- a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py @@ -56,7 +56,7 @@ KEY_RULE_FOR_CONVERSATION_START = "conversation_start" -CORE_SCHEMA_FILE = "utils/schemas/stories.yml" +CORE_SCHEMA_FILE = "shared/utils/schemas/stories.yml" DEFAULT_VALUE_TEXT_SLOTS = "filled" DEFAULT_VALUE_LIST_SLOTS = [DEFAULT_VALUE_TEXT_SLOTS] diff --git a/rasa/shared/importers/multi_project.py b/rasa/shared/importers/multi_project.py index aa5804e0efc4..59e73c595866 100644 --- a/rasa/shared/importers/multi_project.py +++ b/rasa/shared/importers/multi_project.py @@ -23,7 +23,7 @@ def __init__( training_data_paths: Optional[Union[List[Text], Text]] = None, project_directory: Optional[Text] = None, ): - self.config = rasa.shared.utils.io.read_config_file(config_file) + self.config = rasa.shared.utils.io.read_model_configuration(config_file) if domain_path: self._domain_paths = [domain_path] else: diff --git a/rasa/shared/nlu/training_data/schemas/config.yml b/rasa/shared/nlu/training_data/schemas/config.yml deleted file mode 100644 index 1cf59f653aaf..000000000000 --- a/rasa/shared/nlu/training_data/schemas/config.yml +++ /dev/null @@ -1,8 +0,0 @@ -allowempty: True -mapping: - language: - type: "str" - required: True - pipeline: - type: "any" - required: True diff --git a/rasa/shared/nlu/training_data/schemas/nlu.yml b/rasa/shared/nlu/training_data/schemas/nlu.yml index a15930b3d4ce..216225109f15 100644 --- a/rasa/shared/nlu/training_data/schemas/nlu.yml +++ b/rasa/shared/nlu/training_data/schemas/nlu.yml @@ -47,7 +47,7 @@ mapping: type: "str" examples: *examples_anchor responses: - # see rasa/utils/schemas.yml + # see rasa/shared/nlu/training_data/schemas/responses.yml include: responses regex;(.*): type: "any" diff --git a/rasa/shared/utils/io.py b/rasa/shared/utils/io.py index 008b5dc068a1..281bdae4f4d0 100644 --- a/rasa/shared/utils/io.py +++ b/rasa/shared/utils/io.py @@ -18,12 +18,15 @@ DEFAULT_LOG_LEVEL, ENV_LOG_LEVEL, NEXT_MAJOR_VERSION_FOR_DEPRECATIONS, + CONFIG_SCHEMA_FILE, + MODEL_CONFIG_SCHEMA_FILE, ) from rasa.shared.exceptions import ( FileIOException, FileNotFoundException, YamlSyntaxException, ) +import rasa.shared.utils.validation DEFAULT_ENCODING = "utf-8" YAML_VERSION = (1, 2) @@ -518,29 +521,68 @@ def raise_deprecation_warning( raise_warning(message, FutureWarning, docs, **kwargs) +def read_validated_yaml(filename: Union[Text, Path], schema: Text) -> Any: + """Validates YAML file content and returns parsed content. + + Args: + filename: The path to the file which should be read. + schema: The path to the schema file which should be used for validating the + file content. + + Returns: + The parsed file content. + + Raises: + YamlValidationException: In case the model configuration doesn't match the + expected schema. + """ + content = read_file(filename) + + rasa.shared.utils.validation.validate_yaml_schema(content, schema) + return read_yaml(content) + + def read_config_file(filename: Union[Path, Text]) -> Dict[Text, Any]: - """Parses a yaml configuration file. Content needs to be a dictionary + """Parses a yaml configuration file. Content needs to be a dictionary. Args: filename: The path to the file which should be read. + + Raises: + YamlValidationException: In case file content is not a `Dict`. + + Returns: + Parsed config file. """ - content = read_yaml_file(filename) + return read_validated_yaml(filename, CONFIG_SCHEMA_FILE) - if content is None: - return {} - elif isinstance(content, dict): - return content - else: - raise YamlSyntaxException( - filename, - ValueError( - f"Tried to load configuration file '{filename}'. " - f"Expected a key value mapping but found a {type(content).__name__}" - ), - ) + +def read_model_configuration(filename: Union[Path, Text]) -> Dict[Text, Any]: + """Parses a model configuration file. + + Args: + filename: The path to the file which should be read. + + Raises: + YamlValidationException: In case the model configuration doesn't match the + expected schema. + + Returns: + Parsed config file. + """ + return read_validated_yaml(filename, MODEL_CONFIG_SCHEMA_FILE) def is_subdirectory(path: Text, potential_parent_directory: Text) -> bool: + """Checks if `path` is a subdirectory of `potential_parent_directory`. + + Args: + path: Path to a file or directory. + potential_parent_directory: Potential parent directory. + + Returns: + `True` if `path` is a subdirectory of `potential_parent_directory`. + """ if path is None or potential_parent_directory is None: return False diff --git a/rasa/shared/utils/schemas/config.yml b/rasa/shared/utils/schemas/config.yml new file mode 100644 index 000000000000..6dea8c76af07 --- /dev/null +++ b/rasa/shared/utils/schemas/config.yml @@ -0,0 +1,2 @@ +allowempty: True +type: map diff --git a/rasa/utils/schemas/domain.yml b/rasa/shared/utils/schemas/domain.yml similarity index 100% rename from rasa/utils/schemas/domain.yml rename to rasa/shared/utils/schemas/domain.yml diff --git a/rasa/shared/utils/schemas/model_config.yml b/rasa/shared/utils/schemas/model_config.yml new file mode 100644 index 000000000000..33343250fabd --- /dev/null +++ b/rasa/shared/utils/schemas/model_config.yml @@ -0,0 +1,33 @@ +allowempty: True +mapping: + version: + type: "str" + required: False + allowempty: False + language: + type: "str" + required: False + pipeline: + type: "seq" + required: False + sequence: + - type: "map" + # Only validate required items but do not validate each potential config param + # for the the components + allowempty: True + mapping: + name: + type: str + required: True + policies: + type: "seq" + required: False + sequence: + - type: "map" + # Only validate required items but do not validate each potential config param + # for the the policies + allowempty: True + mapping: + name: + type: str + required: True diff --git a/rasa/utils/schemas/stories.yml b/rasa/shared/utils/schemas/stories.yml similarity index 100% rename from rasa/utils/schemas/stories.yml rename to rasa/shared/utils/schemas/stories.yml diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py index bee77821c7a2..114439370fac 100644 --- a/tests/cli/test_rasa_train.py +++ b/tests/cli/test_rasa_train.py @@ -104,20 +104,12 @@ def test_train_core_compare(run_in_simple_project: Callable[..., RunResult]): temp_dir = os.getcwd() rasa.shared.utils.io.write_yaml( - { - "language": "en", - "pipeline": "supervised_embeddings", - "policies": [{"name": "MemoizationPolicy"}], - }, + {"language": "en", "policies": [{"name": "MemoizationPolicy"}],}, "config_1.yml", ) rasa.shared.utils.io.write_yaml( - { - "language": "en", - "pipeline": "supervised_embeddings", - "policies": [{"name": "MemoizationPolicy"}], - }, + {"language": "en", "policies": [{"name": "MemoizationPolicy"}],}, "config_2.yml", ) diff --git a/tests/conftest.py b/tests/conftest.py index b17c782f9973..58a055771f51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,7 +47,7 @@ ) from rasa.shared.exceptions import RasaException -DEFAULT_CONFIG_PATH = "rasa/cli/default_config.yml" +DEFAULT_CONFIG_PATH = "rasa/shared/importers/default_config.yml" DEFAULT_NLU_DATA = "examples/moodbot/data/nlu.yml" @@ -287,11 +287,7 @@ async def trained_core_model( @pytest.fixture(scope="session") async def trained_nlu_model( - trained_async: Callable, - default_domain_path: Text, - default_config: List[Policy], - default_nlu_data: Text, - default_stories_file: Text, + trained_async: Callable, default_domain_path: Text, default_nlu_data: Text, ) -> Text: trained_nlu_model_path = await trained_async( domain=default_domain_path, diff --git a/tests/shared/utils/test_io.py b/tests/shared/utils/test_io.py index ba02d8a73eff..a0e49aba0f64 100644 --- a/tests/shared/utils/test_io.py +++ b/tests/shared/utils/test_io.py @@ -1,17 +1,18 @@ import os import string +import textwrap import uuid from collections import OrderedDict from pathlib import Path -from typing import Callable, Text, List, Set, Any +from typing import Callable, Text, List, Set, Any, Dict import pytest import rasa.shared from rasa.shared.exceptions import FileIOException, FileNotFoundException import rasa.shared.utils.io +import rasa.shared.utils.validation from rasa.shared.constants import NEXT_MAJOR_VERSION_FOR_DEPRECATIONS -from rasa.shared.utils.io import raise_deprecation_warning from rasa.utils import io as io_utils os.environ["USER_NAME"] = "user" @@ -345,7 +346,7 @@ def test_create_directory_if_already_exists(tmp_path: Path): def test_raise_deprecation_warning(): with pytest.warns(FutureWarning) as record: - raise_deprecation_warning( + rasa.shared.utils.io.raise_deprecation_warning( "This feature is deprecated.", warn_until_version="3.0.0" ) @@ -358,7 +359,7 @@ def test_raise_deprecation_warning(): def test_raise_deprecation_warning_version_already_in_message(): with pytest.warns(FutureWarning) as record: - raise_deprecation_warning( + rasa.shared.utils.io.raise_deprecation_warning( "This feature is deprecated and will be removed in 3.0.0!", warn_until_version="3.0.0", ) @@ -372,7 +373,7 @@ def test_raise_deprecation_warning_version_already_in_message(): def test_raise_deprecation_warning_default(): with pytest.warns(FutureWarning) as record: - raise_deprecation_warning("This feature is deprecated.") + rasa.shared.utils.io.raise_deprecation_warning("This feature is deprecated.") assert len(record) == 1 assert record[0].message.args[0] == ( @@ -386,3 +387,126 @@ def test_read_file_with_wrong_encoding(tmp_path: Path): file.write_text("รค", encoding="latin-1") with pytest.raises(FileIOException): rasa.shared.utils.io.read_file(file) + + +@pytest.mark.parametrize("config_file", Path("data", "configs_for_docs").glob("*.yml")) +def test_validate_config_file(config_file: Path): + # does not raise + rasa.shared.utils.io.read_model_configuration(config_file) + + +def test_validate_config_file_with_extra_keys(tmp_path: Path): + content = textwrap.dedent( + """ + language: en + pipeline: + policies: + + importers: + - RasaFileImporter + """ + ) + config_file = tmp_path / "config.yml" + config_file.write_text(content) + + rasa.shared.utils.io.read_model_configuration(config_file) + + +@pytest.mark.parametrize( + "config", + [ + # Pre 2.x pipeline templates are invalid + textwrap.dedent( + """ + pipeline: supervised_embeddings + """ + ), + # Each list item needs the `name` property + textwrap.dedent( + """ + pipeline: + - DIETClassier + policies: + """ + ), + # Name property is missing + textwrap.dedent( + """ + pipeline: + policies: + - some_attribute: "lala" + """ + ), + # Name property is not a string + textwrap.dedent( + """ + pipeline: + policies: + - name: 1234 + """ + ), + # Invalid training data version + textwrap.dedent( + """ + version: 2.0 + policies: + pipeline: + """ + ), + # Language has wrong type + textwrap.dedent( + """ + language: [] + policies: + pipeline: + """ + ), + ], +) +def test_invalid_config_files(config: Text, tmp_path: Path): + config_file = tmp_path / "config.yml" + config_file.write_text(config) + with pytest.raises(rasa.shared.utils.validation.YamlValidationException): + rasa.shared.utils.io.read_model_configuration(config_file) + + +@pytest.mark.parametrize( + "content, expected", + [ + ("rest:", {"rest": None}), + ( + textwrap.dedent( + """ + tracker_store: + password: test + """ + ), + {"tracker_store": {"password": "test"}}, + ), + ], +) +def test_read_config_file(tmp_path: Path, content: Text, expected: Dict): + config_file = tmp_path / "file.yml" + config_file.write_text(content) + + assert rasa.shared.utils.io.read_config_file(config_file) == expected + + +@pytest.mark.parametrize( + "content", + [ + "text", + textwrap.dedent( + """ + - item1 + - item2 + """ + ), + ], +) +def test_read_invalid_config_file(tmp_path: Path, content: Text): + config_file = tmp_path / "file.yml" + config_file.write_text(content) + + with pytest.raises(rasa.shared.utils.validation.YamlValidationException): + rasa.shared.utils.io.read_model_configuration(config_file) diff --git a/tests/shared/utils/test_validation.py b/tests/shared/utils/test_validation.py index 89cf95996af7..b679ee74c29e 100644 --- a/tests/shared/utils/test_validation.py +++ b/tests/shared/utils/test_validation.py @@ -1,3 +1,5 @@ +from typing import Text + import pytest from pep440_version_utils import Version @@ -36,10 +38,9 @@ def test_validate_yaml_schema(file, schema): ("data/test_domains/wrong_response_format.yml", DOMAIN_SCHEMA_FILE), ("data/test_domains/wrong_custom_response_format.yml", DOMAIN_SCHEMA_FILE), ("data/test_domains/empty_response_format.yml", DOMAIN_SCHEMA_FILE), - ("data/test_config/example_config.yaml", CONFIG_SCHEMA_FILE), ], ) -def test_validate_yaml_schema_raise_exception(file, schema): +def test_validate_yaml_schema_raise_exception(file: Text, schema: Text): with pytest.raises(YamlException): validation_utils.validate_yaml_schema( rasa.shared.utils.io.read_file(file), schema