From 0b7b3d7f5dba3da3fc0db683adb2b5c4731621c9 Mon Sep 17 00:00:00 2001 From: Alexander Khizov Date: Tue, 7 Jul 2020 15:22:17 +0200 Subject: [PATCH] Domain file changes for the 2.0 format - Add possibility to split the domain into separate files --- .github/scripts/mr_generate_summary.py | 3 +- changelog/6132.feature.rst | 5 + rasa/cli/arguments/default_arguments.py | 4 +- rasa/constants.py | 2 +- rasa/core/domain.py | 123 +++++++++++++++--------- rasa/data.py | 15 --- rasa/importers/multi_project.py | 2 +- tests/core/test_domain.py | 58 ++++++----- 8 files changed, 119 insertions(+), 93 deletions(-) create mode 100644 changelog/6132.feature.rst diff --git a/.github/scripts/mr_generate_summary.py b/.github/scripts/mr_generate_summary.py index c8d1bbc72c2d..ebfad6928ad9 100644 --- a/.github/scripts/mr_generate_summary.py +++ b/.github/scripts/mr_generate_summary.py @@ -32,6 +32,7 @@ def generate_json(file, task, data): return data + def read_results(file): with open(file) as json_file: data = json.load(json_file) @@ -53,7 +54,7 @@ def read_results(file): if f not in task_mapping.keys(): continue - data = generate_json(os.path.join(dirpath, f),task_mapping[f], data) + data = generate_json(os.path.join(dirpath, f), task_mapping[f], data) with open(SUMMARY_FILE, "w") as f: json.dump(data, f, sort_keys=True, indent=2) diff --git a/changelog/6132.feature.rst b/changelog/6132.feature.rst new file mode 100644 index 000000000000..b721782ca648 --- /dev/null +++ b/changelog/6132.feature.rst @@ -0,0 +1,5 @@ +Added the possibility to split the domain into separate files. All YAML files under the path specified with ``--domain`` will be scanned for domain information (e.g. intents, actions, etc.) and then combined into a single domain. + +The default value for ``--domain`` is still ``domain.yml``. + +Also, the default session expiration time is set to 60 minutes now.
\ No newline at end of file diff --git a/rasa/cli/arguments/default_arguments.py b/rasa/cli/arguments/default_arguments.py index f4494f98146f..9431650c8b93 100644 --- a/rasa/cli/arguments/default_arguments.py +++ b/rasa/cli/arguments/default_arguments.py @@ -56,7 +56,9 @@ def add_domain_param( "--domain", type=str, default=DEFAULT_DOMAIN_PATH, - help="Domain specification (yml file).", + help="Domain specification. It can be a single 'yaml' file, or a directory " + "that contains several files with domain specification in it. The content " + "of these files will be read and merged together.", ) diff --git a/rasa/constants.py b/rasa/constants.py index c09aa9c741f4..9311afefdd5c 100644 --- a/rasa/constants.py +++ b/rasa/constants.py @@ -68,7 +68,7 @@ ENV_SANIC_WORKERS = "SANIC_WORKERS" ENV_SANIC_BACKLOG = "SANIC_BACKLOG" -DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES = 60 +DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES = 0 DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION = True ENV_GPU_CONFIG = "TF_GPU_MEMORY_ALLOC" diff --git a/rasa/core/domain.py b/rasa/core/domain.py index 40953e3c99a5..8f1728b99d65 100644 --- a/rasa/core/domain.py +++ b/rasa/core/domain.py @@ -7,6 +7,8 @@ from pathlib import Path from typing import Any, Dict, List, NamedTuple, Optional, Set, Text, Tuple, Union +from ruamel.yaml import YAMLError + import rasa.core.constants from rasa.utils.common import ( raise_warning, @@ -19,6 +21,7 @@ DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION, DOMAIN_SCHEMA_FILE, DOCS_URL_DOMAINS, + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, ) from rasa.core import utils from rasa.core.actions import action # pytype: disable=pyi-error @@ -47,6 +50,23 @@ USE_ENTITIES_KEY = "use_entities" IGNORE_ENTITIES_KEY = "ignore_entities" +KEY_SLOTS = "slots" +KEY_INTENTS = "intents" +KEY_ENTITIES = "entities" +KEY_RESPONSES = "responses" +KEY_ACTIONS = "actions" +KEY_FORMS = "forms" + +ALL_DOMAIN_KEYS = [ + KEY_SLOTS, + KEY_FORMS, + KEY_ACTIONS, + KEY_ENTITIES, + KEY_INTENTS, + KEY_RESPONSES, +] + + if 
typing.TYPE_CHECKING: from rasa.core.trackers import DialogueStateTracker @@ -69,7 +89,10 @@ class SessionConfig(NamedTuple): @staticmethod def default() -> "SessionConfig": # TODO: 2.0, reconsider how to apply sessions to old projects - return SessionConfig(0, DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION) + return SessionConfig( + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, + DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION, + ) def are_sessions_enabled(self) -> bool: return self.session_expiration_time > 0 @@ -134,7 +157,7 @@ def from_yaml(cls, yaml: Text) -> "Domain": @classmethod def from_dict(cls, data: Dict) -> "Domain": - utter_templates = cls.collect_templates(data.get("responses", {})) + utter_templates = cls.collect_templates(data.get(KEY_RESPONSES, {})) if "templates" in data: raise_warning( "Your domain file contains the key: 'templates'. This has been " @@ -146,54 +169,45 @@ def from_dict(cls, data: Dict) -> "Domain": ) utter_templates = cls.collect_templates(data.get("templates", {})) - slots = cls.collect_slots(data.get("slots", {})) + slots = cls.collect_slots(data.get(KEY_SLOTS, {})) additional_arguments = data.get("config", {}) session_config = cls._get_session_config(data.get(SESSION_CONFIG_KEY, {})) - intents = data.get("intents", {}) + intents = data.get(KEY_INTENTS, {}) return cls( intents, - data.get("entities", []), + data.get(KEY_ENTITIES, []), slots, utter_templates, - data.get("actions", []), - data.get("forms", []), + data.get(KEY_ACTIONS, []), + data.get(KEY_FORMS, []), session_config=session_config, **additional_arguments, ) @staticmethod def _get_session_config(session_config: Dict) -> SessionConfig: - session_expiration_time = session_config.get(SESSION_EXPIRATION_TIME_KEY) + session_expiration_time_min = session_config.get(SESSION_EXPIRATION_TIME_KEY) # TODO: 2.0 reconsider how to apply sessions to old projects and legacy trackers - if session_expiration_time is None: - raise_warning( - "No tracker session configuration was found in the loaded 
domain. " - "Domains without a session config will automatically receive a " - "session expiration time of 60 minutes in Rasa version 2.0 if not " - "configured otherwise.", - FutureWarning, - docs=DOCS_URL_DOMAINS + "#session-configuration", - ) - session_expiration_time = 0 + if session_expiration_time_min is None: + session_expiration_time_min = DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES carry_over_slots = session_config.get( CARRY_OVER_SLOTS_KEY, DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION ) - return SessionConfig(session_expiration_time, carry_over_slots) + return SessionConfig(session_expiration_time_min, carry_over_slots) @classmethod def from_directory(cls, path: Text) -> "Domain": """Loads and merges multiple domain files recursively from a directory tree.""" - from rasa import data domain = Domain.empty() for root, _, files in os.walk(path, followlinks=True): for file in files: full_path = os.path.join(root, file) - if data.is_domain_file(full_path): + if Domain.is_domain_file(full_path): other = Domain.from_file(full_path) domain = other.merge(domain) @@ -236,20 +250,20 @@ def merge_lists(l1: List[Any], l2: List[Any]) -> List[Any]: combined[SESSION_CONFIG_KEY] = domain_dict[SESSION_CONFIG_KEY] # intents is list of dicts - intents_1 = {list(i.keys())[0]: i for i in combined["intents"]} - intents_2 = {list(i.keys())[0]: i for i in domain_dict["intents"]} + intents_1 = {list(i.keys())[0]: i for i in combined[KEY_INTENTS]} + intents_2 = {list(i.keys())[0]: i for i in domain_dict[KEY_INTENTS]} merged_intents = merge_dicts(intents_1, intents_2, override) - combined["intents"] = list(merged_intents.values()) + combined[KEY_INTENTS] = list(merged_intents.values()) # remove existing forms from new actions - for form in combined["forms"]: - if form in domain_dict["actions"]: - domain_dict["actions"].remove(form) + for form in combined[KEY_FORMS]: + if form in domain_dict[KEY_ACTIONS]: + domain_dict[KEY_ACTIONS].remove(form) - for key in ["entities", "actions", 
"forms"]: + for key in [KEY_ENTITIES, KEY_ACTIONS, KEY_FORMS]: combined[key] = merge_lists(combined[key], domain_dict[key]) - for key in ["responses", "slots"]: + for key in [KEY_RESPONSES, KEY_SLOTS]: combined[key] = merge_dicts(combined[key], domain_dict[key], override) return self.__class__.from_dict(combined) @@ -431,8 +445,8 @@ def __init__( def __hash__(self) -> int: self_as_dict = self.as_dict() - self_as_dict["intents"] = sort_list_of_dicts_by_first_key( - self_as_dict["intents"] + self_as_dict[KEY_INTENTS] = sort_list_of_dicts_by_first_key( + self_as_dict[KEY_INTENTS] ) self_as_string = json.dumps(self_as_dict, sort_keys=True) text_hash = utils.get_text_hash(self_as_string) @@ -774,12 +788,12 @@ def as_dict(self) -> Dict[Text, Any]: SESSION_EXPIRATION_TIME_KEY: self.session_config.session_expiration_time, CARRY_OVER_SLOTS_KEY: self.session_config.carry_over_slots, }, - "intents": self._transform_intents_for_file(), - "entities": self.entities, - "slots": self._slot_definitions(), - "responses": self.templates, - "actions": self.user_actions, # class names of the actions - "forms": self.form_names, + KEY_INTENTS: self._transform_intents_for_file(), + KEY_ENTITIES: self.entities, + KEY_SLOTS: self._slot_definitions(), + KEY_RESPONSES: self.templates, + KEY_ACTIONS: self.user_actions, # class names of the actions + KEY_FORMS: self.form_names, } def persist(self, filename: Union[Text, Path]) -> None: @@ -827,16 +841,16 @@ def cleaned_domain(self) -> Dict[Text, Any]: """ domain_data = self.as_dict() - for idx, intent_info in enumerate(domain_data["intents"]): + for idx, intent_info in enumerate(domain_data[KEY_INTENTS]): for name, intent in intent_info.items(): if intent.get(USE_ENTITIES_KEY) is True: del intent[USE_ENTITIES_KEY] if not intent.get(IGNORE_ENTITIES_KEY): intent.pop(IGNORE_ENTITIES_KEY, None) if len(intent) == 0: - domain_data["intents"][idx] = name + domain_data[KEY_INTENTS][idx] = name - for slot in domain_data["slots"].values(): # pytype: 
disable=attribute-error + for slot in domain_data[KEY_SLOTS].values(): # pytype: disable=attribute-error if slot["initial_value"] is None: del slot["initial_value"] if slot["auto_fill"]: @@ -1040,9 +1054,9 @@ def get_duplicate_exception_message( raise InvalidDomain( get_exception_message( [ - (duplicate_actions, "actions"), - (duplicate_slots, "slots"), - (duplicate_entities, "entities"), + (duplicate_actions, KEY_ACTIONS), + (duplicate_slots, KEY_SLOTS), + (duplicate_entities, KEY_ENTITIES), ], incorrect_mappings, ) @@ -1074,6 +1088,29 @@ def is_empty(self) -> bool: return self.as_dict() == Domain.empty().as_dict() + @staticmethod + def is_domain_file(filename: Text) -> bool: + """Checks whether the given file path is a Rasa domain file. + + Args: + filename: Path of the file which should be checked. + + Returns: + `True` if it's a domain file, otherwise `False`. + """ + from rasa.data import YAML_FILE_EXTENSIONS + + if Path(filename).suffix not in YAML_FILE_EXTENSIONS: + return False + try: + content = rasa.utils.io.read_yaml_file(filename) + except YAMLError: + return False + # Only mappings hold domain keys; `in` on a YAML string would substring-match. + if not isinstance(content, dict): + return False + return any(key in content for key in ALL_DOMAIN_KEYS) + class TemplateDomain(Domain): pass diff --git a/rasa/data.py b/rasa/data.py index 2041ddf1a519..fbcf7c9d7e45 100644 --- a/rasa/data.py +++ b/rasa/data.py @@ -185,21 +185,6 @@ def is_end_to_end_conversation_test_file(file_path: Text) -> bool: ) -def is_domain_file(file_path: Text) -> bool: - """Checks whether the given file path is a Rasa domain file. - - Args: - file_path: Path of the file which should be checked. - - Returns: - `True` if it's a domain file, otherwise `False`. - """ - - file_name = os.path.basename(file_path) - - return file_name in ["domain.yml", "domain.yaml"] - - def is_config_file(file_path: Text) -> bool: """Checks whether the given file path is a Rasa config file.
diff --git a/rasa/importers/multi_project.py b/rasa/importers/multi_project.py index 381e3a80cd05..17dff5af01af 100644 --- a/rasa/importers/multi_project.py +++ b/rasa/importers/multi_project.py @@ -97,7 +97,7 @@ def _init_from_directory(self, path: Text): if data.is_end_to_end_conversation_test_file(full_path): self._e2e_story_paths.append(full_path) - elif data.is_domain_file(full_path): + elif Domain.is_domain_file(full_path): self._domain_paths.append(full_path) elif data.is_nlu_file(full_path): self._nlu_paths.append(full_path) diff --git a/tests/core/test_domain.py b/tests/core/test_domain.py index 4953d7282374..405d82d84a7e 100644 --- a/tests/core/test_domain.py +++ b/tests/core/test_domain.py @@ -5,6 +5,7 @@ import pytest from _pytest.tmpdir import TempdirFactory +from rasa.constants import DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES from rasa.core.constants import ( DEFAULT_KNOWLEDGE_BASE_ACTION, SLOT_LISTED_ITEMS, @@ -237,7 +238,7 @@ def test_domain_fails_on_unknown_custom_slot_type(tmpdir, domain_unkown_slot_typ def test_domain_to_yaml(): - test_yaml = """config: + test_yaml = f"""config: store_entities_as_slots: true entities: [] forms: [] @@ -247,8 +248,8 @@ def test_domain_to_yaml(): - text: hey there! session_config: carry_over_slots_to_new_session: true - session_expiration_time: 60 -slots: {}""" + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +slots: {{}}""" domain = Domain.from_yaml(test_yaml) # python 3 and 2 are different here, python 3 will have a leading set @@ -258,7 +259,7 @@ def test_domain_to_yaml(): def test_domain_to_yaml_deprecated_templates(): - test_yaml = """actions: + test_yaml = f"""actions: - utter_greet config: store_entities_as_slots: true @@ -270,10 +271,10 @@ def test_domain_to_yaml_deprecated_templates(): - text: hey there! 
session_config: carry_over_slots_to_new_session: true - session_expiration_time: 60 -slots: {}""" + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +slots: {{}}""" - target_yaml = """actions: + target_yaml = f"""actions: - utter_greet config: store_entities_as_slots: true @@ -285,8 +286,8 @@ def test_domain_to_yaml_deprecated_templates(): - text: hey there! session_config: carry_over_slots_to_new_session: true - session_expiration_time: 60 -slots: {}""" + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +slots: {{}}""" domain = Domain.from_yaml(test_yaml) # python 3 and 2 are different here, python 3 will have a leading set @@ -430,29 +431,24 @@ def test_collect_intent_properties(intents, entities, intent_properties): def test_load_domain_from_directory_tree(tmpdir_factory: TempdirFactory): root = tmpdir_factory.mktemp("Parent Bot") root_domain = {"actions": ["utter_root", "utter_root2"]} - utils.dump_obj_as_yaml_to_file(root / "domain.yml", root_domain) + utils.dump_obj_as_yaml_to_file(root / "domain_pt1.yml", root_domain) subdirectory_1 = root / "Skill 1" subdirectory_1.mkdir() skill_1_domain = {"actions": ["utter_skill_1"]} - utils.dump_obj_as_yaml_to_file(subdirectory_1 / "domain.yml", skill_1_domain) + utils.dump_obj_as_yaml_to_file(subdirectory_1 / "domain_pt2.yml", skill_1_domain) subdirectory_2 = root / "Skill 2" subdirectory_2.mkdir() skill_2_domain = {"actions": ["utter_skill_2"]} - utils.dump_obj_as_yaml_to_file(subdirectory_2 / "domain.yml", skill_2_domain) + utils.dump_obj_as_yaml_to_file(subdirectory_2 / "domain_pt3.yml", skill_2_domain) subsubdirectory = subdirectory_2 / "Skill 2-1" subsubdirectory.mkdir() skill_2_1_domain = {"actions": ["utter_subskill", "utter_root"]} # Check if loading from `.yaml` also works - utils.dump_obj_as_yaml_to_file(subsubdirectory / "domain.yaml", skill_2_1_domain) - - subsubdirectory_2 = subdirectory_2 / "Skill 2-2" - subsubdirectory_2.mkdir() - excluded_domain = {"actions": 
["should not be loaded"]} utils.dump_obj_as_yaml_to_file( - subsubdirectory_2 / "other_name.yaml", excluded_domain + subsubdirectory / "domain_pt4.yaml", skill_2_1_domain ) actual = Domain.load(str(root)) @@ -644,7 +640,7 @@ def test_clean_domain_for_file(): "actions": ["utter_default", "utter_goodbye", "utter_greet"], "session_config": { "carry_over_slots_to_new_session": True, - "session_expiration_time": 0, + "session_expiration_time": DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, }, } @@ -705,17 +701,17 @@ def test_add_knowledge_base_slots(default_domain): "input_domain, expected_session_expiration_time, expected_carry_over_slots", [ ( - """session_config: - session_expiration_time: 0 + f"""session_config: + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} carry_over_slots_to_new_session: true""", - 0, + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, True, ), - ("", 0, True), + ("", DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, True), ( """session_config: carry_over_slots_to_new_session: false""", - 0, + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, False, ), ( @@ -725,7 +721,7 @@ def test_add_knowledge_base_slots(default_domain): 20.2, False, ), - ("""session_config: {}""", 0, True), + ("""session_config: {}""", DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, True), ], ) def test_session_config( @@ -765,7 +761,7 @@ def test_are_sessions_enabled(session_config: SessionConfig, enabled: bool): def test_domain_utterance_actions_deprecated_templates(): - new_yaml = """actions: + new_yaml = f"""actions: - utter_greet - utter_goodbye config: @@ -780,10 +776,10 @@ def test_domain_utterance_actions_deprecated_templates(): - text: bye! 
session_config: carry_over_slots_to_new_session: true - session_expiration_time: 60 -slots: {}""" + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +slots: {{}}""" - old_yaml = """config: + old_yaml = f"""config: store_entities_as_slots: true entities: [] forms: [] @@ -795,8 +791,8 @@ def test_domain_utterance_actions_deprecated_templates(): - text: bye! session_config: carry_over_slots_to_new_session: true - session_expiration_time: 60 -slots: {}""" + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +slots: {{}}""" old_domain = Domain.from_yaml(old_yaml) new_domain = Domain.from_yaml(new_yaml)