Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve domain slot ordering and multistrings while dumping #7418

Merged
merged 3 commits into from
Dec 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelog/7418.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Preserve `domain` slot ordering while dumping it back to the file.
- Preserve multiline `text` examples of `responses` defined in `domain` and `NLU` training data.
2 changes: 1 addition & 1 deletion rasa/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin
domain = copy.copy(domain)
# don't include the response texts in the fingerprint.
# Their fingerprint is separate.
domain.templates = []
domain.templates = {}

return {
FINGERPRINT_CONFIG_KEY: _get_fingerprint_of_config(
Expand Down
36 changes: 34 additions & 2 deletions rasa/shared/core/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
KEY_ACTIONS = "actions"
KEY_FORMS = "forms"
KEY_E2E_ACTIONS = "e2e_actions"
KEY_RESPONSES_TEXT = "text"

ALL_DOMAIN_KEYS = [
KEY_SLOTS,
Expand Down Expand Up @@ -286,8 +287,8 @@ def collect_slots(slot_dict: Dict[Text, Any]) -> List[Slot]:
slots = []
# make a copy to not alter the input dictionary
slot_dict = copy.deepcopy(slot_dict)
# Sort the slots so that the order of the slot states is consistent
for slot_name in sorted(slot_dict):
# Don't sort the slots, see https://github.com/RasaHQ/rasa-x/issues/3900
for slot_name in slot_dict:
slot_type = slot_dict[slot_name].pop("type", None)
slot_class = Slot.resolve_by_type(slot_type)

Expand Down Expand Up @@ -1057,6 +1058,33 @@ def as_dict(self) -> Dict[Text, Any]:
KEY_E2E_ACTIONS: self.action_texts,
}

@staticmethod
def get_responses_with_multilines(
responses: Dict[Text, List[Dict[Text, Any]]]
) -> Dict[Text, List[Dict[Text, Any]]]:
"""Returns `responses` with preserved multilines in the `text` key.

Args:
responses: Original `responses`.

Returns:
`responses` with preserved multilines in the `text` key.
"""
from ruamel.yaml.scalarstring import LiteralScalarString

final_responses = responses.copy()
for response_name, examples in final_responses.items():
for i, example in enumerate(examples):
response_text = example.get(KEY_RESPONSES_TEXT, "")
if not response_text or "\n" not in response_text:
continue
# Has new lines, use `LiteralScalarString`
final_responses[response_name][i][
KEY_RESPONSES_TEXT
] = LiteralScalarString(response_text)

return final_responses

def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]:
"""Transform intent properties for displaying or writing into a domain file.

Expand Down Expand Up @@ -1200,6 +1228,10 @@ def as_yaml(self, clean_before_dump: bool = False) -> Text:
domain_data.update(self.cleaned_domain())
else:
domain_data.update(self.as_dict())
if domain_data.get(KEY_RESPONSES, {}):
domain_data[KEY_RESPONSES] = self.get_responses_with_multilines(
domain_data[KEY_RESPONSES]
)

return rasa.shared.utils.io.dump_obj_as_yaml_to_string(
domain_data, should_preserve_key_order=True
Expand Down
5 changes: 4 additions & 1 deletion rasa/shared/nlu/training_data/formats/rasa_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional

import rasa.shared.data
from rasa.shared.core.domain import Domain
from rasa.shared.exceptions import YamlException
from rasa.shared.utils import validation
from ruamel.yaml import StringIO
Expand Down Expand Up @@ -409,7 +410,9 @@ def training_data_to_dict(
result[KEY_NLU] = nlu_items

if training_data.responses:
result[KEY_RESPONSES] = training_data.responses
result[KEY_RESPONSES] = Domain.get_responses_with_multilines(
training_data.responses
)

return result

Expand Down
105 changes: 104 additions & 1 deletion tests/shared/core/test_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

from rasa.shared.exceptions import YamlSyntaxException
import rasa.shared.utils.io
from rasa.shared.constants import DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES
from rasa.shared.constants import (
DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES,
LATEST_TRAINING_DATA_FORMAT_VERSION,
)
from rasa.core import training, utils
from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer
from rasa.shared.core.slots import InvalidSlotTypeException, TextSlot
Expand Down Expand Up @@ -1142,3 +1145,103 @@ def test_valid_slot_mappings(domain_as_dict: Dict[Text, Any]):
def test_form_invalid_mappings(domain_as_dict: Dict[Text, Any]):
with pytest.raises(InvalidDomain):
Domain.from_dict(domain_as_dict)


def test_slot_order_is_preserved():
degiz marked this conversation as resolved.
Show resolved Hide resolved
test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
slots:
confirm:
type: bool
influence_conversation: false
previous_email:
type: text
influence_conversation: false
caller_id:
type: text
influence_conversation: false
email:
type: text
influence_conversation: false
incident_title:
type: text
influence_conversation: false
priority:
type: text
influence_conversation: false
problem_description:
type: text
influence_conversation: false
requested_slot:
type: text
influence_conversation: false
handoff_to:
type: text
influence_conversation: false
"""

domain = Domain.from_yaml(test_yaml)
assert domain.as_yaml(clean_before_dump=True) == test_yaml


def test_slot_order_is_preserved_when_merging():

slot_1 = """
b:
type: text
influence_conversation: false
a:
type: text
influence_conversation: false"""

test_yaml_1 = f"""
slots:{slot_1}
"""

slot_2 = """
d:
type: text
influence_conversation: false
c:
type: text
influence_conversation: false"""

test_yaml_2 = f"""
slots:{slot_2}
"""

test_yaml_merged = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
slots:{slot_2}{slot_1}
"""

domain_1 = Domain.from_yaml(test_yaml_1)
domain_2 = Domain.from_yaml(test_yaml_2)
domain_merged = domain_1.merge(domain_2)

assert domain_merged.as_yaml(clean_before_dump=True) == test_yaml_merged


def test_responses_text_multiline_is_preserved():
test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
responses:
utter_confirm:
- text: |-
First line
Second line
Third line
- text: One more response
utter_cancel:
- text: First line
- text: Second line
"""

domain = Domain.from_yaml(test_yaml)
assert domain.as_yaml(clean_before_dump=True) == test_yaml
30 changes: 30 additions & 0 deletions tests/shared/nlu/training_data/formats/test_rasa_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,33 @@ def test_responses_are_converted_from_markdown():

# dumping again should also not change the format
assert dumped == RasaYAMLWriter().dumps(dumped_result)


def test_responses_text_multiline_is_preserved():
responses_yml = textwrap.dedent(
"""
responses:
utter_confirm:
- text: |-
First line
Second line
Third line
- text: One more response
utter_cancel:
- text: First line
- text: Second line
"""
)

reader = RasaYAMLReader()
result = reader.reads(responses_yml)

dumped = RasaYAMLWriter().dumps(result)

validation_reader = RasaYAMLReader()
dumped_result = validation_reader.reads(dumped)

assert dumped_result.responses == result.responses

# dumping again should also not change the format
assert dumped == RasaYAMLWriter().dumps(dumped_result)