Skip to content

Commit

Permalink
Preserve slot ordering
Browse files Browse the repository at this point in the history
Add changelog

Preserve multiline strings in respones text inside domain

Preserve responses in NLU data as well

fix changelog entry

Take care of possible multiple text examples

Address PR comments
  • Loading branch information
Alexander Khizov committed Dec 3, 2020
1 parent 5539966 commit 573b9dc
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 5 deletions.
2 changes: 2 additions & 0 deletions changelog/7418.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Preserve `domain` slot ordering while dumping it back to the file.
- Preserve multiline `text` examples of `responses` defined in `domain` and `NLU` training data.
2 changes: 1 addition & 1 deletion rasa/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin
domain = copy.copy(domain)
# don't include the response texts in the fingerprint.
# Their fingerprint is separate.
domain.templates = []
domain.templates = {}

return {
FINGERPRINT_CONFIG_KEY: _get_fingerprint_of_config(
Expand Down
36 changes: 34 additions & 2 deletions rasa/shared/core/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
KEY_ACTIONS = "actions"
KEY_FORMS = "forms"
KEY_E2E_ACTIONS = "e2e_actions"
KEY_RESPONSES_TEXT = "text"

ALL_DOMAIN_KEYS = [
KEY_SLOTS,
Expand Down Expand Up @@ -286,8 +287,8 @@ def collect_slots(slot_dict: Dict[Text, Any]) -> List[Slot]:
slots = []
# make a copy to not alter the input dictionary
slot_dict = copy.deepcopy(slot_dict)
# Sort the slots so that the order of the slot states is consistent
for slot_name in sorted(slot_dict):
# Don't sort the slots, see https://github.com/RasaHQ/rasa-x/issues/3900
for slot_name in slot_dict:
slot_type = slot_dict[slot_name].pop("type", None)
slot_class = Slot.resolve_by_type(slot_type)

Expand Down Expand Up @@ -1057,6 +1058,33 @@ def as_dict(self) -> Dict[Text, Any]:
KEY_E2E_ACTIONS: self.action_texts,
}

@staticmethod
def get_responses_with_multilines(
responses: Dict[Text, List[Dict[Text, Any]]]
) -> Dict[Text, List[Dict[Text, Any]]]:
"""Returns `responses` with preserved multilines in the `text` key.
Args:
responses: Original `responses`.
Returns:
`responses` with preserved multilines in the `text` key.
"""
from ruamel.yaml.scalarstring import LiteralScalarString

final_responses = responses.copy()
for response_name, examples in final_responses.items():
for i, example in enumerate(examples):
response_text = example.get(KEY_RESPONSES_TEXT, "")
if not response_text or "\n" not in response_text:
continue
# Has new lines, use `LiteralScalarString`
final_responses[response_name][i][
KEY_RESPONSES_TEXT
] = LiteralScalarString(response_text)

return final_responses

def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]:
"""Transform intent properties for displaying or writing into a domain file.
Expand Down Expand Up @@ -1200,6 +1228,10 @@ def as_yaml(self, clean_before_dump: bool = False) -> Text:
domain_data.update(self.cleaned_domain())
else:
domain_data.update(self.as_dict())
if domain_data.get(KEY_RESPONSES, {}):
domain_data[KEY_RESPONSES] = self.get_responses_with_multilines(
domain_data[KEY_RESPONSES]
)

return rasa.shared.utils.io.dump_obj_as_yaml_to_string(
domain_data, should_preserve_key_order=True
Expand Down
5 changes: 4 additions & 1 deletion rasa/shared/nlu/training_data/formats/rasa_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional

import rasa.shared.data
from rasa.shared.core.domain import Domain
from rasa.shared.exceptions import YamlException
from rasa.shared.utils import validation
from ruamel.yaml import StringIO
Expand Down Expand Up @@ -409,7 +410,9 @@ def training_data_to_dict(
result[KEY_NLU] = nlu_items

if training_data.responses:
result[KEY_RESPONSES] = training_data.responses
result[KEY_RESPONSES] = Domain.get_responses_with_multilines(
training_data.responses
)

return result

Expand Down
105 changes: 104 additions & 1 deletion tests/shared/core/test_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

from rasa.shared.exceptions import YamlSyntaxException
import rasa.shared.utils.io
from rasa.shared.constants import DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES
from rasa.shared.constants import (
DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES,
LATEST_TRAINING_DATA_FORMAT_VERSION,
)
from rasa.core import training, utils
from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer
from rasa.shared.core.slots import InvalidSlotTypeException, TextSlot
Expand Down Expand Up @@ -1142,3 +1145,103 @@ def test_valid_slot_mappings(domain_as_dict: Dict[Text, Any]):
def test_form_invalid_mappings(domain_as_dict: Dict[Text, Any]):
with pytest.raises(InvalidDomain):
Domain.from_dict(domain_as_dict)


def test_slot_order_is_preserved():
test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
slots:
confirm:
type: bool
influence_conversation: false
previous_email:
type: text
influence_conversation: false
caller_id:
type: text
influence_conversation: false
email:
type: text
influence_conversation: false
incident_title:
type: text
influence_conversation: false
priority:
type: text
influence_conversation: false
problem_description:
type: text
influence_conversation: false
requested_slot:
type: text
influence_conversation: false
handoff_to:
type: text
influence_conversation: false
"""

domain = Domain.from_yaml(test_yaml)
assert domain.as_yaml(clean_before_dump=True) == test_yaml


def test_slot_order_is_preserved_when_merging():

slot_1 = """
b:
type: text
influence_conversation: false
a:
type: text
influence_conversation: false"""

test_yaml_1 = f"""
slots:{slot_1}
"""

slot_2 = """
d:
type: text
influence_conversation: false
c:
type: text
influence_conversation: false"""

test_yaml_2 = f"""
slots:{slot_2}
"""

test_yaml_merged = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
slots:{slot_2}{slot_1}
"""

domain_1 = Domain.from_yaml(test_yaml_1)
domain_2 = Domain.from_yaml(test_yaml_2)
domain_merged = domain_1.merge(domain_2)

assert domain_merged.as_yaml(clean_before_dump=True) == test_yaml_merged


def test_responses_text_multiline_is_preserved():
test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}'
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
responses:
utter_confirm:
- text: |-
First line
Second line
Third line
- text: One more response
utter_cancel:
- text: First line
- text: Second line
"""

domain = Domain.from_yaml(test_yaml)
assert domain.as_yaml(clean_before_dump=True) == test_yaml
30 changes: 30 additions & 0 deletions tests/shared/nlu/training_data/formats/test_rasa_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,33 @@ def test_responses_are_converted_from_markdown():

# dumping again should also not change the format
assert dumped == RasaYAMLWriter().dumps(dumped_result)


def test_responses_text_multiline_is_preserved():
responses_yml = textwrap.dedent(
"""
responses:
utter_confirm:
- text: |-
First line
Second line
Third line
- text: One more response
utter_cancel:
- text: First line
- text: Second line
"""
)

reader = RasaYAMLReader()
result = reader.reads(responses_yml)

dumped = RasaYAMLWriter().dumps(result)

validation_reader = RasaYAMLReader()
dumped_result = validation_reader.reads(dumped)

assert dumped_result.responses == result.responses

# dumping again should also not change the format
assert dumped == RasaYAMLWriter().dumps(dumped_result)

0 comments on commit 573b9dc

Please sign in to comment.