Skip to content

Commit

Permalink
Merge pull request #7033 from RasaHQ/story-processing
Browse files Browse the repository at this point in the history
Fixes for story/rules reading and writing for both formats
  • Loading branch information
rasabot authored Oct 20, 2020
2 parents ca551ca + 4bf0928 commit 93d26fc
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 18 deletions.
2 changes: 2 additions & 0 deletions changelog/7033.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Fix `YAMLStoryReader` not being able to represent [`OR` statements](stories.mdx#or-statements) in conversion mode.
- Fix `MarkdownStoryWriter` not being able to write stories with `OR` statements (when loaded in conversion mode).
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:

# check if source file is test stories file
if MarkdownStoryReader.is_test_stories_file(source_path):
reader = MarkdownStoryReader(is_used_for_conversion=True, use_e2e=True)
reader = MarkdownStoryReader(is_used_for_training=False, use_e2e=True)
output_core_path = cls._generate_path_for_converted_test_data_file(
source_path, output_path
)
else:
reader = MarkdownStoryReader(is_used_for_conversion=True)
reader = MarkdownStoryReader(is_used_for_training=False)
output_core_path = cls.generate_path_for_converted_training_data_file(
source_path, output_path
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _add_user_messages(self, messages: List[Text], line_num: int) -> None:
)
parsed_messages = [self._parse_message(m, line_num) for m in messages]
self.current_step_builder.add_user_messages(
parsed_messages, self.is_used_for_conversion
parsed_messages, self._is_used_for_training
)

def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> None:
Expand All @@ -204,7 +204,7 @@ def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> None:
self.current_step_builder.add_user_messages(parsed_messages)

@staticmethod
def parse_e2e_message(line: Text, is_used_for_conversion: bool = False) -> Message:
def parse_e2e_message(line: Text, is_used_for_training: bool = True) -> Message:
"""Parses an md list item line based on the current section type.
Matches expressions of the form `<intent>:<example>`. For the
Expand All @@ -231,7 +231,7 @@ def parse_e2e_message(line: Text, is_used_for_conversion: bool = False) -> Messa
intent = match.group(2)
message = match.group(4)
example = entities_parser.parse_training_example(message, intent)
if is_used_for_conversion:
if not is_used_for_training:
# In case this is a simple conversion from Markdown we should copy over
# the original text and not parse the entities
example.data[rasa.shared.nlu.constants.TEXT] = message
Expand All @@ -248,7 +248,7 @@ def parse_e2e_message(line: Text, is_used_for_conversion: bool = False) -> Messa
def _parse_message(self, message: Text, line_num: int) -> UserUttered:

if self.use_e2e:
parsed = self.parse_e2e_message(message, self.is_used_for_conversion)
parsed = self.parse_e2e_message(message, self._is_used_for_training)
text = parsed.get("text")
intent = {INTENT_NAME_KEY: parsed.get("intent")}
entities = parsed.get("entities")
Expand Down
8 changes: 4 additions & 4 deletions rasa/shared/core/training_data/story_reader/story_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(
template_vars: Optional[Dict] = None,
use_e2e: bool = False,
source_name: Optional[Text] = None,
is_used_for_conversion: bool = False,
is_used_for_training: bool = True,
) -> None:
"""Constructor for the StoryReader.
Expand All @@ -37,9 +37,9 @@ def __init__(
template_vars: Template variables to be replaced.
use_e2e: Specifies whether to use the e2e parser or not.
source_name: Name of the training data source.
is_used_for_conversion: Identifies if the user utterances should be parsed
is_used_for_training: Identifies if the user utterances should be parsed
(entities are extracted and removed from the original text) and
OR statements should be unfolded . This parameter is used only to
OR statements should be unfolded. This parameter is used only to
simplify the conversation from MD story files. Don't use it other ways,
because it ends up in a invalid story that cannot be user for real
training. Default value is `False`, which preserves the expected behavior
Expand All @@ -51,7 +51,7 @@ def __init__(
self.template_variables = template_vars if template_vars else {}
self.use_e2e = use_e2e
self.source_name = source_name
self.is_used_for_conversion = is_used_for_conversion
self._is_used_for_training = is_used_for_training
self._is_parsing_conditions = False

def read_from_file(self, filename: Text) -> List[StoryStep]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ def _prev_end_checkpoints(self) -> List[Checkpoint]:
return [Checkpoint(name) for name in end_names]

def add_user_messages(
self, messages: List[UserUttered], is_used_for_conversion: bool = False
self, messages: List[UserUttered], is_used_for_training: bool = True
) -> None:
"""Adds next story steps with the user's utterances.
Args:
messages: User utterances.
is_used_for_conversion: Identifies if the user utterance is a part of
is_used_for_training: Identifies if the user utterance is a part of
OR statement. This parameter is used only to simplify the conversation
from MD story files. Don't use it other ways, because it ends up
in a invalid story that cannot be user for real training.
Expand All @@ -82,7 +82,7 @@ def add_user_messages(
t.add_user_message(messages[0])
else:
# this simplifies conversion between formats, but breaks the logic
if is_used_for_conversion:
if not is_used_for_training:
for t in self.current_steps:
t.add_events(messages)
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def from_reader(cls, reader: "YAMLStoryReader") -> "YAMLStoryReader":
reader.template_variables,
reader.use_e2e,
reader.source_name,
reader.is_used_for_conversion,
reader._is_used_for_training,
)

def read_from_file(self, filename: Union[Text, Path]) -> List[StoryStep]:
Expand Down Expand Up @@ -336,7 +336,9 @@ def _parse_or_statement(self, step: Dict[Text, Any]) -> None:
)
return

self.current_step_builder.add_user_messages(utterances)
self.current_step_builder.add_user_messages(
utterances, self._is_used_for_training
)

def _user_intent_from_step(self, step: Dict[Text, Any]) -> Text:
user_intent = step.get(KEY_USER_INTENT, "").strip()
Expand Down
26 changes: 26 additions & 0 deletions rasa/shared/core/training_data/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
SessionStarted,
)
from rasa.shared.core.trackers import DialogueStateTracker # pytype: disable=pyi-error
from rasa.shared.exceptions import RasaCoreException

if typing.TYPE_CHECKING:
import networkx as nx
Expand All @@ -41,6 +42,13 @@
STEP_COUNT = 1


class EventTypeError(RasaCoreException, ValueError):
"""Represents an error caused by a Rasa Core event not being of the expected
type.
"""


class Checkpoint:
def __init__(
self, name: Optional[Text], conditions: Optional[Dict[Text, Any]] = None
Expand Down Expand Up @@ -131,6 +139,19 @@ def _user_string(story_step_element: UserUttered, e2e: bool) -> Text:
def _bot_string(story_step_element: Event) -> Text:
return f" - {story_step_element.as_story_string()}\n"

@staticmethod
def _or_string(story_step_element: List[Event], e2e: bool) -> Text:
for event in story_step_element:
if not isinstance(event, UserUttered):
raise EventTypeError(
"OR statement events must be of type `UserUttered`."
)

result = " OR ".join(
[element.as_story_string(e2e) for element in story_step_element]
)
return f"* {result}\n"

def as_story_string(self, flat: bool = False, e2e: bool = False) -> Text:
# if the result should be flattened, we
# will exclude the caption and any checkpoints.
Expand All @@ -156,6 +177,11 @@ def as_story_string(self, flat: bool = False, e2e: bool = False) -> Text:
converted = s.as_story_string() # pytype: disable=attribute-error
if converted:
result += self._bot_string(s)
elif isinstance(s, list):
# The story reader classes support reading stories in
# conversion mode. When this mode is enabled, OR statements
# are represented as lists of events.
result += self._or_string(s, e2e)
else:
raise Exception(f"Unexpected element in story step: {s}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,3 +359,35 @@ def test_read_mixed_training_data_file(default_domain: Domain):
with pytest.warns(None) as record:
reader.read_from_parsed_yaml(yaml_content)
assert not len(record)


def test_or_statement_if_not_training_mode():
stories = """
stories:
- story: hello world
steps:
- or:
- intent: intent1
- intent: intent2
- action: some_action
- intent: intent3
- action: other_action
"""

reader = YAMLStoryReader(is_used_for_training=False)
yaml_content = rasa.shared.utils.io.read_yaml(stories)

steps = reader.read_from_parsed_yaml(yaml_content)

assert len(steps) == 1

assert len(steps[0].events) == 4 # 4 events in total
assert len(steps[0].start_checkpoints) == 1
assert steps[0].start_checkpoints[0].name == "STORY_START"
assert steps[0].end_checkpoints == []

or_statement = steps[0].events[0]
assert isinstance(or_statement, list) # But first one is a list (OR)

assert or_statement[0].intent["name"] == "intent1"
assert or_statement[1].intent["name"] == "intent2"
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def test_simple_story(
):

original_md_reader = MarkdownStoryReader(
default_domain, None, False, input_md_file, is_used_for_conversion=True
default_domain, None, False, input_md_file, is_used_for_training=False
)
original_md_story_steps = original_md_reader.read_from_file(input_md_file)

Expand Down Expand Up @@ -67,7 +67,7 @@ async def test_story_start_checkpoint_is_skipped(default_domain: Domain):
input_md_file = "data/test_stories/stories.md"

original_md_reader = MarkdownStoryReader(
default_domain, None, False, input_md_file, is_used_for_conversion=True
default_domain, None, False, input_md_file, is_used_for_training=False
)
original_md_story_steps = original_md_reader.read_from_file(input_md_file)

Expand All @@ -78,7 +78,7 @@ async def test_story_start_checkpoint_is_skipped(default_domain: Domain):

async def test_forms_are_converted(default_domain: Domain):
original_md_reader = MarkdownStoryReader(
default_domain, None, False, is_used_for_conversion=True
default_domain, None, False, is_used_for_training=False
)
original_md_story_steps = original_md_reader.read_from_file(
"data/test_stories/stories_form.md"
Expand Down
35 changes: 35 additions & 0 deletions tests/shared/core/training_data/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,41 @@ def test_session_start_is_not_serialised(default_domain: Domain):
assert story.as_story_string(flat=True) == expected


def test_as_story_string_or_statement():
from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
YAMLStoryReader,
)

import rasa.shared.utils.io

stories = """
stories:
- story: hello world
steps:
- or:
- intent: intent1
- intent: intent2
- intent: intent3
- action: some_action
"""

reader = YAMLStoryReader(is_used_for_training=False)
yaml_content = rasa.shared.utils.io.read_yaml(stories)

steps = reader.read_from_parsed_yaml(yaml_content)

assert len(steps) == 1

assert (
steps[0].as_story_string()
== """
## hello world
* intent1 OR intent2 OR intent3
- some_action
"""
)


def test_cap_length():
assert (
rasa.shared.core.training_data.structures._cap_length("mystring", 6) == "mys..."
Expand Down

0 comments on commit 93d26fc

Please sign in to comment.