Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RasaModelData can handle 4D Tensors #6833

Merged
merged 30 commits from 4d-tensors
Oct 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6d21133
Merge branch 'master' into 4d-tensors
tabergma Sep 24, 2020
4d8d6b8
handle 4d dense features
tabergma Sep 25, 2020
33e1efa
padding for dense and sparse works
tabergma Sep 25, 2020
1ba9791
update RasaModelData
tabergma Sep 29, 2020
3a720a8
update RasaModelData tests
tabergma Sep 29, 2020
9f18a48
update shape of sparse tensors
tabergma Sep 29, 2020
0a29c64
update is_in_4d_format
tabergma Sep 29, 2020
22cbb02
Merge branch 'e2e' into 4d-tensors
tabergma Sep 29, 2020
61350d1
set eager back to False
tabergma Sep 29, 2020
e8b55b4
fix code quality issues
tabergma Sep 29, 2020
3690aee
formatting
tabergma Sep 29, 2020
388b18a
Merge branch 'e2e' into 4d-tensors
tabergma Sep 30, 2020
d4a0fbe
fix type issues
tabergma Sep 30, 2020
48c9bd0
refactoring
tabergma Sep 30, 2020
656eb70
update types
tabergma Sep 30, 2020
55a1759
formatting
tabergma Sep 30, 2020
a947c9b
fix type issue
tabergma Sep 30, 2020
3540707
subclass numpy array
tabergma Sep 30, 2020
0f0c873
explicit specify number_of_dimensions
tabergma Oct 1, 2020
fb29d2c
clean up
tabergma Oct 1, 2020
16b6787
training is working again
tabergma Oct 1, 2020
59d594e
rename feature_dimension to units
tabergma Oct 1, 2020
68b5465
reset default eager values
tabergma Oct 1, 2020
a46f277
update comments
tabergma Oct 1, 2020
ad1a34d
refactoring
tabergma Oct 1, 2020
bb8de45
fix type issue
tabergma Oct 1, 2020
053247e
fix types
tabergma Oct 5, 2020
8018f83
review comments
tabergma Oct 6, 2020
9a5cbf0
Merge branch 'e2e' into 4d-tensors
tabergma Oct 6, 2020
8da6f58
formatting
tabergma Oct 6, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions rasa/core/policies/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
ACTION_BACK_NAME,
)
from rasa.shared.core.domain import InvalidDomain, Domain
from rasa.shared.core.events import (
DefinePrevUserUtteredFeaturization,
)
from rasa.shared.core.events import DefinePrevUserUtteredFeaturization
from rasa.shared.core.events import ActionExecutionRejected
from rasa.core.exceptions import UnsupportedDialogueModelError
from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer
Expand Down
16 changes: 13 additions & 3 deletions rasa/core/policies/ted_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
from rasa.shared.core.generator import TrackerWithCachedStates
from rasa.utils import train_utils
from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
from rasa.utils.tensorflow.model_data import (
RasaModelData,
FeatureSignature,
FeatureArray,
)
from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
from rasa.utils.tensorflow.constants import (
LABEL,
Expand Down Expand Up @@ -264,7 +268,9 @@ def _create_label_data(

label_ids = np.arange(domain.num_actions)
label_data.add_features(
LABEL_KEY, LABEL_SUB_KEY, [np.expand_dims(label_ids, -1)]
LABEL_KEY,
LABEL_SUB_KEY,
[FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
)

return label_data, encoded_all_labels
Expand Down Expand Up @@ -295,7 +301,11 @@ def _create_model_data(
label_ids = np.array(
[np.expand_dims(seq_label_ids, -1) for seq_label_ids in label_ids]
)
model_data.add_features(LABEL_KEY, LABEL_SUB_KEY, [label_ids])
model_data.add_features(
LABEL_KEY,
LABEL_SUB_KEY,
[FeatureArray(label_ids, number_of_dimensions=3)],
)

attribute_data, self.zero_state_features = convert_to_data_format(
tracker_state_features
Expand Down
62 changes: 44 additions & 18 deletions rasa/nlu/classifiers/diet_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
from rasa.utils import train_utils
from rasa.utils.tensorflow import layers
from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
from rasa.utils.tensorflow.model_data import (
RasaModelData,
FeatureSignature,
FeatureArray,
)
from rasa.nlu.constants import TOKENS_NAMES
from rasa.shared.nlu.constants import (
TEXT,
Expand Down Expand Up @@ -325,7 +329,7 @@ def __init__(
self.model = model

self._label_data: Optional[RasaModelData] = None
self._data_example: Optional[Dict[Text, List[np.ndarray]]] = None
self._data_example: Optional[Dict[Text, List[FeatureArray]]] = None

@property
def label_key(self) -> Optional[Text]:
Expand Down Expand Up @@ -492,10 +496,10 @@ def _check_input_dimension_consistency(self, model_data: RasaModelData) -> None:
"""Checks if features have same dimensionality if hidden layers are shared."""

if self.component_config.get(SHARE_HIDDEN_LAYERS):
num_text_sentence_features = model_data.feature_dimension(TEXT, SENTENCE)
num_label_sentence_features = model_data.feature_dimension(LABEL, SENTENCE)
num_text_sequence_features = model_data.feature_dimension(TEXT, SEQUENCE)
num_label_sequence_features = model_data.feature_dimension(LABEL, SEQUENCE)
num_text_sentence_features = model_data.number_of_units(TEXT, SENTENCE)
num_label_sentence_features = model_data.number_of_units(LABEL, SENTENCE)
num_text_sequence_features = model_data.number_of_units(TEXT, SEQUENCE)
num_label_sequence_features = model_data.number_of_units(LABEL, SEQUENCE)

if (0 < num_text_sentence_features != num_label_sentence_features > 0) or (
0 < num_text_sequence_features != num_label_sequence_features > 0
Expand All @@ -507,7 +511,7 @@ def _check_input_dimension_consistency(self, model_data: RasaModelData) -> None:

def _extract_labels_precomputed_features(
self, label_examples: List[Message], attribute: Text = INTENT
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
) -> Tuple[List[FeatureArray], List[FeatureArray]]:
"""Collects precomputed encodings."""

features = defaultdict(list)
Expand All @@ -521,23 +525,32 @@ def _extract_labels_precomputed_features(
sentence_features = []
for feature_name, feature_value in features.items():
if SEQUENCE in feature_name:
sequence_features.append(np.array(features[feature_name]))
sequence_features.append(
FeatureArray(np.array(feature_value), number_of_dimensions=3)
)
else:
sentence_features.append(np.array(features[feature_name]))
sentence_features.append(
FeatureArray(np.array(feature_value), number_of_dimensions=3)
)

return (sequence_features, sentence_features)
return sequence_features, sentence_features

@staticmethod
def _compute_default_label_features(
labels_example: List[Message],
) -> List[np.ndarray]:
) -> List[FeatureArray]:
"""Computes one-hot representation for the labels."""

logger.debug("No label features found. Computing default label features.")

eye_matrix = np.eye(len(labels_example), dtype=np.float32)
# add sequence dimension to one-hot labels
return [np.array([np.expand_dims(a, 0) for a in eye_matrix])]
return [
FeatureArray(
np.array([np.expand_dims(a, 0) for a in eye_matrix]),
number_of_dimensions=3,
)
]

def _create_label_data(
self,
Expand Down Expand Up @@ -590,16 +603,23 @@ def _create_label_data(
# explicitly add last dimension to label_ids
# to track correctly dynamic sequences
label_data.add_features(
LABEL_KEY, LABEL_SUB_KEY, [np.expand_dims(label_ids, -1)]
LABEL_KEY,
LABEL_SUB_KEY,
[FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
)

label_data.add_lengths(LABEL, SEQUENCE_LENGTH, LABEL, SEQUENCE)

return label_data

def _use_default_label_features(self, label_ids: np.ndarray) -> List[np.ndarray]:
def _use_default_label_features(self, label_ids: np.ndarray) -> List[FeatureArray]:
all_label_features = self._label_data.get(LABEL, SENTENCE)[0]
return [np.array([all_label_features[label_id] for label_id in label_ids])]
return [
FeatureArray(
np.array([all_label_features[label_id] for label_id in label_ids]),
number_of_dimensions=2,
)
]

def _create_model_data(
self,
Expand Down Expand Up @@ -645,7 +665,11 @@ def _create_model_data(
for key, attribute_features in features.items():
for sub_key, _features in attribute_features.items():
sub_key = sub_key.replace(f"{SPARSE}_", "").replace(f"{DENSE}_", "")
model_data.add_features(key, sub_key, [np.array(_features)])
model_data.add_features(
key,
sub_key,
[FeatureArray(np.array(_features), number_of_dimensions=3)],
)

if (
label_attribute
Expand All @@ -660,7 +684,9 @@ def _create_model_data(
# explicitly add last dimension to label_ids
# to track correctly dynamic sequences
model_data.add_features(
LABEL_KEY, LABEL_SUB_KEY, [np.expand_dims(label_ids, -1)]
LABEL_KEY,
LABEL_SUB_KEY,
[FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
)

model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE)
Expand Down Expand Up @@ -1028,7 +1054,7 @@ def _load_model(
entity_tag_specs: List[EntityTagSpec],
label_data: RasaModelData,
meta: Dict[Text, Any],
data_example: Dict[Text, Dict[Text, List[np.ndarray]]],
data_example: Dict[Text, Dict[Text, List[FeatureArray]]],
model_dir: Text,
) -> "RasaModel":
file_name = meta.get("file")
Expand Down
Loading