From 5873266980216cc18c41243da5fa0da9af7468d4 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 18 Dec 2015 17:03:35 -0800 Subject: [PATCH] Properly handling entity meanings in datastore. To do this, added entity_to_protobuf method that could be used recursively. Also solves #1206 since recursively serializing nested entities to protobuf was being done incorrectly. Fixes #1065. Fixes #1206. --- gcloud/datastore/_datastore_pb2.py | 1 - gcloud/datastore/batch.py | 27 +-- gcloud/datastore/entity.py | 3 + gcloud/datastore/helpers.py | 143 ++++++++++++---- gcloud/datastore/test_batch.py | 1 + gcloud/datastore/test_helpers.py | 266 +++++++++++++++++++++++++++-- 6 files changed, 369 insertions(+), 72 deletions(-) diff --git a/gcloud/datastore/_datastore_pb2.py b/gcloud/datastore/_datastore_pb2.py index 6bbc7e16e721..398146391c2f 100644 --- a/gcloud/datastore/_datastore_pb2.py +++ b/gcloud/datastore/_datastore_pb2.py @@ -35,4 +35,3 @@ Mutation = _datastore_v1_pb2.Mutation MutationResult = _datastore_v1_pb2.MutationResult ReadOptions = _datastore_v1_pb2.ReadOptions -Property = _datastore_v1_pb2.Property # Not present in v1beta3 diff --git a/gcloud/datastore/batch.py b/gcloud/datastore/batch.py index 98d9d02ef123..aadd063bb7d6 100644 --- a/gcloud/datastore/batch.py +++ b/gcloud/datastore/batch.py @@ -229,26 +229,7 @@ def _assign_entity_to_pb(entity_pb, entity): :type entity: :class:`gcloud.datastore.entity.Entity` :param entity: The entity being updated within the batch / transaction. """ - key_pb = entity.key.to_protobuf() - key_pb = helpers._prepare_key_for_request(key_pb) - entity_pb.key.CopyFrom(key_pb) - - for name, value in entity.items(): - - value_is_list = isinstance(value, list) - if value_is_list and len(value) == 0: - continue - - prop = entity_pb.property.add() - # Set the name of the property. - prop.name = name - - # Set the appropriate value. - helpers._set_protobuf_value(prop.value, value) - - if name in entity.exclude_from_indexes: - if not value_is_list: - prop.value.indexed = False - - for sub_value in prop.value.list_value: - sub_value.indexed = False + bare_entity_pb = helpers.entity_to_protobuf(entity) + key_pb = helpers._prepare_key_for_request(bare_entity_pb.key) + bare_entity_pb.key.CopyFrom(key_pb) + entity_pb.CopyFrom(bare_entity_pb) diff --git a/gcloud/datastore/entity.py b/gcloud/datastore/entity.py index 6b88f28cfbd7..7a25e648391a 100644 --- a/gcloud/datastore/entity.py +++ b/gcloud/datastore/entity.py @@ -81,6 +81,9 @@ def __init__(self, key=None, exclude_from_indexes=()): self.key = key self._exclude_from_indexes = set(_ensure_tuple_or_list( 'exclude_from_indexes', exclude_from_indexes)) + # NOTE: This will be populated when parsing a protobuf in + # gcloud.datastore.helpers.entity_from_protobuf. + self._meanings = {} def __eq__(self, other): """Compare two entities for equality. diff --git a/gcloud/datastore/helpers.py b/gcloud/datastore/helpers.py index e7171ae31657..383207a6ebfd 100644 --- a/gcloud/datastore/helpers.py +++ b/gcloud/datastore/helpers.py @@ -73,6 +73,48 @@ def find_true_dataset_id(dataset_id, connection): return returned_pb.key.partition_id.dataset_id +def _get_meaning(value_pb, is_list=False): + """Get the meaning from a protobuf value. + + :type value_pb: :class:`gcloud.datastore._entity_pb2.Value` + :param value_pb: The protobuf value to be checked for an + associated meaning. + + :type is_list: bool + :param is_list: Boolean indicating if the ``value_pb`` contains + a list value. + + :rtype: int + :returns: The meaning for the ``value_pb`` if one is set, else + :data:`None`. + :raises: :class:`ValueError ` if a list value + has disagreeing meanings (in sub-elements) or has some + elements with meanings and some without. + """ + meaning = None + if is_list: + # An empty list will have no values, hence no shared meaning + # set among them. + if len(value_pb.list_value) == 0: + return None + + # We check among all the meanings, some of which may be None, + # the rest which may be enum/int values. + all_meanings = set(_get_meaning(sub_value_pb) + for sub_value_pb in value_pb.list_value) + meaning = all_meanings.pop() + # The value we popped off should have been unique. If not + # then we can't handle a list with values that have more + # than one meaning. + if all_meanings: + raise ValueError('Different meanings set on values ' + 'within a list_value') + elif value_pb.HasField('meaning'): + meaning = value_pb.meaning + + return meaning + + def entity_from_protobuf(pb): """Factory method for creating an entity based on a protobuf. @@ -90,11 +132,19 @@ def entity_from_protobuf(pb): key = key_from_protobuf(pb.key) entity_props = {} + entity_meanings = {} exclude_from_indexes = [] for property_pb in pb.property: - value = _get_value_from_property_pb(property_pb) - entity_props[property_pb.name] = value + value = _get_value_from_value_pb(property_pb.value) + prop_name = property_pb.name + entity_props[prop_name] = value + + # Check if the property has an associated meaning. + meaning = _get_meaning(property_pb.value, + is_list=isinstance(value, list)) + if meaning is not None: + entity_meanings[prop_name] = (meaning, value) # Check if property_pb.value was indexed. Lists need to be # special-cased and we require all `indexed` values in a list agree. @@ -106,16 +156,67 @@ def entity_from_protobuf(pb): 'be indexed or all excluded from indexes.') if not indexed_values.pop(): - exclude_from_indexes.append(property_pb.name) + exclude_from_indexes.append(prop_name) else: if not property_pb.value.indexed: - exclude_from_indexes.append(property_pb.name) + exclude_from_indexes.append(prop_name) entity = Entity(key=key, exclude_from_indexes=exclude_from_indexes) entity.update(entity_props) + entity._meanings.update(entity_meanings) return entity +def entity_to_protobuf(entity): + """Converts an entity into a protobuf. + + :type entity: :class:`gcloud.datastore.entity.Entity` + :param entity: The entity to be turned into a protobuf. + + :rtype: :class:`gcloud.datastore._entity_pb2.Entity` + :returns: The protobuf representing the entity. + """ + entity_pb = _entity_pb2.Entity() + if entity.key is not None: + key_pb = entity.key.to_protobuf() + entity_pb.key.CopyFrom(key_pb) + + for name, value in entity.items(): + value_is_list = isinstance(value, list) + if value_is_list and len(value) == 0: + continue + + prop = entity_pb.property.add() + # Set the name of the property. + prop.name = name + + # Set the appropriate value. + _set_protobuf_value(prop.value, value) + + # Add index information to protobuf. + if name in entity.exclude_from_indexes: + if not value_is_list: + prop.value.indexed = False + + for sub_value in prop.value.list_value: + sub_value.indexed = False + + # Add meaning information to protobuf. + if name in entity._meanings: + meaning, orig_value = entity._meanings[name] + # Only add the meaning back to the protobuf if the value is + # unchanged from when it was originally read from the API. + if orig_value is value: + # For lists, we set meaning on each sub-element. + if value_is_list: + for sub_value_pb in prop.value.list_value: + sub_value_pb.meaning = meaning + else: + prop.value.meaning = meaning + + return entity_pb + + def key_from_protobuf(pb): """Factory method for creating a key based on a protobuf. @@ -248,29 +349,12 @@ def _get_value_from_value_pb(value_pb): result = entity_from_protobuf(value_pb.entity_value) elif value_pb.list_value: - result = [_get_value_from_value_pb(x) for x in value_pb.list_value] + result = [_get_value_from_value_pb(value) + for value in value_pb.list_value] return result -def _get_value_from_property_pb(property_pb): - """Given a protobuf for a Property, get the correct value. - - The Cloud Datastore Protobuf API returns a Property Protobuf which - has one value set and the rest blank. This function retrieves the - the one value provided. - - Some work is done to coerce the return value into a more useful type - (particularly in the case of a timestamp value, or a key value). - - :type property_pb: :class:`gcloud.datastore._datastore_pb2.Property` - :param property_pb: The Property Protobuf. - - :returns: The value provided by the Protobuf. - """ - return _get_value_from_value_pb(property_pb.value) - - def _set_protobuf_value(value_pb, val): """Assign 'val' to the correct subfield of 'value_pb'. @@ -285,7 +369,7 @@ def _set_protobuf_value(value_pb, val): :type val: :class:`datetime.datetime`, boolean, float, integer, string, :class:`gcloud.datastore.key.Key`, - :class:`gcloud.datastore.entity.Entity`, + :class:`gcloud.datastore.entity.Entity` :param val: The value to be assigned. """ if val is None: @@ -296,15 +380,8 @@ def _set_protobuf_value(value_pb, val): if attr == 'key_value': value_pb.key_value.CopyFrom(val) elif attr == 'entity_value': - e_pb = value_pb.entity_value - e_pb.Clear() - key = val.key - if key is not None: - e_pb.key.CopyFrom(key.to_protobuf()) - for item_key, value in val.items(): - p_pb = e_pb.property.add() - p_pb.name = item_key - _set_protobuf_value(p_pb.value, value) + entity_pb = entity_to_protobuf(val) + value_pb.entity_value.CopyFrom(entity_pb) elif attr == 'list_value': l_pb = value_pb.list_value for item in val: diff --git a/gcloud/datastore/test_batch.py b/gcloud/datastore/test_batch.py index 2d32cc30405f..a46584aa9744 100644 --- a/gcloud/datastore/test_batch.py +++ b/gcloud/datastore/test_batch.py @@ -385,6 +385,7 @@ def commit(self, dataset_id, mutation, transaction_id): class _Entity(dict): key = None exclude_from_indexes = () + _meanings = {} class _Key(object): diff --git a/gcloud/datastore/test_helpers.py b/gcloud/datastore/test_helpers.py index 6e52b3595143..36177c9334f2 100644 --- a/gcloud/datastore/test_helpers.py +++ b/gcloud/datastore/test_helpers.py @@ -104,6 +104,20 @@ def test_entity_no_key(self): self.assertEqual(entity.key, None) self.assertEqual(dict(entity), {}) + def test_entity_with_meaning(self): + from gcloud.datastore import _entity_pb2 + + entity_pb = _entity_pb2.Entity() + prop = entity_pb.property.add() + prop.value.meaning = meaning = 9 + prop.value.string_value = val = u'something' + prop.name = name = 'hello' + + entity = self._callFUT(entity_pb) + self.assertEqual(entity.key, None) + self.assertEqual(dict(entity), {name: val}) + self.assertEqual(entity._meanings, {name: (meaning, val)}) + def test_nested_entity_no_key(self): from gcloud.datastore import _entity_pb2 @@ -138,6 +152,163 @@ def test_nested_entity_no_key(self): self.assertEqual(inside_entity[INSIDE_NAME], INSIDE_VALUE) +class Test_entity_to_protobuf(unittest2.TestCase): + + def _callFUT(self, entity): + from gcloud.datastore.helpers import entity_to_protobuf + return entity_to_protobuf(entity) + + def _compareEntityProto(self, entity_pb1, entity_pb2): + import operator + self.assertEqual(entity_pb1.key, entity_pb2.key) + name_getter = operator.attrgetter('name') + prop_list1 = sorted(entity_pb1.property, key=name_getter) + prop_list2 = sorted(entity_pb2.property, key=name_getter) + self.assertEqual(len(prop_list1), len(prop_list2)) + for val1, val2 in zip(prop_list1, prop_list2): + if val1.value.HasField('entity_value'): + self.assertEqual(val1.name, val2.name) + self.assertEqual(val1.value.meaning, val2.value.meaning) + self._compareEntityProto(val1.value.entity_value, + val2.value.entity_value) + else: + self.assertEqual(val1, val2) + + def test_empty(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.entity import Entity + + entity = Entity() + entity_pb = self._callFUT(entity) + self._compareEntityProto(entity_pb, _entity_pb2.Entity()) + + def test_key_only(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.entity import Entity + from gcloud.datastore.key import Key + + kind, name = 'PATH', 'NAME' + dataset_id = 'DATASET' + key = Key(kind, name, dataset_id=dataset_id) + entity = Entity(key=key) + entity_pb = self._callFUT(entity) + + expected_pb = _entity_pb2.Entity() + expected_pb.key.partition_id.dataset_id = dataset_id + path_elt = expected_pb.key.path_element.add() + path_elt.kind = kind + path_elt.name = name + + self._compareEntityProto(entity_pb, expected_pb) + + def test_simple_fields(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.entity import Entity + + entity = Entity() + name1 = 'foo' + entity[name1] = value1 = 42 + name2 = 'bar' + entity[name2] = value2 = u'some-string' + entity_pb = self._callFUT(entity) + + expected_pb = _entity_pb2.Entity() + prop1 = expected_pb.property.add() + prop1.name = name1 + prop1.value.integer_value = value1 + prop2 = expected_pb.property.add() + prop2.name = name2 + prop2.value.string_value = value2 + + self._compareEntityProto(entity_pb, expected_pb) + + def test_with_empty_list(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.entity import Entity + + entity = Entity() + entity['foo'] = [] + entity_pb = self._callFUT(entity) + + self._compareEntityProto(entity_pb, _entity_pb2.Entity()) + + def test_inverts_to_protobuf(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.helpers import entity_from_protobuf + + original_pb = _entity_pb2.Entity() + # Add a key. + original_pb.key.partition_id.dataset_id = dataset_id = 'DATASET' + elem1 = original_pb.key.path_element.add() + elem1.kind = 'Family' + elem1.id = 1234 + elem2 = original_pb.key.path_element.add() + elem2.kind = 'King' + elem2.name = 'Spades' + + # Add an integer property. + prop1 = original_pb.property.add() + prop1.name = 'foo' + prop1.value.integer_value = 1337 + prop1.value.indexed = False + # Add a string property. + prop2 = original_pb.property.add() + prop2.name = 'bar' + prop2.value.string_value = u'hello' + + # Add a nested (entity) property. + prop3 = original_pb.property.add() + prop3.name = 'entity-baz' + sub_pb = _entity_pb2.Entity() + sub_prop1 = sub_pb.property.add() + sub_prop1.name = 'x' + sub_prop1.value.double_value = 3.14 + sub_prop2 = sub_pb.property.add() + sub_prop2.name = 'y' + sub_prop2.value.double_value = 2.718281828 + prop3.value.meaning = 9 + prop3.value.entity_value.CopyFrom(sub_pb) + + # Add a list property. + prop4 = original_pb.property.add() + prop4.name = 'list-quux' + list_val1 = prop4.value.list_value.add() + list_val1.indexed = False + list_val1.meaning = meaning = 22 + list_val1.blob_value = b'\xe2\x98\x83' + list_val2 = prop4.value.list_value.add() + list_val2.indexed = False + list_val2.meaning = meaning + list_val2.blob_value = b'\xe2\x98\x85' + + # Convert to the user-space Entity. + entity = entity_from_protobuf(original_pb) + # Convert the user-space Entity back to a protobuf. + new_pb = self._callFUT(entity) + + # NOTE: entity_to_protobuf() strips the dataset_id so we "cheat". + new_pb.key.partition_id.dataset_id = dataset_id + self._compareEntityProto(original_pb, new_pb) + + def test_meaning_with_change(self): + from gcloud.datastore import _entity_pb2 + from gcloud.datastore.entity import Entity + + entity = Entity() + name = 'foo' + entity[name] = value = 42 + entity._meanings[name] = (9, 1337) + entity_pb = self._callFUT(entity) + + expected_pb = _entity_pb2.Entity() + prop = expected_pb.property.add() + prop.name = name + prop.value.integer_value = value + # NOTE: No meaning is used since the value differs from the + # value stored. + self._compareEntityProto(entity_pb, expected_pb) + + class Test_key_from_protobuf(unittest2.TestCase): def _callFUT(self, val): @@ -383,21 +554,6 @@ def test_unknown(self): self.assertEqual(self._callFUT(pb), None) -class Test__get_value_from_property_pb(unittest2.TestCase): - - def _callFUT(self, pb): - from gcloud.datastore.helpers import _get_value_from_property_pb - - return _get_value_from_property_pb(pb) - - def test_it(self): - from gcloud.datastore._datastore_pb2 import Property - - pb = Property() - pb.value.string_value = 'value' - self.assertEqual(self._callFUT(pb), 'value') - - class Test_set_protobuf_value(unittest2.TestCase): def _callFUT(self, value_pb, val): @@ -613,6 +769,86 @@ def test_unprefixed_bogus_key_hit(self): self.assertEqual(result, PREFIXED) +class Test__get_meaning(unittest2.TestCase): + + def _callFUT(self, *args, **kwargs): + from gcloud.datastore.helpers import _get_meaning + return _get_meaning(*args, **kwargs) + + def test_no_meaning(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + result = self._callFUT(value_pb) + self.assertEqual(result, None) + + def test_single(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + value_pb.meaning = meaning = 22 + value_pb.string_value = u'hi' + result = self._callFUT(value_pb) + self.assertEqual(meaning, result) + + def test_empty_list_value(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + value_pb.list_value.add() + value_pb.list_value.pop() + + result = self._callFUT(value_pb, is_list=True) + self.assertEqual(None, result) + + def test_list_value(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + meaning = 9 + sub_value_pb1 = value_pb.list_value.add() + sub_value_pb2 = value_pb.list_value.add() + + sub_value_pb1.meaning = sub_value_pb2.meaning = meaning + sub_value_pb1.string_value = u'hi' + sub_value_pb2.string_value = u'bye' + + result = self._callFUT(value_pb, is_list=True) + self.assertEqual(meaning, result) + + def test_list_value_disagreeing(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + meaning1 = 9 + meaning2 = 10 + sub_value_pb1 = value_pb.list_value.add() + sub_value_pb2 = value_pb.list_value.add() + + sub_value_pb1.meaning = meaning1 + sub_value_pb2.meaning = meaning2 + sub_value_pb1.string_value = u'hi' + sub_value_pb2.string_value = u'bye' + + with self.assertRaises(ValueError): + self._callFUT(value_pb, is_list=True) + + def test_list_value_partially_unset(self): + from gcloud.datastore import _entity_pb2 + + value_pb = _entity_pb2.Value() + meaning1 = 9 + sub_value_pb1 = value_pb.list_value.add() + sub_value_pb2 = value_pb.list_value.add() + + sub_value_pb1.meaning = meaning1 + sub_value_pb1.string_value = u'hi' + sub_value_pb2.string_value = u'bye' + + with self.assertRaises(ValueError): + self._callFUT(value_pb, is_list=True) + + class _Connection(object): _called_dataset_id = _called_key_pbs = _lookup_result = None