From 7bdd01f32a73e6531134fa3a8125cfca14c00f33 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Fri, 6 Sep 2019 16:52:42 -0500 Subject: [PATCH] support legacy compressed properties back and forth (#183) * support legacy compressed properties back and forth --- google/cloud/ndb/model.py | 73 ++++++++++++++++++++++++- tests/system/conftest.py | 17 ++++++ tests/system/test_crud.py | 23 ++++++++ tests/unit/test_model.py | 111 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 218 insertions(+), 6 deletions(-) diff --git a/google/cloud/ndb/model.py b/google/cloud/ndb/model.py index 8c504f07..72cfd499 100644 --- a/google/cloud/ndb/model.py +++ b/google/cloud/ndb/model.py @@ -324,7 +324,13 @@ class Person(Model): _MEANING_PREDEFINED_ENTITY_USER = 20 -_MEANING_URI_COMPRESSED = "ZLIB" +_MEANING_COMPRESSED = 22 + +# As produced by zlib. Indicates compressed byte sequence using DEFLATE at +# default compression level, with a 32K window size. +# From https://github.com/madler/zlib/blob/master/doc/rfc1950.txt +_ZLIB_COMPRESSION_MARKER = b"x\x9c" + _MAX_STRING_LENGTH = 1500 Key = key_module.Key BlobKey = _datastore_types.BlobKey @@ -627,6 +633,8 @@ def new_entity(key): else: value = _BaseValue(value) + value = prop._from_datastore(ds_entity, value) + prop._store_value(entity, value) return entity @@ -721,6 +729,13 @@ def _entity_to_ds_entity(entity, set_key=True): ds_entity = ds_entity_module.Entity( exclude_from_indexes=exclude_from_indexes ) + + # Some properties may need to set meanings for backwards compatibility, + # so we look for them. They are set using the _to_datastore calls above. + meanings = data.pop("_meanings", None) + if meanings is not None: + ds_entity._meanings = meanings + ds_entity.update(data) return ds_entity @@ -2034,6 +2049,25 @@ def _to_datastore(self, entity, data, prefix="", repeated=False): return (key,) + def _from_datastore(self, ds_entity, value): + """Helper to convert property value from Datastore serializable data. 
+ + Called to modify the value of a property during deserialization from + storage. Subclasses (like BlobProperty) may need to override the + default behavior, which is simply to return the received value without + modification. + + Args: + ds_entity (~google.cloud.datastore.Entity): The Datastore entity to + convert. + value (_BaseValue): The stored value of this property for the + entity being deserialized. + + Returns: + Any: The transformed value. + """ + return value + def _validate_key(value, entity=None): """Validate a key. @@ -2414,11 +2448,48 @@ def _from_base_type(self, value): decompressed. """ if self._compressed and not isinstance(value, _CompressedValue): + if not value.startswith(_ZLIB_COMPRESSION_MARKER): + value = zlib.compress(value) value = _CompressedValue(value) if isinstance(value, _CompressedValue): return zlib.decompress(value.z_val) + def _to_datastore(self, entity, data, prefix="", repeated=False): + """Override of :meth:`Property._to_datastore`. + + If this is a compressed property, we need to set the backwards- + compatible `_meanings` field, so that it can be properly read later. + """ + keys = super(BlobProperty, self)._to_datastore( + entity, data, prefix=prefix, repeated=repeated + ) + if self._compressed: + value = data[self._name] + if isinstance(value, _CompressedValue): + value = value.z_val + data[self._name] = value + if not value.startswith(_ZLIB_COMPRESSION_MARKER): + value = zlib.compress(value) + data[self._name] = value + data.setdefault("_meanings", {})[self._name] = ( + _MEANING_COMPRESSED, + value, + ) + return keys + + def _from_datastore(self, ds_entity, value): + """Override of :meth:`Property._from_datastore`. + + Need to check the ds_entity for a compressed meaning that would + indicate we are getting a compressed value. 
+ """ + if self._name in ds_entity._meanings: + meaning = ds_entity._meanings[self._name][0] + if meaning == _MEANING_COMPRESSED and not self._compressed: + value.b_val = zlib.decompress(value.b_val) + return value + def _db_set_compressed_meaning(self, p): """Helper for :meth:`_db_set_value`. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 8586e891..d98450bf 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -91,6 +91,23 @@ def make_entity(*key_args, **entity_kwargs): yield make_entity +@pytest.fixture +def ds_entity_with_meanings(with_ds_client, dispose_of): + def make_entity(*key_args, **entity_kwargs): + meanings = key_args[0] + key = with_ds_client.key(*key_args[1:]) + assert with_ds_client.get(key) is None + entity = datastore.Entity(key=key, exclude_from_indexes=("blob",)) + entity._meanings = meanings + entity.update(entity_kwargs) + with_ds_client.put(entity) + dispose_of(key) + + return entity + + yield make_entity + + @pytest.fixture def dispose_of(with_ds_client, to_delete): def delete_entity(ds_key): diff --git a/tests/system/test_crud.py b/tests/system/test_crud.py index 5774316b..b871303d 100644 --- a/tests/system/test_crud.py +++ b/tests/system/test_crud.py @@ -20,6 +20,7 @@ import operator import os import threading +import zlib from unittest import mock @@ -315,6 +316,28 @@ class SomeKind(ndb.Model): dispose_of(key._key) +@pytest.mark.usefixtures("client_context") +def test_retrieve_entity_with_legacy_compressed_property( + ds_entity_with_meanings +): + class SomeKind(ndb.Model): + blob = ndb.BlobProperty() + + value = b"abc" * 1000 + compressed_value = zlib.compress(value) + entity_id = test_utils.system.unique_resource_id() + ds_entity_with_meanings( + {"blob": (22, compressed_value)}, + KIND, + entity_id, + **{"blob": compressed_value} + ) + + key = ndb.Key(KIND, entity_id) + retrieved = key.get() + assert retrieved.blob == value + + @pytest.mark.usefixtures("client_context") def 
test_large_pickle_property(dispose_of, ds_client): class SomeKind(ndb.Model): diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index c84798fe..d9df986e 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -1722,12 +1722,19 @@ def test__from_base_type(): assert converted == original @staticmethod - def test__from_base_type_no_compressed_value(): - prop = model.BlobProperty(name="blob") + def test__from_base_type_no_compressed_value_uncompressed(): + prop = model.BlobProperty(name="blob", compressed=True) original = b"abc" * 10 - value = zlib.compress(original) - prop._compressed = True - converted = prop._from_base_type(value) + converted = prop._from_base_type(original) + + assert converted == original + + @staticmethod + def test__from_base_type_no_compressed_value_compressed(): + prop = model.BlobProperty(name="blob", compressed=True) + original = b"abc" * 10 + z_val = zlib.compress(original) + converted = prop._from_base_type(z_val) assert converted == original @@ -1761,6 +1768,100 @@ def test__db_get_value(): with pytest.raises(NotImplementedError): prop._db_get_value(None, None) + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__to_datastore_compressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=True) + + uncompressed_value = b"abc" * 1000 + compressed_value = zlib.compress(uncompressed_value) + entity = ThisKind(foo=uncompressed_value) + ds_entity = model._entity_to_ds_entity(entity) + assert "foo" in ds_entity._meanings + assert ds_entity._meanings["foo"][0] == model._MEANING_COMPRESSED + assert ds_entity._meanings["foo"][1] == compressed_value + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__to_datastore_uncompressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=False) + + uncompressed_value = b"abc" + entity = ThisKind(foo=uncompressed_value) + ds_entity = model._entity_to_ds_entity(entity) + assert "foo" not in 
ds_entity._meanings + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__from_datastore_compressed_to_uncompressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=False) + + key = datastore.Key("ThisKind", 123, project="testing") + datastore_entity = datastore.Entity(key=key) + uncompressed_value = b"abc" * 1000 + compressed_value = zlib.compress(uncompressed_value) + datastore_entity.update({"foo": compressed_value}) + meanings = {"foo": (model._MEANING_COMPRESSED, compressed_value)} + datastore_entity._meanings = meanings + protobuf = helpers.entity_to_protobuf(datastore_entity) + entity = model._entity_from_protobuf(protobuf) + assert entity.foo == uncompressed_value + ds_entity = model._entity_to_ds_entity(entity) + assert ds_entity["foo"] == uncompressed_value + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__from_datastore_compressed_to_compressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=True) + + key = datastore.Key("ThisKind", 123, project="testing") + datastore_entity = datastore.Entity(key=key) + uncompressed_value = b"abc" * 1000 + compressed_value = zlib.compress(uncompressed_value) + datastore_entity.update({"foo": compressed_value}) + meanings = {"foo": (model._MEANING_COMPRESSED, compressed_value)} + datastore_entity._meanings = meanings + protobuf = helpers.entity_to_protobuf(datastore_entity) + entity = model._entity_from_protobuf(protobuf) + ds_entity = model._entity_to_ds_entity(entity) + assert ds_entity["foo"] == compressed_value + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__from_datastore_uncompressed_to_uncompressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=False) + + key = datastore.Key("ThisKind", 123, project="testing") + datastore_entity = datastore.Entity(key=key) + uncompressed_value = b"abc" * 1000 + datastore_entity.update({"foo": uncompressed_value}) + protobuf = 
helpers.entity_to_protobuf(datastore_entity) + entity = model._entity_from_protobuf(protobuf) + assert entity.foo == uncompressed_value + ds_entity = model._entity_to_ds_entity(entity) + assert ds_entity["foo"] == uncompressed_value + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test__from_datastore_uncompressed_to_compressed(): + class ThisKind(model.Model): + foo = model.BlobProperty(compressed=True) + + key = datastore.Key("ThisKind", 123, project="testing") + datastore_entity = datastore.Entity(key=key) + uncompressed_value = b"abc" * 1000 + compressed_value = zlib.compress(uncompressed_value) + datastore_entity.update({"foo": uncompressed_value}) + protobuf = helpers.entity_to_protobuf(datastore_entity) + entity = model._entity_from_protobuf(protobuf) + ds_entity = model._entity_to_ds_entity(entity) + assert ds_entity["foo"] == compressed_value + class TestTextProperty: @staticmethod