Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dumpers: support for different load type #274

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions invenio_records/dumpers/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def _dump_model_field(self, record, model_field_name, dump, dump_key,
dump[dump_key] = self._serialize(val, dump_type)

def _load_model_field(self, record_cls, model_field_name, dump, dump_key,
dump_type):
load_type):
"""Helper method to load model fields from dump.

:param record_cls: The record class being used for loading.
Expand All @@ -169,12 +169,11 @@ def _load_model_field(self, record_cls, model_field_name, dump, dump_key,
return val

# Determine dump data type if not provided
if dump_type is None:
sa_field = getattr(record_cls.model_cls, model_field_name)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@slint this is the "bug" @alejandromumo discovered. This variable is never used, we could potentitally just return it, but then we would lose the capability of custom serializations, which is what we (and in most of the cases want).

dump_type = self._sa_type(record_cls.model_cls, model_field_name)
if load_type is None:
load_type = self._sa_type(record_cls.model_cls, model_field_name)

# Deserialize the value
return self._deserialize(val, dump_type)
return self._deserialize(val, load_type)

@staticmethod
def _iter_modelfields(record_cls):
Expand Down Expand Up @@ -247,15 +246,15 @@ def load(self, dump_data, record_cls):
# Load explicitly defined model fields.
model_data = {}
it = self._model_fields.items()
for model_field_name, (dump_key, dump_type) in it:
for model_field_name, (dump_key, load_type) in it:
model_data[model_field_name] = self._load_model_field(
record_cls, model_field_name, dump_data, dump_key, dump_type)
record_cls, model_field_name, dump_data, dump_key, load_type)

# Load model fields defined as system fields
for systemfield in self._iter_modelfields(record_cls):
model_data[systemfield.model_field_name] = self._load_model_field(
record_cls, systemfield.model_field_name, dump_data,
systemfield.dump_key, systemfield.dump_type)
systemfield.dump_key, systemfield.load_type)

# Initialize model if an id was provided.
if model_data.get('id') is not None:
Expand Down
11 changes: 10 additions & 1 deletion invenio_records/systemfields/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ModelField(SystemField):
"""Model field for providing get and set access on a model field."""

def __init__(self, model_field_name=None, dump=True, dump_key=None,
dump_type=None):
dump_type=None, load_type=None):
"""Initialize the field.

:param model_field_name: Name of field on the database model.
Expand All @@ -29,6 +29,7 @@ def __init__(self, model_field_name=None, dump=True, dump_key=None,
self.dump = dump
self._dump_key = dump_key
self._dump_type = dump_type
self._load_type = load_type

#
# Helpers
Expand Down Expand Up @@ -58,6 +59,14 @@ def dump_type(self):
"""
return self._dump_type

@property
def load_type(self):
"""The data type used to determine how to deserialize the model field.

Defaults to dump_type.
"""
return self._load_type or self._dump_type

def _set(self, model, value):
"""Internal method to set value on the model's field."""
setattr(model, self.model_field_name, value)
Expand Down
42 changes: 42 additions & 0 deletions tests/test_api_dumpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@
"""Test the dumpers API."""

from datetime import date, datetime
from enum import Enum
from uuid import UUID

import pytest
from invenio_db import db
from sqlalchemy.dialects import mysql
from sqlalchemy_utils.types import ChoiceType

from invenio_records.api import Record
from invenio_records.dumpers import ElasticsearchDumper, ElasticsearchDumperExt
from invenio_records.dumpers.relations import RelationDumperExt
from invenio_records.models import RecordMetadataBase
from invenio_records.systemfields import SystemFieldsMixin
from invenio_records.systemfields.model import ModelField
from invenio_records.systemfields.relations import PKListRelation, \
PKRelation, RelationsField

Expand Down Expand Up @@ -215,3 +220,40 @@ class RecordWithRelations(Record):
# Load it
# new_record = Record.loads(dump, loader=dumper)
# assert 'count' not in new_record


def test_load_dump_type(testapp):
dumper = ElasticsearchDumper()
rec = TestRecord.create({}, test=EnumTestModel.REGISTERED)
# Serialize
dumped_data = dumper.dump(rec, {})
assert isinstance(dumped_data["test"], str)
# Deserialize
loaded_data = dumper.load(dumped_data, TestRecord)
assert isinstance(loaded_data.test, EnumTestModel)
Copy link
Member

@alejandromumo alejandromumo Mar 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently failing, as the data is being loaded as a str and not EnumTestModel. That happens because record's ModelField test has the attribute dump_type set to str when instantiated. Therefore, the load_type will be also set to str and does not allow the dumper to infer the load type from the record's model SQL Alchemy column type.

See dumper's load module field:

# Retrieve the value
val = dump.pop(dump_key)
# Return None values immediately.
if val is None:
return val
# Determine dump data type if not provided
if load_type is None:
load_type = self._sa_type(record_cls.model_cls, model_field_name)
# Deserialize the value
return self._deserialize(val, load_type)

Copy link
Member Author

@ppanero ppanero Mar 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify, this is an issue with the master branch, independently of the previous commit or not (dump_type parameter rename to load_type).



# Similar to PIDStatus
class EnumTestModel(Enum):
NEW = "N"
REGISTERED = "R"

def __init__(self, value):
"""Hack."""

def __str__(self):
"""Return its value."""
return self.value


class TestMetadata(db.Model, RecordMetadataBase):
"""Represent a record metadata."""

__tablename__ = 'test_dumper_table'
test = db.Column(ChoiceType(EnumTestModel, impl=db.CHAR(1)))


# Similar to ModelPIDField
class TestRecord(Record, SystemFieldsMixin):
model_cls = TestMetadata
test = ModelField(dump_type=str)