From 27554bd347df9f7886c20ec2ad09c2a817e08424 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Mon, 20 Jun 2022 15:53:43 +0530
Subject: [PATCH 1/9] dynamic types support array, dict, ndarray

---
 mlem/contrib/lightgbm.py     |   8 +-
 mlem/contrib/numpy.py        |  29 +++-
 mlem/core/data_type.py       | 201 +++++++++++++++++++++---
 tests/contrib/test_numpy.py  | 216 ++++++++++++++++++++------
 tests/core/test_data_type.py | 288 ++++++++++++++++++++++++++++++-----
 5 files changed, 626 insertions(+), 116 deletions(-)

diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py
index b45fad44..17e9b263 100644
--- a/mlem/contrib/lightgbm.py
+++ b/mlem/contrib/lightgbm.py
@@ -69,8 +69,12 @@ def get_writer(
         return LightGBMDataWriter(**kwargs)
 
     @classmethod
-    def process(cls, obj: Any, **kwargs) -> DataType:
-        return LightGBMDataType(inner=DataAnalyzer.analyze(obj.data))
+    def process(cls, obj: Any, is_dynamic: bool = False, **kwargs) -> DataType:
+        return LightGBMDataType(
+            inner=DataAnalyzer.analyze(
+                obj.data, is_dynamic=is_dynamic, **kwargs
+            )
+        )
 
     def get_model(self, prefix: str = "") -> Type[BaseModel]:
         return self.inner.get_serializer().get_model(prefix)
diff --git a/mlem/contrib/numpy.py b/mlem/contrib/numpy.py
index e532d34d..232220d8 100644
--- a/mlem/contrib/numpy.py
+++ b/mlem/contrib/numpy.py
@@ -94,7 +94,7 @@ class NumpyNdarrayType(
     type: ClassVar[str] = "ndarray"
     libraries: ClassVar[List[ModuleType]] = [np]
 
-    shape: Tuple[Optional[int], ...]
+    shape: Optional[Tuple[Optional[int], ...]]
     dtype: str
 
     @staticmethod
@@ -134,7 +134,16 @@ def _subtype(self, subshape: Tuple[Optional[int], ...]):
     def get_model(self, prefix: str = "") -> Type[BaseModel]:
         # TODO: https://github.com/iterative/mlem/issues/33
         return create_model(
-            prefix + "NumpyNdarray", __root__=(List[self._subtype(self.shape[1:])], ...)  # type: ignore
+            prefix + "NumpyNdarray",
+            __root__=(
+                self._subtype(self.shape)
+                if self.shape
+                else List[
+                    Union[python_type_from_np_string_repr(self.dtype), List]
+                ],  # type: ignore
+                ...,
+            )
+            # type: ignore
         )
 
     def serialize(self, instance: np.ndarray):
@@ -148,10 +157,20 @@ def serialize(self, instance: np.ndarray):
         return instance.tolist()
 
     def _check_shape(self, array, exc_type):
-        if tuple(array.shape)[1:] != self.shape[1:]:
-            raise exc_type(
-                f"given array is of shape: {(None,) + tuple(array.shape)[1:]}, expected: {self.shape}"
+        if self.shape:
+            if len(array.shape) != len(self.shape):
+                raise exc_type(
+                    f"given array is of rank: {len(array.shape)}, expected: {len(self.shape)}"
+                )
+
+            array_shape = tuple(
+                None if expected_dim is None else array_dim
+                for array_dim, expected_dim in zip(array.shape, self.shape)
             )
+            if tuple(array_shape) != self.shape:
+                raise exc_type(
+                    f"given array is of shape: {array_shape}, expected: {self.shape}"
+                )
 
     def get_writer(self, project: str = None, filename: str = None, **kwargs):
         return NumpyArrayWriter()
diff --git a/mlem/core/data_type.py b/mlem/core/data_type.py
index 4356fbf9..00a5094e 100644
--- a/mlem/core/data_type.py
+++ b/mlem/core/data_type.py
@@ -2,6 +2,7 @@
 Base classes for working with data in MLEM
 """
 import builtins
+import json
 import posixpath
 from abc import ABC, abstractmethod
 from typing import (
@@ -70,8 +71,10 @@ def bind(self, data: Any):
         return self
 
     @classmethod
-    def create(cls, obj: Any, **kwargs):
-        return DataAnalyzer.analyze(obj, **kwargs).bind(obj)
+    def create(cls, obj: Any, is_dynamic: bool = False, **kwargs):
+        return DataAnalyzer.analyze(obj, is_dynamic=is_dynamic, **kwargs).bind(
+            obj
+        )
 
 
 class DataSerializer(ABC):
@@ -367,7 +370,7 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]:
 
 def _check_type_and_size(obj, dtype, size, exc_type):
     DataType.check_type(obj, dtype, exc_type)
-    if size != -1 and len(obj) != size:
+    if size is not None and len(obj) != size:
         raise exc_type(
             f"given {dtype.__name__} has len: {len(obj)}, expected: {size}"
         )
@@ -449,45 +452,83 @@ def is_object_valid(cls, obj: Any) -> bool:
         return isinstance(obj, (list, tuple))
 
     @classmethod
-    def process(cls, obj, **kwargs) -> DataType:
+    def process(cls, obj, is_dynamic: bool = False, **kwargs) -> DataType:
         if isinstance(obj, tuple):
-            return TupleType(items=[DataAnalyzer.analyze(o) for o in obj])
+            return TupleType(
+                items=[
+                    DataAnalyzer.analyze(o, is_dynamic=is_dynamic, **kwargs)
+                    for o in obj
+                ]
+            )
 
         py_types = {type(o) for o in obj}
         if len(obj) <= 1 or len(py_types) > 1:
-            return ListType(items=[DataAnalyzer.analyze(o) for o in obj])
+            return ListType(
+                items=[
+                    DataAnalyzer.analyze(o, is_dynamic=is_dynamic, **kwargs)
+                    for o in obj
+                ]
+            )
+
+        size = None if is_dynamic else len(obj)
 
         if not py_types.intersection(
             PrimitiveType.PRIMITIVES
         ):  # py_types is guaranteed to be singleton set here
-            items_types = [DataAnalyzer.analyze(o) for o in obj]
+            items_types = [
+                DataAnalyzer.analyze(o, is_dynamic=is_dynamic, **kwargs)
+                for o in obj
+            ]
             first, *others = items_types
             for other in others:
                 if first != other:
                     return ListType(items=items_types)
-            return ArrayType(dtype=first, size=len(obj))
+            return ArrayType(dtype=first, size=size)
 
         # optimization for large lists of same primitive type elements
-        return ArrayType(dtype=DataAnalyzer.analyze(obj[0]), size=len(obj))
-
+        return ArrayType(
+            dtype=DataAnalyzer.analyze(
+                obj[0], is_dynamic=is_dynamic, **kwargs
+            ),
+            size=size,
+        )
 
-class DictType(DataType, DataSerializer, DataHook):
-    """
-    DataType for dict
-    """
-
-    type: ClassVar[str] = "dict"
-    item_types: Dict[str, DataType]
 
+class DictTypeHook(DataHook):
     @classmethod
     def is_object_valid(cls, obj: Any) -> bool:
         return isinstance(obj, dict)
 
     @classmethod
-    def process(cls, obj: Any, **kwargs) -> "DictType":
-        return DictType(
-            item_types={k: DataAnalyzer.analyze(v) for (k, v) in obj.items()}
-        )
+    def process(
+        cls, obj: Any, is_dynamic: bool = False, **kwargs
+    ) -> Union["DictType", "DynamicDictType"]:
+
+        if not is_dynamic:
+            return DictType(
+                item_types={
+                    k: DataAnalyzer.analyze(v, is_dynamic=is_dynamic, **kwargs)
+                    for (k, v) in obj.items()
+                }
+            )
+        else:
+            return DynamicDictType(
+                key_type=DataAnalyzer.analyze(
+                    next(iter(obj.keys())), is_dynamic=is_dynamic, **kwargs
+                ),
+                value_type=DataAnalyzer.analyze(
+                    next(iter(obj.values())), is_dynamic=is_dynamic, **kwargs
+                ),
+            )
+
+
+class DictType(DataType, DataSerializer):
+    """
+    DataType for dict with fixed set of keys
+    """
+
+    type: ClassVar[str] = "dict"
+    item_types: Dict[str, DataType]
 
     def deserialize(self, obj):
         self._check_type_and_keys(obj, DeserializationError)
@@ -577,6 +618,124 @@ def read_batch(
         raise NotImplementedError
 
 
+class DynamicDictType(DataType, DataSerializer):
+    """
+    Dynamic DataType for dict without fixed set of keys
+    """
+
+    type: ClassVar[str] = "d_dict"
+
+    key_type: DataType
+    value_type: DataType
+
+    def deserialize(self, obj):
+        self._check_type_and_keys(obj, DeserializationError)
+        return {
+            self.key_type.get_serializer()
+            .deserialize(
+                k,
+            ): self.value_type.get_serializer()
+            .deserialize(
+                v,
+            )
+            for k, v in obj.items()
+        }
+
+    def serialize(self, instance: dict):
+        self._check_type_and_keys(instance, SerializationError)
+        if self.key_type == PrimitiveType and self.value_type == PrimitiveType:
+            return instance
+        else:
+            return {
+                self.key_type.get_serializer()
+                .serialize(
+                    k,
+                ): self.value_type.get_serializer()
+                .serialize(
+                    v,
+                )
+                for k, v in instance.items()
+            }
+
+    def _check_type_and_keys(self, obj, exc_type):
+        self.check_type(obj, dict, exc_type)
+        obj_type = DictTypeHook.process(obj, is_dynamic=True)
+        obj_types = (obj_type.key_type, obj_type.value_type)
+        expected_types = (self.key_type, self.value_type)
+        if obj_types != expected_types:
+            raise exc_type(
+                f"given dict has type: {obj_types}, expected: {expected_types}"
+            )
+
+        # TODO - should we check for type of all items of dict?
+
+    def get_requirements(self) -> Requirements:
+        return sum(
+            [
+                self.key_type.get_requirements(),
+                self.value_type.get_requirements(),
+            ],
+            Requirements.new(),
+        )
+
+    def get_writer(
+        self, project: str = None, filename: str = None, **kwargs
+    ) -> "DynamicDictWriter":
+        return DynamicDictWriter(**kwargs)
+
+    def get_model(self, prefix="") -> Type[BaseModel]:
+        field_type = (
+            Dict[  # type: ignore
+                self.key_type.get_serializer().get_model(
+                    prefix + "_key_"  # noqa: F821
+                ),
+                self.value_type.get_serializer().get_model(
+                    prefix + "_val_"  # noqa: F821
+                ),
+            ],
+            ...,
+        )
+        return create_model(prefix + "DynamicDictType", __root__=field_type)  # type: ignore
+
+
+class DynamicDictWriter(DataWriter):
+    type: ClassVar[str] = "d_dict"
+
+    def write(
+        self, data: DataType, storage: Storage, path: str
+    ) -> Tuple[DataReader, Artifacts]:
+        if not isinstance(data, DynamicDictType):
+            raise ValueError(
+                f"expected data to be of DynamicDictTypeWriter, got {type(data)} instead"
+            )
+        with storage.open(path) as (f, art):
+            f.write(
+                json.dumps(data.get_serializer().serialize(data.data)).encode(
+                    "utf-8"
+                )
+            )
+        return DynamicDictReader(data_type=data), {DataWriter.art_name: art}
+
+
+class DynamicDictReader(DataReader):
+    type: ClassVar[str] = "d_dict"
+    data_type: DynamicDictType
+
+    def read(self, artifacts: Artifacts) -> DataType:
+        if DataWriter.art_name not in artifacts:
+            raise ValueError(
+                f"Wrong artifacts {artifacts}: should be one {DataWriter.art_name} file"
+            )
+        with artifacts[DataWriter.art_name].open() as f:
+            data = json.load(f)
+        return self.data_type.copy().bind(data)
+
+    def read_batch(
+        self, artifacts: Artifacts, batch_size: int
+    ) -> Iterator[DataType]:
+        raise NotImplementedError
+
+
 #
 #
 # class BytesDataType(DataType):
diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index 5b9252a1..127b5135 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -1,8 +1,9 @@
-from json import loads
+import re
 
 import numpy as np
 import pytest
 from pydantic import parse_obj_as
+from pytest_lazyfixture import lazy_fixture
 
 from mlem.contrib.numpy import (
     NumpyNdarrayType,
@@ -36,15 +37,120 @@ def custom_assert(x, y):
     )
 
 
-def test_ndarray_source():
-    data = np.array([1, 2, 3])
-    data_type = DataType.create(data)
-    data_write_read_check(data_type, custom_eq=np.array_equal)
+@pytest.fixture
+def nat():
+    data = np.array([[1, 2], [3, 4]])
+    dtype = DataType.create(data)
+    payload = {"shape": (None, 2), "dtype": "int64", "type": "ndarray"}
+    schema = {
+        "title": "NumpyNdarray",
+        "type": "array",
+        "items": {
+            "type": "array",
+            "items": {"type": "integer"},
+            "minItems": 2,
+            "maxItems": 2,
+        },
+    }
+    test_data1 = data
+    test_data2 = np.array([[10, 20], [30, 40]])
+    test_data3 = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+    return False, dtype, payload, schema, test_data1, test_data2, test_data3
 
 
 @pytest.fixture
-def nat():
-    return DataType.create(np.array([[1, 2], [3, 4]]))
+def nat_dynamic():
+    dtype = NumpyNdarrayType(shape=[2, None, None], dtype="int")
+    payload = {"dtype": "int", "shape": (2, None, None), "type": "ndarray"}
+    schema = {
+        "items": {
+            "items": {"items": {"type": "integer"}, "type": "array"},
+            "type": "array",
+        },
+        "maxItems": 2,
+        "minItems": 2,
+        "title": "NumpyNdarray",
+        "type": "array",
+    }
+    test_data1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+    test_data2 = np.array(
+        [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]
+    )
+    test_data3 = np.array([[[1, 2, 2], [3, 4, 4]], [[5, 6, 6], [7, 8, 8]]])
+    return True, dtype, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def nat_dynamic_float():
+    dtype = NumpyNdarrayType(shape=[2, None, None, 1], dtype="float")
+    payload = {
+        "dtype": "float",
+        "shape": (2, None, None, 1),
+        "type": "ndarray",
+    }
+    schema = {
+        "items": {
+            "items": {
+                "items": {
+                    "items": {"type": "number"},
+                    "maxItems": 1,
+                    "minItems": 1,
+                    "type": "array",
+                },
+                "type": "array",
+            },
+            "type": "array",
+        },
+        "maxItems": 2,
+        "minItems": 2,
+        "title": "NumpyNdarray",
+        "type": "array",
+    }
+    test_data1 = np.array([[[[1.0]], [[3.0]]], [[[5.1]], [[7.1]]]])
+    test_data2 = np.array([[[[1.1], [3.0], [5.0]]], [[[7.1], [9.99], [11.2]]]])
+    test_data3 = np.array(
+        [[[[1.1], [3.2]], [[5.33], [7.1]]], [[[1.11], [3.4]], [[5.3], [7.2]]]]
+    )
+    return True, dtype, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def nat_dynamic_all_none_dims():
+    dtype = NumpyNdarrayType(shape=[None, None, None], dtype="int")
+    payload = {"dtype": "int", "shape": (None, None, None), "type": "ndarray"}
+    schema = {
+        "items": {
+            "items": {"items": {"type": "integer"}, "type": "array"},
+            "type": "array",
+        },
+        "title": "NumpyNdarray",
+        "type": "array",
+    }
+    test_data1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+    test_data2 = np.array(
+        [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]
+    )
+    test_data3 = np.array([[[1, 2, 2], [3, 4, 4]], [[5, 6, 6], [7, 8, 8]]])
+    return True, dtype, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def nat_dynamic_shape_none():
+    dtype = NumpyNdarrayType(shape=None, dtype="int")
+    payload = {"dtype": "int", "type": "ndarray"}
+    schema = {
+        "items": {
+            "anyOf": [{"type": "integer"}, {"items": {}, "type": "array"}]
+        },
+        "title": "NumpyNdarray",
+        "type": "array",
+    }
+    test_data1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+    test_data2 = np.array(
+        [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]
+    )
+    test_data3 = np.array([[[1, 2, 2], [3, 4, 4]], [[5, 6, 6], [7, 8, 8]]])
+    return True, dtype, payload, schema, test_data1, test_data2, test_data3
 
 
 def test_python_type_from_np_string_repr():
@@ -81,41 +187,73 @@ def test_number():
     assert ndt.get_serializer().deserialize(n_payload) == value
 
 
-def test_ndarray(nat):
-    value = nat.data
+@pytest.mark.parametrize("test_data_idx", [4, 5, 6])
+@pytest.mark.parametrize(
+    "data",
+    [
+        (lazy_fixture("nat")),
+        (lazy_fixture("nat_dynamic")),
+        (lazy_fixture("nat_dynamic_all_none_dims")),
+        (lazy_fixture("nat_dynamic_shape_none")),
+        (lazy_fixture("nat_dynamic_float")),
+    ],
+)
+def test_ndarray(data, test_data_idx):
+    nat, payload, schema, value = (
+        data[1],
+        data[2],
+        data[3],
+        data[test_data_idx],
+    )
     assert isinstance(nat, NumpyNdarrayType)
-    assert nat.shape == (None, 2)
-    assert python_type_from_np_string_repr(nat.dtype) == int
     assert nat.get_requirements().modules == ["numpy"]
-    payload = nat.json()
-    nat2 = parse_obj_as(DataType, loads(payload))
+    assert nat.dict() == payload
+    nat2 = parse_obj_as(DataType, payload)
     assert nat == nat2
     assert nat.get_model().__name__ == nat2.get_model().__name__
-    assert nat.get_model().schema() == {
-        "title": "NumpyNdarray",
-        "type": "array",
-        "items": {
-            "type": "array",
-            "items": {"type": "integer"},
-            "minItems": 2,
-            "maxItems": 2,
-        },
-    }
+    assert nat.get_model().schema() == schema
     n_payload = nat.get_serializer().serialize(value)
     assert (nat.get_serializer().deserialize(n_payload) == value).all()
+    model = parse_obj_as(nat.get_model(), n_payload)
+    assert model.__root__ == n_payload
+
+    nat = nat.bind(value)
+    data_write_read_check(nat, custom_eq=np.array_equal)
 
 
 @pytest.mark.parametrize(
-    "obj",
+    "nddtype,obj,err_msg",
     [
-        {},  # wrong type
-        np.array([[1, 2], [3, 4]], dtype=np.float32),  # wrong data type
-        np.array([1, 2]),  # wrong shape
+        [
+            lazy_fixture("nat"),
+            {},
+            "given data is of type: <class 'dict'>, expected: <class 'numpy.ndarray'>",
+        ],
+        [
+            lazy_fixture("nat"),
+            np.array([[1, 2], [3, 4]], dtype=np.float32),
+            "given array is of type: float32, " "expected: int64",
+        ],
+        [
+            lazy_fixture("nat"),
+            np.array([1, 2]),
+            "given array is of rank: 1, expected: 2",
+        ],
+        [
+            lazy_fixture("nat_dynamic"),
+            np.array([1, 2]),
+            "given array is of rank: 1, " "expected: 3",
+        ],
+        [
+            lazy_fixture("nat_dynamic_float"),
+            np.array([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]]),
+            "given array is of shape: (1, None, None, 2), expected: (2, None, None, 1)",
+        ],
     ],
 )
-def test_ndarray_serialize_failure(nat, obj):
-    with pytest.raises(SerializationError):
-        nat.serialize(obj)
+def test_ndarray_serialize_failure(nddtype, obj, err_msg):
+    with pytest.raises(SerializationError, match=re.escape(err_msg)):
+        nddtype[1].serialize(obj)
 
 
 @pytest.mark.parametrize(
@@ -124,26 +262,10 @@ def test_ndarray_serialize_failure(nat, obj):
 )
 def test_ndarray_deserialize_failure(nat, obj):
     with pytest.raises(DeserializationError):
-        nat.deserialize(obj)
+        nat[1].deserialize(obj)
 
 
 def test_requirements():
     assert get_object_requirements(
         NumpyNdarrayType(shape=(0,), dtype="int")
     ).modules == ["numpy"]
-
-
-# Copyright 2019 Zyfra
-# Copyright 2021 Iterative
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tests/core/test_data_type.py b/tests/core/test_data_type.py
index f79ddb84..6e34383b 100644
--- a/tests/core/test_data_type.py
+++ b/tests/core/test_data_type.py
@@ -1,5 +1,8 @@
+import copy
+
 import pytest
 from pydantic import parse_obj_as
+from pytest_lazyfixture import lazy_fixture
 
 from mlem.core.data_type import (
     ArrayReader,
@@ -9,6 +12,8 @@
     DataType,
     DictReader,
     DictType,
+    DynamicDictReader,
+    DynamicDictType,
     ListType,
     PrimitiveReader,
     PrimitiveType,
@@ -27,6 +32,32 @@ def test_primitives_not_ok():
     assert not PrimitiveType.is_object_valid(NotPrimitive())
 
 
+@pytest.fixture
+def array():
+    is_dynamic = False
+    array = [1, 2, 3, 4, 5]
+    payload = {
+        "dtype": {"ptype": "int", "type": "primitive"},
+        "size": 5,
+        "type": "array",
+    }
+    schema = {
+        "items": {"type": "integer"},
+        "title": "Array",
+        "type": "array",
+    }
+
+    return is_dynamic, array, payload, schema
+
+
+@pytest.fixture
+def array_dynamic(array):
+    is_dynamic = True
+    payload = copy.deepcopy(array[2])
+    del payload["size"]
+    return is_dynamic, array[1], payload, array[3]
+
+
 @pytest.mark.parametrize("ptype", PrimitiveType.PRIMITIVES)
 def test_primitive_source(ptype):
     if ptype is type(None):  # noqa: E721
@@ -63,31 +94,39 @@ def test_primitives(ptype):
     assert dt.get_model() is ptype
 
 
-def test_array():
-    l_value = [1, 2, 3, 4, 5]
-    dt = DataAnalyzer.analyze(l_value)
+@pytest.mark.parametrize(
+    "array_data,value",
+    [
+        (lazy_fixture("array"), None),
+        (lazy_fixture("array_dynamic"), None),
+        (lazy_fixture("array_dynamic"), [1, 2, 3]),
+    ],
+)
+def test_array(array_data, value):
+    dt = DataAnalyzer.analyze(array_data[1], is_dynamic=array_data[0])
+    l_value = array_data[1] if value is None else value
     assert isinstance(dt, ArrayType)
-    payload = {
-        "dtype": {"ptype": "int", "type": "primitive"},
-        "size": 5,
-        "type": "array",
-    }
-    assert dt.dict() == payload
-    dt2 = parse_obj_as(ArrayType, payload)
+    assert dt.dict() == array_data[2]
+    dt2 = parse_obj_as(ArrayType, array_data[2])
     assert dt2 == dt
     assert l_value == dt.serialize(l_value)
     assert l_value == dt.deserialize(l_value)
     assert dt.get_model().__name__ == "Array"
-    assert dt.get_model().schema() == {
-        "items": {"type": "integer"},
-        "title": "Array",
-        "type": "array",
-    }
+    assert dt.get_model().schema() == array_data[3]
 
 
-def test_list_source():
-    l_value = [1, 2, 3, 4, 5]
-    dt = DataType.create(l_value)
+@pytest.mark.parametrize(
+    "is_dynamic,array_data,value",
+    [
+        (False, lazy_fixture("array"), None),
+        (True, lazy_fixture("array_dynamic"), None),
+        (True, lazy_fixture("array_dynamic"), [1, 2, 3]),
+    ],
+)
+def test_list_source(is_dynamic, array_data, value):
+    dt = DataType.create(array_data[0])
+    l_value = array_data[0] if value is None else value
+    dt.bind(l_value)
 
     artifacts = data_write_read_check(
         dt,
@@ -95,11 +134,8 @@ def test_list_source():
     )
 
     assert list(artifacts.keys()) == [f"{x}/data" for x in range(len(l_value))]
-    assert artifacts["0/data"].uri.endswith("data/0")
-    assert artifacts["1/data"].uri.endswith("data/1")
-    assert artifacts["2/data"].uri.endswith("data/2")
-    assert artifacts["3/data"].uri.endswith("data/3")
-    assert artifacts["4/data"].uri.endswith("data/4")
+    for x in range(len(l_value)):
+        assert artifacts[f"{x}/data"].uri.endswith(f"data/{x}")
 
 
 def test_tuple():
@@ -198,10 +234,10 @@ def test_mixed_list_source():
     assert artifacts["5/data"].uri.endswith("data/5")
 
 
-def test_dict():
+@pytest.fixture
+def dict_data():
+    is_dynamic = False
     d = {"1": 1, "2": "a"}
-    dt = DataAnalyzer.analyze(d)
-    assert isinstance(dt, DictType)
     payload = {
         "item_types": {
             "1": {"ptype": "int", "type": "primitive"},
@@ -209,13 +245,8 @@ def test_dict():
         },
         "type": "dict",
     }
-    assert dt.dict() == payload
-    dt2 = parse_obj_as(DictType, payload)
-    assert dt2 == dt
-    assert d == dt.serialize(d)
-    assert d == dt.deserialize(d)
-    assert dt.get_model().__name__ == "DictType"
-    assert dt.get_model().schema() == {
+
+    schema = {
         "title": "DictType",
         "type": "object",
         "properties": {
@@ -225,10 +256,182 @@ def test_dict():
         "required": ["1", "2"],
     }
 
+    test_data1 = {"1": 1, "2": "a"}
+    test_data2 = {"1": 2, "2": "b"}
+    test_data3 = {"1": 3, "2": "c"}
+
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def dynamic_dict_data():
+    is_dynamic = True
+    d = {"a": 1, "b": 2}
+    payload = {
+        "key_type": {"ptype": "str", "type": "primitive"},
+        "value_type": {"ptype": "int", "type": "primitive"},
+        "type": "d_dict",
+    }
+    schema = {
+        "title": "DynamicDictType",
+        "type": "object",
+        "additionalProperties": {"type": "integer"},
+    }
+
+    test_data1 = {"a": 1, "b": 2}
+    test_data2 = {"a": 1}
+    test_data3 = {"a": 1, "b": 2, "c": 3, "d": 1}
+
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def dynamic_dict_str_val_type_data():
+    is_dynamic = True
+    d = {"a": "1", "b": "2"}
+    payload = {
+        "key_type": {"ptype": "str", "type": "primitive"},
+        "value_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+    }
+    schema = {
+        "title": "DynamicDictType",
+        "type": "object",
+        "additionalProperties": {"type": "string"},
+    }
+
+    test_data1 = {"a": "1", "b": "2"}
+    test_data2 = {"a": "1"}
+    test_data3 = {"a": "1", "b": "2", "c": "3", "d": "1"}
 
-def test_dict_source():
-    d_value = {"1": 1.5, "2": "a", "3": {"1": False}}
-    data_type = DataType.create(d_value)
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def dynamic_dict_array_type():
+    is_dynamic = True
+    d = {"a": [1, 2, 3], "b": [3, 4, 5]}
+    payload = {
+        "key_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+        "value_type": {
+            "dtype": {"ptype": "int", "type": "primitive"},
+            "type": "array",
+        },
+    }
+    schema = {
+        "additionalProperties": {"$ref": "#/definitions/_val_Array"},
+        "definitions": {
+            "_val_Array": {
+                "items": {"type": "integer"},
+                "title": "_val_Array",
+                "type": "array",
+            }
+        },
+        "title": "DynamicDictType",
+        "type": "object",
+    }
+
+    test_data1 = {"a": [1, 2, 3], "b": [3, 4, 5]}
+    test_data2 = {"a": [1, 2, 3]}
+    test_data3 = {"a": [1, 2, 3], "b": [3, 4, 5], "d": [6, 7, 8]}
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def dynamic_dict_dict_type():
+    is_dynamic = True
+    d = {"a": {"l": [1, 2]}, "b": {"l": [3, 4]}}
+    payload = {
+        "key_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+        "value_type": {
+            "key_type": {"ptype": "str", "type": "primitive"},
+            "type": "d_dict",
+            "value_type": {
+                "dtype": {"ptype": "int", "type": "primitive"},
+                "type": "array",
+            },
+        },
+    }
+    schema = {
+        "additionalProperties": {"$ref": "#/definitions/_val_DynamicDictType"},
+        "definitions": {
+            "_val_DynamicDictType": {
+                "additionalProperties": {
+                    "$ref": "#/definitions/_val__val_Array"
+                },
+                "title": "_val_DynamicDictType",
+                "type": "object",
+            },
+            "_val__val_Array": {
+                "items": {"type": "integer"},
+                "title": "_val__val_Array",
+                "type": "array",
+            },
+        },
+        "title": "DynamicDictType",
+        "type": "object",
+    }
+    test_data1 = {"a": {"l": [1, 2]}, "b": {"l": [3, 4]}}
+    test_data2 = {"a": {"l": [1, 2]}}
+    test_data3 = {"a": {"l": [1, 2]}, "b": {"l": [3, 4]}, "c": {"k": [3, 4]}}
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.mark.parametrize("test_data_idx", [4, 5, 6])
+@pytest.mark.parametrize(
+    "data",
+    [
+        lazy_fixture("dict_data"),
+        lazy_fixture("dynamic_dict_data"),
+        lazy_fixture("dynamic_dict_str_val_type_data"),
+        lazy_fixture("dynamic_dict_array_type"),
+        lazy_fixture("dynamic_dict_dict_type"),
+    ],
+)
+def test_dict(data, test_data_idx):
+    is_dynamic, d, payload, schema, test_data = (
+        data[0],
+        data[1],
+        data[2],
+        data[3],
+        data[test_data_idx],
+    )
+    dt = DataAnalyzer.analyze(d, is_dynamic=is_dynamic)
+    dtype = DictType if not is_dynamic else DynamicDictType
+    assert isinstance(dt, dtype)
+
+    assert dt.dict() == payload
+    dt2 = parse_obj_as(dtype, payload)
+    assert dt2 == dt
+    assert test_data == dt.serialize(test_data)
+    assert test_data == dt.deserialize(test_data)
+    assert dt.get_model().__name__ == dtype.__name__
+    assert dt.get_model().schema() == schema
+    assert parse_obj_as(dt.get_model(), test_data)
+
+
+@pytest.mark.parametrize("test_data_idx", [4, 5, 6])
+@pytest.mark.parametrize(
+    "data",
+    [
+        lazy_fixture("dict_data"),
+        lazy_fixture("dynamic_dict_data"),
+        lazy_fixture("dynamic_dict_str_val_type_data"),
+        lazy_fixture("dynamic_dict_array_type"),
+        lazy_fixture("dynamic_dict_dict_type"),
+    ],
+)
+def test_dict_source(data, test_data_idx):
+    is_dynamic, d, test_data = (
+        data[0],
+        data[1],
+        data[test_data_idx],
+    )
+    data_type = DataType.create(d, is_dynamic=is_dynamic)
+    data_type = data_type.bind(test_data)
+    dtype_reader = DynamicDictReader if is_dynamic else DictReader
 
     def custom_assert(x, y):
         assert x == y
@@ -238,11 +441,14 @@ def custom_assert(x, y):
 
     artifacts = data_write_read_check(
         data_type,
-        reader_type=DictReader,
+        reader_type=dtype_reader,
         custom_assert=custom_assert,
     )
 
-    assert list(artifacts.keys()) == ["1/data", "2/data", "3/1/data"]
-    assert artifacts["1/data"].uri.endswith("data/1")
-    assert artifacts["2/data"].uri.endswith("data/2")
-    assert artifacts["3/1/data"].uri.endswith("data/3/1")
+    if not is_dynamic:
+        assert list(artifacts.keys()) == ["1/data", "2/data"]
+        assert artifacts["1/data"].uri.endswith("data/1")
+        assert artifacts["2/data"].uri.endswith("data/2")
+    else:
+        assert list(artifacts.keys()) == ["data"]
+        assert artifacts["data"].uri.endswith("data")

From c6e8d72595010de0fe25a24df8b19f58219fe342 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Mon, 20 Jun 2022 16:23:23 +0530
Subject: [PATCH 2/9] dynamic types support array, dict, ndarray

---
 mlem/contrib/lightgbm.py     |  8 ++------
 tests/core/test_data_type.py | 14 +++++++-------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py
index 17e9b263..b45fad44 100644
--- a/mlem/contrib/lightgbm.py
+++ b/mlem/contrib/lightgbm.py
@@ -69,12 +69,8 @@ def get_writer(
         return LightGBMDataWriter(**kwargs)
 
     @classmethod
-    def process(cls, obj: Any, is_dynamic: bool = False, **kwargs) -> DataType:
-        return LightGBMDataType(
-            inner=DataAnalyzer.analyze(
-                obj.data, is_dynamic=is_dynamic, **kwargs
-            )
-        )
+    def process(cls, obj: Any, **kwargs) -> DataType:
+        return LightGBMDataType(inner=DataAnalyzer.analyze(obj.data))
 
     def get_model(self, prefix: str = "") -> Type[BaseModel]:
         return self.inner.get_serializer().get_model(prefix)
diff --git a/tests/core/test_data_type.py b/tests/core/test_data_type.py
index 6e34383b..f4d1c06d 100644
--- a/tests/core/test_data_type.py
+++ b/tests/core/test_data_type.py
@@ -116,16 +116,16 @@ def test_array(array_data, value):
 
 
 @pytest.mark.parametrize(
-    "is_dynamic,array_data,value",
+    "array_data,value",
     [
-        (False, lazy_fixture("array"), None),
-        (True, lazy_fixture("array_dynamic"), None),
-        (True, lazy_fixture("array_dynamic"), [1, 2, 3]),
+        (lazy_fixture("array"), None),
+        (lazy_fixture("array_dynamic"), None),
+        (lazy_fixture("array_dynamic"), [1, 2, 3]),
     ],
 )
-def test_list_source(is_dynamic, array_data, value):
-    dt = DataType.create(array_data[0])
-    l_value = array_data[0] if value is None else value
+def test_list_source(array_data, value):
+    dt = DataType.create(array_data[1])
+    l_value = array_data[1] if value is None else value
     dt.bind(l_value)
 
     artifacts = data_write_read_check(

From fe53e93c0a18830bf67041d770bf1e3bb9d7dfb2 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Mon, 20 Jun 2022 21:27:19 +0530
Subject: [PATCH 3/9] dynamic types support array, dict, ndarray

---
 mlem/core/data_type.py | 62 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/mlem/core/data_type.py b/mlem/core/data_type.py
index 00a5094e..1304b55e 100644
--- a/mlem/core/data_type.py
+++ b/mlem/core/data_type.py
@@ -503,23 +503,9 @@ def is_object_valid(cls, obj: Any) -> bool:
     def process(
         cls, obj: Any, is_dynamic: bool = False, **kwargs
     ) -> Union["DictType", "DynamicDictType"]:
-
         if not is_dynamic:
-            return DictType(
-                item_types={
-                    k: DataAnalyzer.analyze(v, is_dynamic=is_dynamic, **kwargs)
-                    for (k, v) in obj.items()
-                }
-            )
-        else:
-            return DynamicDictType(
-                key_type=DataAnalyzer.analyze(
-                    next(iter(obj.keys())), is_dynamic=is_dynamic, **kwargs
-                ),
-                value_type=DataAnalyzer.analyze(
-                    next(iter(obj.values())), is_dynamic=is_dynamic, **kwargs
-                ),
-            )
+            return DictType.analyze(obj, **kwargs)
+        return DynamicDictType.analyze(obj, **kwargs)
 
 
 class DictType(DataType, DataSerializer):
@@ -530,6 +516,15 @@ class DictType(DataType, DataSerializer):
     type: ClassVar[str] = "dict"
     item_types: Dict[str, DataType]
 
+    @classmethod
+    def analyze(cls, obj, **kwargs):
+        return DictType(
+            item_types={
+                k: DataAnalyzer.analyze(v, is_dynamic=False, **kwargs)
+                for (k, v) in obj.items()
+            }
+        )
+
     def deserialize(self, obj):
         self._check_type_and_keys(obj, DeserializationError)
         return {
@@ -645,21 +640,32 @@ def serialize(self, instance: dict):
         self._check_type_and_keys(instance, SerializationError)
         if self.key_type == PrimitiveType and self.value_type == PrimitiveType:
             return instance
-        else:
-            return {
-                self.key_type.get_serializer()
-                .serialize(
-                    k,
-                ): self.value_type.get_serializer()
-                .serialize(
-                    v,
-                )
-                for k, v in instance.items()
-            }
+
+        return {
+            self.key_type.get_serializer()
+            .serialize(
+                k,
+            ): self.value_type.get_serializer()
+            .serialize(
+                v,
+            )
+            for k, v in instance.items()
+        }
+
+    @classmethod
+    def analyze(cls, obj, **kwargs):
+        return DynamicDictType(
+            key_type=DataAnalyzer.analyze(
+                next(iter(obj.keys())), is_dynamic=True, **kwargs
+            ),
+            value_type=DataAnalyzer.analyze(
+                next(iter(obj.values())), is_dynamic=True, **kwargs
+            ),
+        )
 
     def _check_type_and_keys(self, obj, exc_type):
         self.check_type(obj, dict, exc_type)
-        obj_type = DictTypeHook.process(obj, is_dynamic=True)
+        obj_type: DynamicDictType = self.analyze(obj)
         obj_types = (obj_type.key_type, obj_type.value_type)
         expected_types = (self.key_type, self.value_type)
         if obj_types != expected_types:

From 07fcd8180e6ecab73ad9f3daaecadbe95173939f Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Mon, 20 Jun 2022 22:24:07 +0530
Subject: [PATCH 4/9] fix lint issue

---
 tests/contrib/test_numpy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index 127b5135..511a3b3d 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -232,7 +232,7 @@ def test_ndarray(data, test_data_idx):
         [
             lazy_fixture("nat"),
             np.array([[1, 2], [3, 4]], dtype=np.float32),
-            "given array is of type: float32, " "expected: int64",
+            "given array is of type: float32, expected: int64",
         ],
         [
             lazy_fixture("nat"),
@@ -242,7 +242,7 @@ def test_ndarray(data, test_data_idx):
         [
             lazy_fixture("nat_dynamic"),
             np.array([1, 2]),
-            "given array is of rank: 1, " "expected: 3",
+            "given array is of rank: 1, expected: 3",
         ],
         [
             lazy_fixture("nat_dynamic_float"),

From 4095937281e6816de099ee01338c30e112635d18 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Wed, 22 Jun 2022 02:45:27 +0530
Subject: [PATCH 5/9] incorporated review comments

---
 mlem/contrib/numpy.py        |  4 +-
 mlem/core/data_type.py       | 49 +++++++++++++------
 tests/contrib/test_numpy.py  | 20 ++++++++
 tests/core/test_data_type.py | 95 ++++++++++++++++++++++++++++++++++--
 4 files changed, 146 insertions(+), 22 deletions(-)

diff --git a/mlem/contrib/numpy.py b/mlem/contrib/numpy.py
index 232220d8..c944c671 100644
--- a/mlem/contrib/numpy.py
+++ b/mlem/contrib/numpy.py
@@ -137,7 +137,7 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]:
             prefix + "NumpyNdarray",
             __root__=(
                 self._subtype(self.shape)
-                if self.shape
+                if self.shape is not None
                 else List[
                     Union[python_type_from_np_string_repr(self.dtype), List]
                 ],  # type: ignore
@@ -157,7 +157,7 @@ def serialize(self, instance: np.ndarray):
         return instance.tolist()
 
     def _check_shape(self, array, exc_type):
-        if self.shape:
+        if self.shape is not None:
             if len(array.shape) != len(self.shape):
                 raise exc_type(
                     f"given array is of rank: {len(array.shape)}, expected: {len(self.shape)}"
diff --git a/mlem/core/data_type.py b/mlem/core/data_type.py
index 1304b55e..13d76067 100644
--- a/mlem/core/data_type.py
+++ b/mlem/core/data_type.py
@@ -19,7 +19,7 @@
 )
 
 import flatdict
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
 from pydantic.main import create_model
 
 from mlem.core.artifacts import Artifacts, Storage
@@ -504,8 +504,8 @@ def process(
         cls, obj: Any, is_dynamic: bool = False, **kwargs
     ) -> Union["DictType", "DynamicDictType"]:
         if not is_dynamic:
-            return DictType.analyze(obj, **kwargs)
-        return DynamicDictType.analyze(obj, **kwargs)
+            return DictType.create(obj, **kwargs)
+        return DynamicDictType.create(obj, **kwargs)
 
 
 class DictType(DataType, DataSerializer):
@@ -517,7 +517,7 @@ class DictType(DataType, DataSerializer):
     item_types: Dict[str, DataType]
 
     @classmethod
-    def analyze(cls, obj, **kwargs):
+    def create(cls, obj, **kwargs):
         return DictType(
             item_types={
                 k: DataAnalyzer.analyze(v, is_dynamic=False, **kwargs)
@@ -620,11 +620,19 @@ class DynamicDictType(DataType, DataSerializer):
 
     type: ClassVar[str] = "d_dict"
 
-    key_type: DataType
+    key_type: PrimitiveType
     value_type: DataType
 
+    @validator("key_type")
+    def is_valid_key_type(  # pylint: disable=no-self-argument
+        cls, key_type  # noqa: B902
+    ):
+        if key_type.ptype not in ["str", "int", "float"]:
+            raise ValueError(f"key_type {key_type.ptype} is not supported")
+        return key_type
+
     def deserialize(self, obj):
-        self._check_type_and_keys(obj, DeserializationError)
+        self.check_type(obj, dict, DeserializationError)
         return {
             self.key_type.get_serializer()
             .deserialize(
@@ -637,9 +645,7 @@ def deserialize(self, obj):
         }
 
     def serialize(self, instance: dict):
-        self._check_type_and_keys(instance, SerializationError)
-        if self.key_type == PrimitiveType and self.value_type == PrimitiveType:
-            return instance
+        self._check_types(instance, SerializationError)
 
         return {
             self.key_type.get_serializer()
@@ -653,7 +659,9 @@ def serialize(self, instance: dict):
         }
 
     @classmethod
-    def analyze(cls, obj, **kwargs):
+    def create(
+        cls, obj, is_dynamic: bool = True, **kwargs
+    ) -> "DynamicDictType":
         return DynamicDictType(
             key_type=DataAnalyzer.analyze(
                 next(iter(obj.keys())), is_dynamic=True, **kwargs
@@ -663,18 +671,25 @@ def analyze(cls, obj, **kwargs):
             ),
         )
 
-    def _check_type_and_keys(self, obj, exc_type):
+    def _check_types(self, obj, exc_type, ignore_key_type: bool = False):
         self.check_type(obj, dict, exc_type)
-        obj_type: DynamicDictType = self.analyze(obj)
-        obj_types = (obj_type.key_type, obj_type.value_type)
-        expected_types = (self.key_type, self.value_type)
+
+        obj_type = self.create(obj)
+        if ignore_key_type:
+            obj_types: Union[
+                Tuple[PrimitiveType, DataType], Tuple[DataType]
+            ] = (obj_type.value_type,)
+            expected_types: Union[
+                Tuple[PrimitiveType, DataType], Tuple[DataType]
+            ] = (self.value_type,)
+        else:
+            obj_types = (obj_type.key_type, obj_type.value_type)
+            expected_types = (self.key_type, self.value_type)
         if obj_types != expected_types:
             raise exc_type(
                 f"given dict has type: {obj_types}, expected: {expected_types}"
             )
 
-        # TODO - should we check for type of all items of dict?
-
     def get_requirements(self) -> Requirements:
         return sum(
             [
@@ -734,6 +749,8 @@ def read(self, artifacts: Artifacts) -> DataType:
             )
         with artifacts[DataWriter.art_name].open() as f:
             data = json.load(f)
+            # json stores keys as strings. Deserialize string keys as well as values.
+            data = self.data_type.deserialize(data)
         return self.data_type.copy().bind(data)
 
     def read_batch(
diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index 511a3b3d..ce669ea5 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -153,6 +153,20 @@ def nat_dynamic_shape_none():
     return True, dtype, payload, schema, test_data1, test_data2, test_data3
 
 
+@pytest.fixture
+def nat_shape_empty():
+    dtype = NumpyNdarrayType(shape=(), dtype="int")
+    payload = {"dtype": "int", "shape": (), "type": "ndarray"}
+    schema = {
+        "title": "NumpyNdarray",
+        "type": "integer",
+    }
+    test_data1 = np.array(1)
+    test_data2 = np.array(3)
+    test_data3 = np.array(4)
+    return True, dtype, payload, schema, test_data1, test_data2, test_data3
+
+
 def test_python_type_from_np_string_repr():
     assert python_type_from_np_string_repr("int64") == int
 
@@ -196,6 +210,7 @@ def test_number():
         (lazy_fixture("nat_dynamic_all_none_dims")),
         (lazy_fixture("nat_dynamic_shape_none")),
         (lazy_fixture("nat_dynamic_float")),
+        (lazy_fixture("nat_shape_empty")),
     ],
 )
 def test_ndarray(data, test_data_idx):
@@ -244,6 +259,11 @@ def test_ndarray(data, test_data_idx):
             np.array([1, 2]),
             "given array is of rank: 1, expected: 3",
         ],
+        [
+            lazy_fixture("nat_empty_shape"),
+            np.array([1, 2]),
+            "given array is of rank: 1, expected: 0",
+        ],
         [
             lazy_fixture("nat_dynamic_float"),
             np.array([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]]),
diff --git a/tests/core/test_data_type.py b/tests/core/test_data_type.py
index f4d1c06d..a94f6054 100644
--- a/tests/core/test_data_type.py
+++ b/tests/core/test_data_type.py
@@ -1,5 +1,6 @@
 import copy
 
+import numpy as np
 import pytest
 from pydantic import parse_obj_as
 from pytest_lazyfixture import lazy_fixture
@@ -307,6 +308,50 @@ def dynamic_dict_str_val_type_data():
     return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
 
 
+@pytest.fixture
+def dynamic_dict_int_key_type_data():
+    is_dynamic = True
+    d = {1: "1", 2: "2"}
+    payload = {
+        "key_type": {"ptype": "int", "type": "primitive"},
+        "value_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+    }
+    schema = {
+        "title": "DynamicDictType",
+        "type": "object",
+        "additionalProperties": {"type": "string"},
+    }
+
+    test_data1 = {1: "1", 2: "2"}
+    test_data2 = {2: "1"}
+    test_data3 = {3: "1", 4: "2", 5: "3", 6: "1"}
+
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
+@pytest.fixture
+def dynamic_dict_float_key_type_data():
+    is_dynamic = True
+    d = {1.0: "1", 2.0: "2"}
+    payload = {
+        "key_type": {"ptype": "float", "type": "primitive"},
+        "value_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+    }
+    schema = {
+        "title": "DynamicDictType",
+        "type": "object",
+        "additionalProperties": {"type": "string"},
+    }
+
+    test_data1 = {1.0: "1", 2.0: "2"}
+    test_data2 = {2.9999999999: "1"}
+    test_data3 = {3.8998: "1", 4.0001: "2", 5.2: "3", 6.9: "1"}
+
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
 @pytest.fixture
 def dynamic_dict_array_type():
     is_dynamic = True
@@ -379,6 +424,41 @@ def dynamic_dict_dict_type():
     return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
 
 
+@pytest.fixture
+def dynamic_dict_ndarray_type():
+    is_dynamic = True
+    d = {"a": np.array([1, 2]), "b": np.array([3, 4])}
+    payload = {
+        "key_type": {"ptype": "str", "type": "primitive"},
+        "type": "d_dict",
+        "value_type": {"dtype": "int64", "shape": (None,), "type": "ndarray"},
+    }
+
+    schema = {
+        "additionalProperties": {"$ref": "#/definitions/_val_NumpyNdarray"},
+        "definitions": {
+            "_val_NumpyNdarray": {
+                "items": {"type": "integer"},
+                "title": "_val_NumpyNdarray",
+                "type": "array",
+            }
+        },
+        "title": "DynamicDictType",
+        "type": "object",
+    }
+
+    test_data1 = {"a": np.array([1, 2]), "b": np.array([3, 4])}
+    test_data2 = {
+        "a": np.array([1, 2]),
+    }
+    test_data3 = {
+        "a": np.array([1, 2]),
+        "b": np.array([3, 4]),
+        "c": np.array([5, 6]),
+    }
+    return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
+
+
 @pytest.mark.parametrize("test_data_idx", [4, 5, 6])
 @pytest.mark.parametrize(
     "data",
@@ -386,8 +466,11 @@ def dynamic_dict_dict_type():
         lazy_fixture("dict_data"),
         lazy_fixture("dynamic_dict_data"),
         lazy_fixture("dynamic_dict_str_val_type_data"),
+        lazy_fixture("dynamic_dict_int_key_type_data"),
+        lazy_fixture("dynamic_dict_float_key_type_data"),
         lazy_fixture("dynamic_dict_array_type"),
         lazy_fixture("dynamic_dict_dict_type"),
+        lazy_fixture("dynamic_dict_ndarray_type"),
     ],
 )
 def test_dict(data, test_data_idx):
@@ -405,11 +488,12 @@ def test_dict(data, test_data_idx):
     assert dt.dict() == payload
     dt2 = parse_obj_as(dtype, payload)
     assert dt2 == dt
-    assert test_data == dt.serialize(test_data)
-    assert test_data == dt.deserialize(test_data)
+    serialised_test_data = dt.serialize(test_data)
+    deserialised_test_data = dt.deserialize(serialised_test_data)
+    assert serialised_test_data == dt.serialize(deserialised_test_data)
     assert dt.get_model().__name__ == dtype.__name__
     assert dt.get_model().schema() == schema
-    assert parse_obj_as(dt.get_model(), test_data)
+    assert parse_obj_as(dt.get_model(), serialised_test_data)
 
 
 @pytest.mark.parametrize("test_data_idx", [4, 5, 6])
@@ -419,8 +503,11 @@ def test_dict(data, test_data_idx):
         lazy_fixture("dict_data"),
         lazy_fixture("dynamic_dict_data"),
         lazy_fixture("dynamic_dict_str_val_type_data"),
+        lazy_fixture("dynamic_dict_int_key_type_data"),
+        lazy_fixture("dynamic_dict_float_key_type_data"),
         lazy_fixture("dynamic_dict_array_type"),
         lazy_fixture("dynamic_dict_dict_type"),
+        lazy_fixture("dynamic_dict_ndarray_type"),
     ],
 )
 def test_dict_source(data, test_data_idx):
@@ -434,7 +521,7 @@ def test_dict_source(data, test_data_idx):
     dtype_reader = DynamicDictReader if is_dynamic else DictReader
 
     def custom_assert(x, y):
-        assert x == y
+        np.testing.assert_equal(x, y)
         assert len(x) == len(y)
         assert isinstance(x, dict)
         assert isinstance(y, dict)

From 4605d0f1221797f4306dfd865d54023b08a8a642 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Sat, 25 Jun 2022 01:45:13 +0530
Subject: [PATCH 6/9] incorporated review comments

---
 mlem/core/data_type.py       | 12 +++++------
 tests/core/test_data_type.py | 42 +++++++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/mlem/core/data_type.py b/mlem/core/data_type.py
index 13d76067..2b19fac7 100644
--- a/mlem/core/data_type.py
+++ b/mlem/core/data_type.py
@@ -504,8 +504,8 @@ def process(
         cls, obj: Any, is_dynamic: bool = False, **kwargs
     ) -> Union["DictType", "DynamicDictType"]:
         if not is_dynamic:
-            return DictType.create(obj, **kwargs)
-        return DynamicDictType.create(obj, **kwargs)
+            return DictType.process(obj, **kwargs)
+        return DynamicDictType.process(obj, **kwargs)
 
 
 class DictType(DataType, DataSerializer):
@@ -517,7 +517,7 @@ class DictType(DataType, DataSerializer):
     item_types: Dict[str, DataType]
 
     @classmethod
-    def create(cls, obj, **kwargs):
+    def process(cls, obj, **kwargs):
         return DictType(
             item_types={
                 k: DataAnalyzer.analyze(v, is_dynamic=False, **kwargs)
@@ -659,9 +659,7 @@ def serialize(self, instance: dict):
         }
 
     @classmethod
-    def create(
-        cls, obj, is_dynamic: bool = True, **kwargs
-    ) -> "DynamicDictType":
+    def process(cls, obj, **kwargs) -> "DynamicDictType":
         return DynamicDictType(
             key_type=DataAnalyzer.analyze(
                 next(iter(obj.keys())), is_dynamic=True, **kwargs
@@ -674,7 +672,7 @@ def create(
     def _check_types(self, obj, exc_type, ignore_key_type: bool = False):
         self.check_type(obj, dict, exc_type)
 
-        obj_type = self.create(obj)
+        obj_type = self.process(obj)
         if ignore_key_type:
             obj_types: Union[
                 Tuple[PrimitiveType, DataType], Tuple[DataType]
diff --git a/tests/core/test_data_type.py b/tests/core/test_data_type.py
index a94f6054..9988c7fd 100644
--- a/tests/core/test_data_type.py
+++ b/tests/core/test_data_type.py
@@ -427,34 +427,46 @@ def dynamic_dict_dict_type():
 @pytest.fixture
 def dynamic_dict_ndarray_type():
     is_dynamic = True
-    d = {"a": np.array([1, 2]), "b": np.array([3, 4])}
+    d = {11: {1: np.array([1, 2])}, 22: {2: np.array([3, 4])}}
     payload = {
-        "key_type": {"ptype": "str", "type": "primitive"},
+        "key_type": {"ptype": "int", "type": "primitive"},
         "type": "d_dict",
-        "value_type": {"dtype": "int64", "shape": (None,), "type": "ndarray"},
+        "value_type": {
+            "key_type": {"ptype": "int", "type": "primitive"},
+            "type": "d_dict",
+            "value_type": {
+                "dtype": "int64",
+                "shape": (None,),
+                "type": "ndarray",
+            },
+        },
     }
-
     schema = {
-        "additionalProperties": {"$ref": "#/definitions/_val_NumpyNdarray"},
+        "additionalProperties": {"$ref": "#/definitions/_val_DynamicDictType"},
         "definitions": {
-            "_val_NumpyNdarray": {
+            "_val_DynamicDictType": {
+                "additionalProperties": {
+                    "$ref": "#/definitions/_val__val_NumpyNdarray"
+                },
+                "title": "_val_DynamicDictType",
+                "type": "object",
+            },
+            "_val__val_NumpyNdarray": {
                 "items": {"type": "integer"},
-                "title": "_val_NumpyNdarray",
+                "title": "_val__val_NumpyNdarray",
                 "type": "array",
-            }
+            },
         },
         "title": "DynamicDictType",
         "type": "object",
     }
 
-    test_data1 = {"a": np.array([1, 2]), "b": np.array([3, 4])}
-    test_data2 = {
-        "a": np.array([1, 2]),
-    }
+    test_data1 = {11: {1: np.array([1, 2])}, 22: {2: np.array([3, 4])}}
+    test_data2 = {11: {1: np.array([1, 2])}}
     test_data3 = {
-        "a": np.array([1, 2]),
-        "b": np.array([3, 4]),
-        "c": np.array([5, 6]),
+        11: {1: np.array([1, 2])},
+        22: {2: np.array([3, 4])},
+        33: {2: np.array([5, 6])},
     }
     return is_dynamic, d, payload, schema, test_data1, test_data2, test_data3
 

From 11951777043b7dbc80c720b0bd43f32cf22e1bb5 Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Sat, 25 Jun 2022 01:51:07 +0530
Subject: [PATCH 7/9] reverted licence

---
 tests/contrib/test_numpy.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index ce669ea5..d05c2daf 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -289,3 +289,19 @@ def test_requirements():
     assert get_object_requirements(
         NumpyNdarrayType(shape=(0,), dtype="int")
     ).modules == ["numpy"]
+
+
+# Copyright 2019 Zyfra
+# Copyright 2021 Iterative
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

From a71d8fe04d3f137c8d4cb900610d2a87e316c73d Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Tue, 28 Jun 2022 17:56:43 +0530
Subject: [PATCH 8/9] correct fixture name

---
 tests/contrib/test_numpy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index d05c2daf..d12b4735 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -260,7 +260,7 @@ def test_ndarray(data, test_data_idx):
             "given array is of rank: 1, expected: 3",
         ],
         [
-            lazy_fixture("nat_empty_shape"),
+            lazy_fixture("nat_shape_empty"),
             np.array([1, 2]),
             "given array is of rank: 1, expected: 0",
         ],

From 40677def2381fcc2a9f82153e05168579f39a14a Mon Sep 17 00:00:00 2001
From: Mahesh Ambule <ambulemahesh@gmail.com>
Date: Wed, 29 Jun 2022 19:25:10 +0530
Subject: [PATCH 9/9] fix windows numpy int dtype issue

---
 tests/conftest.py            |  8 ++++++++
 tests/contrib/test_numpy.py  | 10 +++++++---
 tests/core/test_data_type.py |  4 ++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 014f3d2f..305662e7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,6 +5,7 @@
 from typing import Any, Callable, Type
 
 import git
+import numpy as np
 import pandas as pd
 import pytest
 from fastapi.testclient import TestClient
@@ -426,3 +427,10 @@ def disable_colorama():
     import colorama
 
     colorama.init = lambda: None
+
+
+@pytest.fixture
+def numpy_default_int_dtype():
+    # default int type is platform dependent.
+    # For windows 64 it is int32 and for linux 64 it is int64
+    return str(np.array([1]).dtype)
diff --git a/tests/contrib/test_numpy.py b/tests/contrib/test_numpy.py
index d12b4735..4a66df95 100644
--- a/tests/contrib/test_numpy.py
+++ b/tests/contrib/test_numpy.py
@@ -38,10 +38,14 @@ def custom_assert(x, y):
 
 
 @pytest.fixture
-def nat():
+def nat(numpy_default_int_dtype):
     data = np.array([[1, 2], [3, 4]])
     dtype = DataType.create(data)
-    payload = {"shape": (None, 2), "dtype": "int64", "type": "ndarray"}
+    payload = {
+        "shape": (None, 2),
+        "dtype": numpy_default_int_dtype,
+        "type": "ndarray",
+    }
     schema = {
         "title": "NumpyNdarray",
         "type": "array",
@@ -247,7 +251,7 @@ def test_ndarray(data, test_data_idx):
         [
             lazy_fixture("nat"),
             np.array([[1, 2], [3, 4]], dtype=np.float32),
-            "given array is of type: float32, expected: int64",
+            f"given array is of type: float32, expected: {np.array([[1, 2], [3, 4]]).dtype}",
         ],
         [
             lazy_fixture("nat"),
diff --git a/tests/core/test_data_type.py b/tests/core/test_data_type.py
index 9988c7fd..1a7e3740 100644
--- a/tests/core/test_data_type.py
+++ b/tests/core/test_data_type.py
@@ -425,7 +425,7 @@ def dynamic_dict_dict_type():
 
 
 @pytest.fixture
-def dynamic_dict_ndarray_type():
+def dynamic_dict_ndarray_type(numpy_default_int_dtype):
     is_dynamic = True
     d = {11: {1: np.array([1, 2])}, 22: {2: np.array([3, 4])}}
     payload = {
@@ -435,7 +435,7 @@ def dynamic_dict_ndarray_type():
             "key_type": {"ptype": "int", "type": "primitive"},
             "type": "d_dict",
             "value_type": {
-                "dtype": "int64",
+                "dtype": numpy_default_int_dtype,
                 "shape": (None,),
                 "type": "ndarray",
             },