
Commit d6bcbc6

implement get_model for torch tensor (#296)
fix dtype inspection for torch
mike0sv authored Jun 16, 2022
1 parent c701297 commit d6bcbc6
Showing 2 changed files with 22 additions and 4 deletions.
1 change: 0 additions & 1 deletion mlem/contrib/numpy.py
@@ -132,7 +132,6 @@ def _subtype(self, subshape: Tuple[Optional[int], ...]):
         )
 
     def get_model(self, prefix: str = "") -> Type[BaseModel]:
-        # TODO: https://github.com/iterative/mlem/issues/33
         return create_model(
             prefix + "NumpyNdarray", __root__=(List[self._subtype(self.shape[1:])], ...)  # type: ignore
         )
25 changes: 22 additions & 3 deletions mlem/contrib/torch.py
@@ -1,8 +1,10 @@
-from typing import Any, ClassVar, Iterator, Optional, Tuple
+from typing import Any, ClassVar, Iterator, List, Optional, Tuple
 
 import torch
+from pydantic import conlist, create_model
 
 from mlem.constants import PREDICT_METHOD_NAME
+from mlem.contrib.numpy import python_type_from_np_string_repr
 from mlem.core.artifacts import Artifacts, Storage
 from mlem.core.data_type import (
     DataHook,
@@ -17,6 +19,11 @@
 from mlem.core.requirements import InstallableRequirement, Requirements
 
 
+def python_type_from_torch_string_repr(dtype: str):
+    # not sure this will work all the time
+    return python_type_from_np_string_repr(dtype)
+
+
 class TorchTensorDataType(
     DataType, DataSerializer, DataHook, IsInstanceHookMixin
 ):
@@ -68,14 +75,26 @@ def get_writer(
     ) -> DataWriter:
         return TorchTensorWriter(**kwargs)
 
+    def _subtype(self, subshape: Tuple[Optional[int], ...]):
+        if len(subshape) == 0:
+            return python_type_from_torch_string_repr(self.dtype)
+        return conlist(
+            self._subtype(subshape[1:]),
+            min_items=subshape[0],
+            max_items=subshape[0],
+        )
+
     def get_model(self, prefix: str = ""):
-        raise NotImplementedError
+        return create_model(
+            prefix + "TorchTensor",
+            __root__=(List[self._subtype(self.shape[1:])], ...),  # type: ignore
+        )
 
     @classmethod
     def process(cls, obj: torch.Tensor, **kwargs) -> DataType:
         return TorchTensorDataType(
             shape=(None,) + obj.shape[1:],
-            dtype=str(obj.dtype)[len(obj.dtype.__module__) + 1 :],
+            dtype=str(obj.dtype)[len("torch") + 1 :],
         )
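
Usage note (not part of this commit): a minimal sketch of how the changed code is expected to behave, assuming TorchTensorDataType.process can be called directly and that python_type_from_np_string_repr maps "float32" to Python float. The "My" prefix is a hypothetical example value.

```python
# Sketch of expected behavior after this change, not code from the commit.
import torch
from pydantic import ValidationError

from mlem.contrib.torch import TorchTensorDataType

tensor = torch.zeros(3, 4, dtype=torch.float32)

# dtype inspection fix: str(torch.float32) == "torch.float32",
# so slicing off len("torch") + 1 characters leaves "float32"
dt = TorchTensorDataType.process(tensor)
assert dt.dtype == "float32"
assert dt.shape == (None, 4)  # first dimension left unbound

# get_model no longer raises NotImplementedError; it builds a pydantic model
# whose __root__ is a list of fixed-length rows (nested conlist per dimension)
Model = dt.get_model("My")
Model(__root__=[[0.0, 1.0, 2.0, 3.0]])  # a row of length 4 validates

try:
    Model(__root__=[[0.0, 1.0]])  # wrong row length is rejected
except ValidationError as err:
    print("rejected:", err.errors()[0]["msg"])
```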
