[PyOV] String type support (openvinotoolkit#21532)

* WIP working Tensor with automatic casting
* Update infer functions and minor fixes
* Impl for multi-dim arrays with fixes for strides
* Fix strides for S kind and refactor tests
* Added str_data and bytes_data to Tensor, cleaning up the solution
* Add test of warning while using data property
* Allow lists as inputs for single Tensors, refactor infer code to return OVDict with decoded strings, some refactoring, tests
* Replace string with another invalid input
* Added bytes_str and string_str properties, clean up common part of bindings, added test cases
* Improve string element type to be created from numpy/python counterparts, refactor of common code, small improvements
* Add tests for types
* Remove print
* Small fix for tensors from pointers
* Small fix for asserts
* Add tests for data dispatcher
* Fix comments
* Fix tests
* Fix edge-case for scalar-like values and unlock tests for data dispatcher
ynimmaga · Dec 15, 2023 · eff9ba7 · eff9ba7
1 parent 7a31163
commit eff9ba7
Show file tree

Hide file tree

Showing 14 changed files with 829 additions and 54 deletions.
diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py
@@ -262,7 +262,7 @@ def create_infer_request(self) -> InferRequest:
         """
         return InferRequest(super().create_infer_request())
 
-    def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> OVDict:
+    def infer_new_request(self, inputs: Any = None) -> OVDict:
         """Infers specified input(s) in synchronous mode.
 
         Blocks all methods of CompiledModel while request is running.
@@ -287,7 +287,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray]
         function throws error.
 
         :param inputs: Data to be set on input tensors.
-        :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
+        :type inputs: Any, optional
         :return: Dictionary of results from output tensors with port/int/str keys.
         :rtype: OVDict
         """
@@ -297,7 +297,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray]
 
     def __call__(
         self,
-        inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
+        inputs: Any = None,
         share_inputs: bool = True,
         share_outputs: bool = False,
         *,
@@ -332,7 +332,7 @@ def __call__(
         function throws error.
 
         :param inputs: Data to be set on input tensors.
-        :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
+        :type inputs: Any, optional
         :param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs.
 
                               If set to `False` inputs the data dispatcher will safely copy data

diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
@@ -10,7 +10,7 @@
 from openvino._pyopenvino import ConstOutput, Tensor, Type
 from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
 
-ContainerTypes = Union[dict, list, tuple]
+ContainerTypes = Union[dict, list, tuple, OVDict]
 ScalarTypes = Union[np.number, int, float]
 ValidKeys = Union[str, int, ConstOutput]
 
@@ -31,7 +31,7 @@ def get_request_tensor(
 
 @singledispatch
 def value_to_tensor(
-    value: Union[Tensor, np.ndarray, ScalarTypes],
+    value: Union[Tensor, np.ndarray, ScalarTypes, str],
     request: Optional[_InferRequestWrapper] = None,
     is_shared: bool = False,
     key: Optional[ValidKeys] = None,
@@ -59,6 +59,11 @@ def _(
     tensor = get_request_tensor(request, key)
     tensor_type = tensor.get_element_type()
     tensor_dtype = tensor_type.to_dtype()
+    # String edge-case, always copy.
+    # Scalars are also handled by C++.
+    if tensor_type == Type.string:
+        return Tensor(value, shared_memory=False)
+    # Scalars edge-case:
     if value.ndim == 0:
         tensor_shape = tuple(tensor.shape)
         if tensor_dtype == value.dtype and tensor_shape == value.shape:
@@ -82,21 +87,34 @@ def _(
     return Tensor(value, shared_memory=is_shared)
 
 
+@value_to_tensor.register(list)
+def _(
+    value: list,
+    request: _InferRequestWrapper,
+    is_shared: bool = False,
+    key: Optional[ValidKeys] = None,
+) -> Tensor:
+    return Tensor(value)
+
+
 @value_to_tensor.register(np.number)
 @value_to_tensor.register(int)
 @value_to_tensor.register(float)
+@value_to_tensor.register(str)
+@value_to_tensor.register(bytes)
 def _(
-    value: ScalarTypes,
+    value: Union[ScalarTypes, str, bytes],
     request: _InferRequestWrapper,
     is_shared: bool = False,
     key: Optional[ValidKeys] = None,
 ) -> Tensor:
-    # np.number/int/float edge-case, copy will occur in both scenarios.
+    # np.number/int/float/str/bytes edge-case, copy will occur in both scenarios.
     tensor_type = get_request_tensor(request, key).get_element_type()
     tensor_dtype = tensor_type.to_dtype()
     tmp = np.array(value)
+    # String edge-case -- it converts the data inside of Tensor class.
     # If types are mismatched, convert.
-    if tensor_dtype != tmp.dtype:
+    if tensor_type != Type.string and tensor_dtype != tmp.dtype:
         return Tensor(tmp.astype(tensor_dtype), shared_memory=False)
     return Tensor(tmp, shared_memory=False)
 
@@ -204,8 +222,10 @@ def _(
 @create_shared.register(np.number)
 @create_shared.register(int)
 @create_shared.register(float)
+@create_shared.register(str)
+@create_shared.register(bytes)
 def _(
-    inputs: Union[Tensor, ScalarTypes],
+    inputs: Union[Tensor, ScalarTypes, str, bytes],
     request: _InferRequestWrapper,
 ) -> Tensor:
     return value_to_tensor(inputs, request=request, is_shared=True)
@@ -256,7 +276,10 @@ def _(
         if tuple(tensor.shape) != inputs.shape:
             tensor.shape = inputs.shape
         # When copying, type should be up/down-casted automatically.
-        tensor.data[:] = inputs[:]
+        if tensor.element_type == Type.string:
+            tensor.bytes_data = inputs
+        else:
+            tensor.data[:] = inputs[:]
     else:
         # If shape is "empty", assume this is a scalar value
         set_request_tensor(
@@ -269,8 +292,9 @@ def _(
 @update_tensor.register(np.number)  # type: ignore
 @update_tensor.register(float)
 @update_tensor.register(int)
+@update_tensor.register(str)
 def _(
-    inputs: Union[np.number, float, int],
+    inputs: Union[ScalarTypes, str],
     request: _InferRequestWrapper,
     key: Optional[ValidKeys] = None,
 ) -> None:
@@ -286,6 +310,7 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict:
 
     It creates copy of Tensors or copy data to already allocated Tensors on device
     if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has numpy __array__ attribute.
+    If value is of type `list`, create a Tensor based on it, copy will occur in the Tensor constructor.
     """
     # Create new temporary dictionary.
     # new_inputs will be used to transfer data to inference calls,
@@ -296,8 +321,10 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict:
             raise TypeError(f"Incompatible key type for input: {key}")
         # Copy numpy arrays to already allocated Tensors.
         # If value object has __array__ attribute, load it to Tensor using np.array
-        if isinstance(value, (np.ndarray, np.number, int, float)) or hasattr(value, "__array__"):
+        if isinstance(value, (np.ndarray, np.number, int, float, str)) or hasattr(value, "__array__"):
             update_tensor(value, request, key)
+        elif isinstance(value, list):
+            new_inputs[key] = Tensor(value)
         # If value is of Tensor type, put it into temporary dictionary.
         elif isinstance(value, Tensor):
             new_inputs[key] = value
@@ -309,7 +336,7 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict:
 
 @singledispatch
 def create_copied(
-    inputs: Union[ContainerTypes, OVDict, np.ndarray, ScalarTypes],
+    inputs: Union[ContainerTypes, np.ndarray, ScalarTypes, str, bytes],
     request: _InferRequestWrapper,
 ) -> Union[dict, None]:
     # Check the special case of the array-interface
@@ -325,7 +352,7 @@ def create_copied(
 @create_copied.register(tuple)
 @create_copied.register(OVDict)
 def _(
-    inputs: Union[ContainerTypes, OVDict],
+    inputs: ContainerTypes,
     request: _InferRequestWrapper,
 ) -> dict:
     return update_inputs(normalize_arrays(inputs, is_shared=False), request)
@@ -344,8 +371,10 @@ def _(
 @create_copied.register(np.number)
 @create_copied.register(int)
 @create_copied.register(float)
+@create_copied.register(str)
+@create_copied.register(bytes)
 def _(
-    inputs: Union[Tensor, ScalarTypes],
+    inputs: Union[Tensor, ScalarTypes, str, bytes],
     request: _InferRequestWrapper,
 ) -> Tensor:
     return value_to_tensor(inputs, request=request, is_shared=False)
@@ -356,7 +385,7 @@ def _(
 
 def _data_dispatch(
     request: _InferRequestWrapper,
-    inputs: Union[ContainerTypes, OVDict, Tensor, np.ndarray, ScalarTypes] = None,
+    inputs: Union[ContainerTypes, Tensor, np.ndarray, ScalarTypes, str] = None,
     is_shared: bool = False,
 ) -> Union[dict, Tensor]:
     if inputs is None:

diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py
@@ -36,6 +36,10 @@
     (Type.u32, np.uint32),
     (Type.u64, np.uint64),
     (Type.bf16, np.uint16),
+    (Type.string, str),
+    (Type.string, np.str_),
+    (Type.string, bytes),
+    (Type.string, np.bytes_),
 ]
 
 openvino_to_numpy_types_str_map = [
@@ -52,6 +56,10 @@
     ("u16", np.uint16),
     ("u32", np.uint32),
     ("u64", np.uint64),
+    ("string", str),
+    ("string", np.str_),
+    ("string", bytes),
+    ("string", np.bytes_),
 ]