openvinotoolkit · jiwaszki · Mar 16, 2023 · Mar 13, 2023 · Mar 13, 2023 · Mar 13, 2023
@@ -56,38 +56,51 @@ def _(
     is_shared: bool = False,
     key: Optional[ValidKeys] = None,
 ) -> Tensor:
-    # Edge-case for numpy arrays if shape is "empty",
-    # assume this is a scalar value - always copy
-    if not value.shape:
-        return Tensor(np.ndarray([], value.dtype, np.array(value)))
-    tensor_type = get_request_tensor(request, key).get_element_type()
+    tensor = get_request_tensor(request, key)
+    tensor_type = tensor.get_element_type()
     tensor_dtype = tensor_type.to_dtype()
-    # WA for FP16-->BF16 edge-case - always copy
+    if value.ndim == 0:
+        tensor_shape = tuple(tensor.shape)
+        if tensor_dtype == value.dtype and tensor_shape == value.shape:
+            return Tensor(value, shared_memory=is_shared)
+        else:
+            return Tensor(value.astype(tensor_dtype).reshape(tensor_shape), shared_memory=False)
+    # Workaround for the FP16-->BF16 edge-case, always copy.
     if tensor_type == Type.bf16:
         tensor = Tensor(tensor_type, value.shape)
         tensor.data[:] = value.view(tensor_dtype)
         return tensor
-    return Tensor(value.astype(tensor_dtype) if tensor_dtype != value.dtype else value, shared_memory=is_shared)
+    # If types are mismatched, convert and always copy.
+    if tensor_dtype != value.dtype:
+        return Tensor(value.astype(tensor_dtype), shared_memory=False)
+    # Otherwise, use mode defined in the call.
+    return Tensor(value, shared_memory=is_shared)
 
 
 @value_to_tensor.register(np.number)
 @value_to_tensor.register(int)
 @value_to_tensor.register(float)
 def _(
     value: ScalarTypes,
-    request: Optional[_InferRequestWrapper] = None,
+    request: _InferRequestWrapper,
     is_shared: bool = False,
     key: Optional[ValidKeys] = None,
 ) -> Tensor:
-    return Tensor(np.ndarray([], type(value), np.array(value)))
+    # np.number/int/float edge-case, copy will occur in both scenarios.
+    tensor_type = get_request_tensor(request, key).get_element_type()
+    tensor_dtype = tensor_type.to_dtype()
+    tmp = np.array(value)
+    # If types are mismatched, convert.
+    if tensor_dtype != tmp.dtype:
+        return Tensor(tmp.astype(tensor_dtype), shared_memory=False)
+    return Tensor(tmp, shared_memory=False)
 
 
 def to_c_style(value: Any, is_shared: bool = False) -> Any:
     if not isinstance(value, np.ndarray):
         if hasattr(value, "__array__"):
             return to_c_style(np.array(value, copy=False)) if is_shared else np.array(value, copy=True)
         return value
-    # Check C-style if not convert data (or raise error?)
     return value if value.flags["C_CONTIGUOUS"] else np.ascontiguousarray(value)
 
 
@@ -223,20 +236,20 @@ def _(
     request: _InferRequestWrapper,
     key: Optional[ValidKeys] = None,
 ) -> None:
-    # If shape is "empty", assume this is a scalar value
-    if not inputs.shape:
-        set_request_tensor(
-            request,
-            value_to_tensor(inputs, request=request, is_shared=False),
-            key,
-        )
-    else:
+    if inputs.ndim != 0:
         tensor = get_request_tensor(request, key)
         # Update shape if there is a mismatch
-        if tensor.shape != inputs.shape:
+        if tuple(tensor.shape) != inputs.shape:
             tensor.shape = inputs.shape
         # When copying, type should be up/down-casted automatically.
         tensor.data[:] = inputs[:]
+    else:
+        # If shape is "empty", assume this is a scalar value
+        set_request_tensor(
+            request,
+            value_to_tensor(inputs, request=request, is_shared=False, key=key),
+            key,
+        )
 
 
 @update_tensor.register(np.number)  # type: ignore
@@ -249,7 +262,7 @@ def _(
 ) -> None:
     set_request_tensor(
         request,
-        value_to_tensor(inputs, is_shared=False),
+        value_to_tensor(inputs, request=request, is_shared=False, key=key),
         key,
     )
 
@@ -320,7 +333,7 @@ def _(
     inputs: Union[Tensor, ScalarTypes],
     request: _InferRequestWrapper,
 ) -> Tensor:
-    return value_to_tensor(inputs, is_shared=False)
+    return value_to_tensor(inputs, request=request, is_shared=False)
 ###
 # End of "copied" dispatcher methods.
 ###

@@ -121,7 +121,7 @@ ov::op::v0::Constant create_copied(py::array& array) {
     // Create actual Constant and a constructor is copying data.
     return ov::op::v0::Constant(array_helpers::get_ov_type(array),
                                 array_helpers::get_shape(array),
-                                const_cast<void*>(array.data(0)));
+                                array.ndim() == 0 ? array.data() : array.data(0));
 }
 
 template <>
@@ -135,10 +135,10 @@ ov::op::v0::Constant create_shared(py::array& array) {
     // Check if passed array has C-style contiguous memory layout.
     // If memory is going to be shared it needs to be contiguous before passing to the constructor.
     if (array_helpers::is_contiguous(array)) {
-        auto memory =
-            std::make_shared<ngraph::runtime::SharedBuffer<py::array>>(static_cast<char*>(array.mutable_data(0)),
-                                                                       array.nbytes(),
-                                                                       array);
+        auto memory = std::make_shared<ngraph::runtime::SharedBuffer<py::array>>(
+            static_cast<char*>(array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)),
+            array.ndim() == 0 ? array.itemsize() : array.nbytes(),
+            array);
         return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array), memory);
     }
     // If passed array is not C-style, throw an error.
@@ -159,9 +159,9 @@ ov::Tensor create_copied(py::array& array) {
     // Create actual Tensor and copy data.
     auto tensor = ov::Tensor(array_helpers::get_ov_type(array), array_helpers::get_shape(array));
     // If ndim of py::array is 0, array is a numpy scalar. That results in size to be equal to 0.
-    // To gain access to actual raw/low-level data, it is needed to use buffer protocol.
-    py::buffer_info buf = array.request();
-    std::memcpy(tensor.data(), buf.ptr, buf.ndim == 0 ? buf.itemsize : buf.itemsize * buf.size);
+    std::memcpy(tensor.data(),
+                array.ndim() == 0 ? array.data() : array.data(0),
+                array.ndim() == 0 ? array.itemsize() : array.nbytes());
     return tensor;
 }
 
@@ -170,9 +170,10 @@ ov::Tensor create_shared(py::array& array) {
     // Check if passed array has C-style contiguous memory layout.
     // If memory is going to be shared it needs to be contiguous before passing to the constructor.
     if (array_helpers::is_contiguous(array)) {
+        // If ndim of py::array is 0, array is a numpy scalar.
         return ov::Tensor(array_helpers::get_ov_type(array),
                           array_helpers::get_shape(array),
-                          const_cast<void*>(array.data(0)),
+                          array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0),
                           array_helpers::get_strides(array));
     }
     // If passed array is not C-style, throw an error.

@@ -1014,3 +1014,108 @@ def test_convert_infer_request(device):
     with pytest.raises(TypeError) as e:
         deepcopy(res)
     assert "cannot deepcopy 'openvino.runtime.ConstOutput' object." in str(e)
+
+
+@pytest.mark.parametrize("shared_flag", [True, False])
+@pytest.mark.parametrize("input_data", [
+    np.array(1.0, dtype=np.float32),
+    np.array(1, dtype=np.int32),
+    np.float32(1.0),
+    np.int32(1.0),
+    1.0,
+    1,
+])
+def test_only_scalar_infer(device, shared_flag, input_data):
+    core = Core()
+    param = ops.parameter([], np.float32, name="data")
+    relu = ops.relu(param, name="relu")
+    model = Model([relu], [param], "scalar_model")
+
+    compiled = core.compile_model(model=model, device_name=device)
+    request = compiled.create_infer_request()
+
+    res = request.infer(input_data, shared_memory=shared_flag)
+
+    assert res[request.model_outputs[0]] == np.maximum(input_data, 0)
+
+    input_tensor = request.get_input_tensor()
+    if shared_flag and isinstance(input_data, np.ndarray) and input_data.dtype == input_tensor.data.dtype:
+        assert np.shares_memory(input_data, input_tensor.data)
+    else:
+        assert not np.shares_memory(input_data, input_tensor.data)
+
+
+@pytest.mark.parametrize("shared_flag", [True, False])
+@pytest.mark.parametrize("input_data", [
+    {0: np.array(1.0, dtype=np.float32), 1: np.array([1.0, 2.0], dtype=np.float32)},
+    {0: np.array(1, dtype=np.int32), 1: np.array([1, 2], dtype=np.int32)},
+    {0: np.float32(1.0), 1: np.array([1, 2], dtype=np.float32)},
+    {0: np.int32(1.0), 1: np.array([1, 2], dtype=np.int32)},
+    {0: 1.0, 1: np.array([1.0, 2.0], dtype=np.float32)},
+    {0: 1, 1: np.array([1.0, 2.0], dtype=np.int32)},
+])
+def test_mixed_scalar_infer(device, shared_flag, input_data):
+    core = Core()
+    param0 = ops.parameter([], np.float32, name="data0")
+    param1 = ops.parameter([2], np.float32, name="data1")
+    add = ops.add(param0, param1, name="add")
+    model = Model([add], [param0, param1], "mixed_model")
+
+    compiled = core.compile_model(model=model, device_name=device)
+    request = compiled.create_infer_request()
+
+    res = request.infer(input_data, shared_memory=shared_flag)
+
+    assert np.allclose(res[request.model_outputs[0]], np.add(input_data[0], input_data[1]))
+
+    input_tensor0 = request.get_input_tensor(0)
+    input_tensor1 = request.get_input_tensor(1)
+
+    if shared_flag:
+        if isinstance(input_data[0], np.ndarray) and input_data[0].dtype == input_tensor0.data.dtype:
+            assert np.shares_memory(input_data[0], input_tensor0.data)
+        else:
+            assert not np.shares_memory(input_data[0], input_tensor0.data)
+        if isinstance(input_data[1], np.ndarray) and input_data[1].dtype == input_tensor1.data.dtype:
+            assert np.shares_memory(input_data[1], input_tensor1.data)
+        else:
+            assert not np.shares_memory(input_data[1], input_tensor1.data)
+    else:
+        assert not np.shares_memory(input_data[0], input_tensor0.data)
+        assert not np.shares_memory(input_data[1], input_tensor1.data)
+
+
+@pytest.mark.parametrize("shared_flag", [True, False])
+@pytest.mark.parametrize("input_data", [
+    {0: np.array(1.0, dtype=np.float32), 1: np.array([3.0], dtype=np.float32)},
+    {0: np.array(1.0, dtype=np.float32), 1: np.array([3.0, 3.0, 3.0], dtype=np.float32)},
+])
+def test_mixed_dynamic_infer(device, shared_flag, input_data):
+    core = Core()
+    param0 = ops.parameter([], np.float32, name="data0")
+    param1 = ops.parameter(["?"], np.float32, name="data1")
+    add = ops.add(param0, param1, name="add")
+    model = Model([add], [param0, param1], "mixed_model")
+
+    compiled = core.compile_model(model=model, device_name=device)
+    request = compiled.create_infer_request()
+
+    res = request.infer(input_data, shared_memory=shared_flag)
+
+    assert np.allclose(res[request.model_outputs[0]], np.add(input_data[0], input_data[1]))
+
+    input_tensor0 = request.get_input_tensor(0)
+    input_tensor1 = request.get_input_tensor(1)
+
+    if shared_flag:
+        if isinstance(input_data[0], np.ndarray) and input_data[0].dtype == input_tensor0.data.dtype:
+            assert np.shares_memory(input_data[0], input_tensor0.data)
+        else:
+            assert not np.shares_memory(input_data[0], input_tensor0.data)
+        if isinstance(input_data[1], np.ndarray) and input_data[1].dtype == input_tensor1.data.dtype:
+            assert np.shares_memory(input_data[1], input_tensor1.data)
+        else:
+            assert not np.shares_memory(input_data[1], input_tensor1.data)
+    else:
+        assert not np.shares_memory(input_data[0], input_tensor0.data)
+        assert not np.shares_memory(input_data[1], input_tensor1.data)
@@ -125,3 +125,27 @@ def test_with_tensor_memory(cls, shared_flag_one, shared_flag_two, ov_type, nump
     else:
         assert not (np.shares_memory(arr, ov_object.data))
         assert not (np.shares_memory(ov_tensor.data, ov_object.data))
+
+
+@pytest.mark.parametrize("cls", [Tensor, Constant])
+@pytest.mark.parametrize("shared_flag", [True, False])
+@pytest.mark.parametrize("scalar", [
+    np.array(2),
+    np.array(1.0),
+    np.float32(3.0),
+    np.int64(7.0),
+    4,
+    5.0,
+])
+def test_with_scalars(cls, shared_flag, scalar):
+    # If scalar is a 0-dim np.array, create a copy for convenience. Otherwise, the same
+    # array object would be shared by all parametrized tests.
+    # If scalar is np.number or native int/float, create 0-dim scalar array from it.
+    _scalar = np.copy(scalar) if isinstance(scalar, np.ndarray) else np.array(scalar)
+    ov_object = cls(array=_scalar, shared_memory=shared_flag)
+    if shared_flag is True:
+        assert np.shares_memory(_scalar, ov_object.data)
+        _scalar[()] = 6
+        assert ov_object.data == 6
+    else:
+        assert not (np.shares_memory(_scalar, ov_object.data))