Make upb numpy type checks consistent with pure python and cpp.

PiperOrigin-RevId: 464907203
protocolbuffers · Aug 2, 2022 · 79b735a · 79b735a
1 parent e09d6fc
commit 79b735a
Show file tree

Hide file tree

Showing 4 changed files with 235 additions and 6 deletions.
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
@@ -89,6 +89,8 @@ jobs:
         run: pip install tzdata
         # Only needed on Windows, Linux ships with tzdata.
         if: ${{ contains(matrix.os, 'windows') }}
+      - name: Install numpy
+        run: pip install numpy
       - name: Install Protobuf Wheels
         run: pip install -vvv --no-index --find-links wheels protobuf protobuftests
       - name: Test that module is importable

diff --git a/python/BUILD b/python/BUILD
@@ -205,6 +205,7 @@ py_extension(
     deps = [
         "//:collections",
         "//:descriptor_upb_proto_reflection",
+        "//:port",
         "//:reflection",
         "//:table_internal",
         "//:textformat",

diff --git a/python/convert.c b/python/convert.c
@@ -32,6 +32,9 @@
 #include "upb/reflection.h"
 #include "upb/util/compare.h"
 
+// Must be last.
+#include "upb/port_def.inc"
+
 PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
                         PyObject* arena) {
   switch (upb_FieldDef_CType(f)) {
@@ -150,6 +153,34 @@ static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
   return ret;
 }
 
+// Returns a static, human-readable name for the C type of field `f`
+// (for example "double", "int32" or "message"). The result is a string
+// literal, so the caller must not free it.
+const char* upb_FieldDef_TypeString(const upb_FieldDef* f) {
+  const char* type_name = NULL;
+  switch (upb_FieldDef_CType(f)) {
+    case kUpb_CType_Double:
+      type_name = "double";
+      break;
+    case kUpb_CType_Float:
+      type_name = "float";
+      break;
+    case kUpb_CType_Int64:
+      type_name = "int64";
+      break;
+    case kUpb_CType_Int32:
+      type_name = "int32";
+      break;
+    case kUpb_CType_UInt64:
+      type_name = "uint64";
+      break;
+    case kUpb_CType_UInt32:
+      type_name = "uint32";
+      break;
+    case kUpb_CType_Enum:
+      type_name = "enum";
+      break;
+    case kUpb_CType_Bool:
+      type_name = "bool";
+      break;
+    case kUpb_CType_String:
+      type_name = "string";
+      break;
+    case kUpb_CType_Bytes:
+      type_name = "bytes";
+      break;
+    case kUpb_CType_Message:
+      type_name = "message";
+      break;
+  }
+  // The switch is expected to cover every upb_CType value; reaching this
+  // point without a name means the field reported an unknown C type.
+  if (!type_name) UPB_UNREACHABLE();
+  return type_name;
+}
+
 static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
                               upb_MessageValue* val) {
   if (PyUnicode_Check(obj)) {
@@ -176,6 +207,20 @@ static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
   }
 }
 
+// Reports whether `obj` is an instance of a type whose `__name__` is
+// "ndarray" (the type is matched by name only).
+//
+// Returns true and sets a Python TypeError naming the expected type of
+// field `f` when it is. Returns false with no Python error pending
+// otherwise, including when the type name cannot be read, so callers can
+// safely continue with their normal conversion path.
+bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
+  PyObject* type_name_obj =
+      PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
+  const char* type_name;
+  bool is_ndarray;
+  if (!type_name_obj) {
+    // The attribute lookup failed and left an exception set. Treat the
+    // object as "not an ndarray" rather than dereferencing NULL below.
+    PyErr_Clear();
+    return false;
+  }
+  // The character data is read before the reference is dropped below, in
+  // case the buffer is owned by type_name_obj.
+  type_name = PyUpb_GetStrData(type_name_obj);
+  is_ndarray = type_name && strcmp(type_name, "ndarray") == 0;
+  if (is_ndarray) {
+    PyErr_Format(PyExc_TypeError,
+                 "%S has type ndarray, but expected one of: %s", obj,
+                 upb_FieldDef_TypeString(f));
+  } else if (!type_name) {
+    // NOTE(review): PyUpb_GetStrData presumably returns NULL for values it
+    // cannot expose as character data and may leave an error set; clear it
+    // so a false return never carries a pending exception.
+    PyErr_Clear();
+  }
+  Py_DECREF(type_name_obj);
+  return is_ndarray;
+}
+
 bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
                    upb_Arena* arena) {
   switch (upb_FieldDef_CType(f)) {
@@ -190,12 +235,15 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
     case kUpb_CType_UInt64:
       return PyUpb_GetUint64(obj, &val->uint64_val);
     case kUpb_CType_Float:
+      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
       val->float_val = PyFloat_AsDouble(obj);
       return !PyErr_Occurred();
     case kUpb_CType_Double:
+      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
       val->double_val = PyFloat_AsDouble(obj);
       return !PyErr_Occurred();
     case kUpb_CType_Bool:
+      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
       val->bool_val = PyLong_AsLong(obj);
       return !PyErr_Occurred();
     case kUpb_CType_Bytes: {
@@ -223,6 +271,7 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
       return true;
     }
     case kUpb_CType_Message:
+      // TODO(b/238226055): Include ctype in error message.
       PyErr_Format(PyExc_ValueError, "Message objects may not be assigned",
                    upb_FieldDef_CType(f));
       return false;
@@ -392,3 +441,5 @@ bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
   return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) ==
          kUpb_UnknownCompareResult_Equal;
 }
+
+#include "upb/port_undef.inc"
diff --git a/python/pb_unit_tests/numpy_test_wrapper.py b/python/pb_unit_tests/numpy_test_wrapper.py
@@ -27,13 +27,188 @@
 
 # begin:google_only
 # from google.protobuf.internal.numpy_test import *
-#
-# # TODO(b/227379846): upb does not match pure-python and fast cpp behavior for
-# # assignment of numpy arrays to proto float or multidimensional arrays to
-# # repeated fields yet.
-# NumpyFloatProtoTest.testNumpyFloatArrayToScalar_RaisesTypeError.__unittest_expecting_failure__ = True
-# NumpyFloatProtoTest.testNumpyDim2FloatArrayToRepeated_RaisesTypeError.__unittest_expecting_failure__ = True
 # end:google_only
 
+# begin:github_only
+# TODO(b/240447513) Delete workaround after numpy_test is open-sourced in
+# protobuf github.
+import unittest
+
+import numpy as np
+
+from google.protobuf import unittest_pb2
+from google.protobuf.internal import testing_refleaks
+
+message = unittest_pb2.TestAllTypes()
+np_float_scalar = np.float64(0.0)
+np_1_float_array = np.zeros(shape=(1,), dtype=np.float64)
+np_2_float_array = np.zeros(shape=(2,), dtype=np.float64)
+np_11_float_array = np.zeros(shape=(1, 1), dtype=np.float64)
+np_22_float_array = np.zeros(shape=(2, 2), dtype=np.float64)
+
+np_int_scalar = np.int64(0)
+np_1_int_array = np.zeros(shape=(1,), dtype=np.int64)
+np_2_int_array = np.zeros(shape=(2,), dtype=np.int64)
+np_11_int_array = np.zeros(shape=(1, 1), dtype=np.int64)
+np_22_int_array = np.zeros(shape=(2, 2), dtype=np.int64)
+
+np_uint_scalar = np.uint64(0)
+np_1_uint_array = np.zeros(shape=(1,), dtype=np.uint64)
+np_2_uint_array = np.zeros(shape=(2,), dtype=np.uint64)
+np_11_uint_array = np.zeros(shape=(1, 1), dtype=np.uint64)
+np_22_uint_array = np.zeros(shape=(2, 2), dtype=np.uint64)
+
+np_bool_scalar = np.bool_(False)
+np_1_bool_array = np.zeros(shape=(1,), dtype=np.bool_)
+np_2_bool_array = np.zeros(shape=(2,), dtype=np.bool_)
+np_11_bool_array = np.zeros(shape=(1, 1), dtype=np.bool_)
+np_22_bool_array = np.zeros(shape=(2, 2), dtype=np.bool_)
+
+@testing_refleaks.TestCase
+class NumpyIntProtoTest(unittest.TestCase):
+
+  # Assigning dim 1 ndarray of ints to repeated field should pass
+  def testNumpyDim1IntArrayToRepeated_IsValid(self):
+    message.repeated_int64[:] = np_1_int_array
+    message.repeated_int64[:] = np_2_int_array
+
+    message.repeated_uint64[:] = np_1_uint_array
+    message.repeated_uint64[:] = np_2_uint_array
+
+  # Assigning dim 2 ndarray of ints to repeated field should fail
+  def testNumpyDim2IntArrayToRepeated_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.repeated_int64[:] = np_11_int_array
+    with self.assertRaises(TypeError):
+      message.repeated_int64[:] = np_22_int_array
+
+    with self.assertRaises(TypeError):
+      message.repeated_uint64[:] = np_11_uint_array
+    with self.assertRaises(TypeError):
+      message.repeated_uint64[:] = np_22_uint_array
+
+  # Assigning any ndarray of floats to repeated int field should fail
+  def testNumpyFloatArrayToRepeated_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.repeated_int64[:] = np_1_float_array
+    with self.assertRaises(TypeError):
+      message.repeated_int64[:] = np_11_float_array
+    with self.assertRaises(TypeError):
+      message.repeated_int64[:] = np_22_float_array
+
+  # Assigning any np int to scalar field should pass
+  def testNumpyIntScalarToScalar_IsValid(self):
+    message.optional_int64 = np_int_scalar
+    message.optional_uint64 = np_uint_scalar
+
+  # Assigning any ndarray of ints to scalar field should fail
+  def testNumpyIntArrayToScalar_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_1_int_array
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_11_int_array
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_22_int_array
+
+    with self.assertRaises(TypeError):
+      message.optional_uint64 = np_1_uint_array
+    with self.assertRaises(TypeError):
+      message.optional_uint64 = np_11_uint_array
+    with self.assertRaises(TypeError):
+      message.optional_uint64 = np_22_uint_array
+
+  # Assigning any ndarray of floats to scalar field should fail
+  def testNumpyFloatArrayToScalar_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_1_float_array
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_11_float_array
+    with self.assertRaises(TypeError):
+      message.optional_int64 = np_22_float_array
+
+@testing_refleaks.TestCase
+class NumpyFloatProtoTest(unittest.TestCase):
+
+  # Assigning dim 1 ndarray of floats to repeated field should pass
+  def testNumpyDim1FloatArrayToRepeated_IsValid(self):
+    message.repeated_float[:] = np_1_float_array
+    message.repeated_float[:] = np_2_float_array
+
+  # Assigning dim 2 ndarray of floats to repeated field should fail
+  def testNumpyDim2FloatArrayToRepeated_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.repeated_float[:] = np_11_float_array
+    with self.assertRaises(TypeError):
+      message.repeated_float[:] = np_22_float_array
+
+  # Assigning any np float to scalar field should pass
+  def testNumpyFloatScalarToScalar_IsValid(self):
+    message.optional_float = np_float_scalar
+
+  # Assigning any ndarray of float to scalar field should fail
+  def testNumpyFloatArrayToScalar_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.optional_float = np_1_float_array
+    with self.assertRaises(TypeError):
+      message.optional_float = np_11_float_array
+    with self.assertRaises(TypeError):
+      message.optional_float = np_22_float_array
+
+@testing_refleaks.TestCase
+class NumpyBoolProtoTest(unittest.TestCase):
+
+  # Assigning dim 1 ndarray of bool to repeated field should pass
+  def testNumpyDim1BoolArrayToRepeated_IsValid(self):
+    message.repeated_bool[:] = np_1_bool_array
+    message.repeated_bool[:] = np_2_bool_array
+
+  # Assigning dim 2 ndarray of bool to repeated field should fail
+  def testNumpyDim2BoolArrayToRepeated_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.repeated_bool[:] = np_11_bool_array
+    with self.assertRaises(TypeError):
+      message.repeated_bool[:] = np_22_bool_array
+
+  # Assigning any np bool to scalar field should pass
+  def testNumpyBoolScalarToScalar_IsValid(self):
+    message.optional_bool = np_bool_scalar
+
+  # Assigning any ndarray of bool to scalar field should fail
+  def testNumpyBoolArrayToScalar_RaisesTypeError(self):
+    with self.assertRaises(TypeError):
+      message.optional_bool = np_1_bool_array
+    with self.assertRaises(TypeError):
+      message.optional_bool = np_11_bool_array
+    with self.assertRaises(TypeError):
+      message.optional_bool = np_22_bool_array
+
+@testing_refleaks.TestCase
+class NumpyProtoIndexingTest(unittest.TestCase):
+
+  def testNumpyIntScalarIndexing_Passes(self):
+    data = unittest_pb2.TestAllTypes(repeated_int64=[0, 1, 2])
+    self.assertEqual(0, data.repeated_int64[np.int64(0)])
+
+  def testNumpyNegative1IntScalarIndexing_Passes(self):
+    data = unittest_pb2.TestAllTypes(repeated_int64=[0, 1, 2])
+    self.assertEqual(2, data.repeated_int64[np.int64(-1)])
+
+  def testNumpyFloatScalarIndexing_Fails(self):
+    data = unittest_pb2.TestAllTypes(repeated_int64=[0, 1, 2])
+    with self.assertRaises(TypeError):
+      _ = data.repeated_int64[np.float64(0.0)]
+
+  def testNumpyIntArrayIndexing_Fails(self):
+    data = unittest_pb2.TestAllTypes(repeated_int64=[0, 1, 2])
+    with self.assertRaises(TypeError):
+      _ = data.repeated_int64[np.array([0])]
+    with self.assertRaises(TypeError):
+      _ = data.repeated_int64[np.ndarray((1,), buffer=np.array([0]), dtype=int)]
+    with self.assertRaises(TypeError):
+      _ = data.repeated_int64[np.ndarray((1, 1),
+                                         buffer=np.array([0]),
+                                         dtype=int)]
+# end:github_only
+
 if __name__ == '__main__':
   unittest.main(verbosity=2)