Skip to content

Commit

Permalink
ARROW-9451: [Python] Refuse implicit cast of str to unsigned integer
Browse files (browse the repository at this point in the history)
Ensure that `pa.array(['5'], type='uint32')` raises an exception.

Closes apache#9912 from pitrou/ARROW-9451-implicit-str-to-int

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
pitrou committed Apr 7, 2021
1 parent f742968 commit d95c72f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
12 changes: 5 additions & 7 deletions cpp/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ Status CIntFromPythonImpl(PyObject* obj, Int* out, const std::string& overflow_m
// PyLong_AsUnsignedLong() and PyLong_AsUnsignedLongLong() don't handle
// conversion from non-ints (e.g. np.uint64), so do it ourselves
if (!PyLong_Check(obj)) {
ref.reset(PyNumber_Long(obj));
ref.reset(PyNumber_Index(obj));
if (!ref) {
RETURN_IF_PYERROR();
}
Expand Down Expand Up @@ -348,16 +348,14 @@ bool IsPandasTimestamp(PyObject* obj) {
}

Status InvalidValue(PyObject* obj, const std::string& why) {
std::string obj_as_str;
RETURN_NOT_OK(internal::PyObject_StdStringStr(obj, &obj_as_str));
return Status::Invalid("Could not convert ", obj_as_str, " with type ",
auto obj_as_str = PyObject_StdStringRepr(obj);
return Status::Invalid("Could not convert ", std::move(obj_as_str), " with type ",
Py_TYPE(obj)->tp_name, ": ", why);
}

Status InvalidType(PyObject* obj, const std::string& why) {
std::string obj_as_str;
RETURN_NOT_OK(internal::PyObject_StdStringStr(obj, &obj_as_str));
return Status::TypeError("Could not convert ", obj_as_str, " with type ",
auto obj_as_str = PyObject_StdStringRepr(obj);
return Status::TypeError("Could not convert ", std::move(obj_as_str), " with type ",
Py_TYPE(obj)->tp_name, ": ", why);
}

Expand Down
20 changes: 14 additions & 6 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import decimal
import itertools
import math
import re

import hypothesis as h
import numpy as np
Expand All @@ -42,7 +43,7 @@
(np.uint64, pa.uint64())]


np_int_types, _ = zip(*int_type_pairs)
np_int_types, pa_int_types = zip(*int_type_pairs)


class StrangeIterable:
Expand Down Expand Up @@ -433,6 +434,14 @@ def test_unsigned_integer_overflow(bits):
pa.array([-1], ty)


@parametrize_with_iterable_types
@pytest.mark.parametrize("typ", pa_int_types)
def test_integer_from_string_error(seq, typ):
# ARROW-9451: pa.array(['1'], type=pa.uint32()) should not succeed
with pytest.raises(pa.ArrowInvalid):
pa.array(seq(['1']), type=typ)


def test_convert_with_mask():
data = [1, 2, 3, 4, 5]
mask = np.array([False, True, False, False, True])
Expand Down Expand Up @@ -1684,7 +1693,7 @@ def test_struct_from_list_of_pairs_errors():
# type inference
template = (
r"Could not convert {} with type {}: was expecting tuple of "
r"\(key, value\) pair"
r"(key, value) pair"
)
cases = [
tuple(), # empty key-value pair
Expand All @@ -1693,10 +1702,9 @@ def test_struct_from_list_of_pairs_errors():
'string', # not a tuple
]
for key_value_pair in cases:
msg = template.format(
str(key_value_pair).replace('(', r'\(').replace(')', r'\)'),
type(key_value_pair).__name__
)
msg = re.escape(template.format(
repr(key_value_pair), type(key_value_pair).__name__
))

with pytest.raises(TypeError, match=msg):
pa.array([
Expand Down

0 comments on commit d95c72f

Please sign in to comment.