Skip to content

Commit

Permalink
CLN/BUG: fix ndarray assignment may cause unexpected cast
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Jan 8, 2017
1 parent b895968 commit 562f43a
Show file tree
Hide file tree
Showing 11 changed files with 324 additions and 143 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Bug Fixes
- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
<<<<<<< f26b049786624ed983f2718687c23e3f1adbb670
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
is not scalar and ``values`` is not specified (:issue:`14380`)
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,20 @@ Bug Fixes
- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
<<<<<<< HEAD
- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`).
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`)
- Bug in ``pd.merge_asof()`` could not handle timezone-aware DatetimeIndex when a tolerance was specified (:issue:`14844`)
=======


- Bug in assignment against datetime-like data with ``int`` may incorrectly converted to datetime-like (:issue:`14145`)
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)



>>>>>>> CLN/BUG: fix ndarray assignment may cause unexpected cast
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`)
- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)
23 changes: 9 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
import numpy as np
import numpy.ma as ma

from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar,
from pandas.types.cast import (_maybe_upcast,
_cast_scalar_to_array,
_possibly_cast_to_datetime,
_possibly_infer_to_datetimelike,
_possibly_convert_platform,
Expand Down Expand Up @@ -333,15 +334,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
raise_with_traceback(exc)

if arr.ndim == 0 and index is not None and columns is not None:
if isinstance(data, compat.string_types) and dtype is None:
dtype = np.object_
if dtype is None:
dtype, data = _infer_dtype_from_scalar(data)

values = np.empty((len(index), len(columns)), dtype=dtype)
values.fill(data)
mgr = self._init_ndarray(values, index, columns, dtype=dtype,
copy=False)
values = _cast_scalar_to_array((len(index), len(columns)),
data, dtype=dtype)
mgr = self._init_ndarray(values, index, columns,
dtype=values.dtype, copy=False)
else:
raise PandasError('DataFrame constructor not properly called!')

Expand Down Expand Up @@ -455,7 +451,7 @@ def _get_axes(N, K, index=index, columns=columns):
values = _prep_ndarray(values, copy=copy)

if dtype is not None:
if values.dtype != dtype:
if not is_dtype_equal(values.dtype, dtype):
try:
values = values.astype(dtype)
except Exception as orig:
Expand Down Expand Up @@ -2641,9 +2637,8 @@ def reindexer(value):

else:
# upcast the scalar
dtype, value = _infer_dtype_from_scalar(value)
value = np.repeat(value, len(self.index)).astype(dtype)
value = _possibly_cast_to_datetime(value, dtype)
value = _cast_scalar_to_array(len(self.index), value)
value = _possibly_cast_to_datetime(value, value.dtype)

# return internal types directly
if is_extension_type(value):
Expand Down
176 changes: 94 additions & 82 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
is_null_datelike_scalar)
import pandas.types.concat as _concat

from pandas.types.generic import ABCSeries
from pandas.types.generic import ABCSeries, ABCDatetimeIndex
from pandas.core.common import is_null_slice
import pandas.core.algorithms as algos

Expand Down Expand Up @@ -379,7 +379,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,

# fillna, but if we cannot coerce, then try again as an ObjectBlock
try:
values, _, value, _ = self._try_coerce_args(self.values, value)
values, _, _, _ = self._try_coerce_args(self.values, value)
# value may be converted to internal, thus drop
blocks = self.putmask(mask, value, inplace=inplace)
blocks = [b.make_block(values=self._try_coerce_result(b.values))
for b in blocks]
Expand Down Expand Up @@ -673,8 +674,43 @@ def setitem(self, indexer, value, mgr=None):
if self.is_numeric:
value = np.nan

# coerce args
values, _, value, _ = self._try_coerce_args(self.values, value)
# coerce if block dtype can store value
values = self.values
try:
values, _, value, _ = self._try_coerce_args(values, value)
# can keep its own dtype
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
value.dtype):
dtype = self.dtype
else:
dtype = 'infer'

except (TypeError, ValueError):
# current dtype cannot store value, coerce to common dtype
find_dtype = False

if hasattr(value, 'dtype'):
dtype = value.dtype
find_dtype = True

elif is_scalar(value):
if isnull(value):
# NaN promotion is handled in latter path
dtype = False
else:
dtype, _ = _infer_dtype_from_scalar(value,
pandas_dtype=True)
find_dtype = True
else:
dtype = 'infer'

if find_dtype:
dtype = _find_common_type([values.dtype, dtype])
if not is_dtype_equal(self.dtype, dtype):
b = self.astype(dtype)
return b.setitem(indexer, value, mgr=mgr)

# value must be storeable at this moment
arr_value = np.array(value)

# cast the values to a type that can hold nan (if necessary)
Expand Down Expand Up @@ -704,87 +740,52 @@ def setitem(self, indexer, value, mgr=None):
raise ValueError("cannot set using a slice indexer with a "
"different length than the value")

try:

def _is_scalar_indexer(indexer):
# return True if we are all scalar indexers

if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return all([is_scalar(idx) for idx in indexer])
return False

def _is_empty_indexer(indexer):
# return a boolean if we have an empty indexer
def _is_scalar_indexer(indexer):
# return True if we are all scalar indexers

if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0
for idx in indexer)
return False

# empty indexers
# 8669 (empty)
if _is_empty_indexer(indexer):
pass

# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
elif _is_scalar_indexer(indexer):
values[indexer] = value

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif (len(arr_value.shape) and
arr_value.shape[0] == values.shape[0] and
np.prod(arr_value.shape) == np.prod(values.shape)):
values[indexer] = value
values = values.astype(arr_value.dtype)

# set
else:
values[indexer] = value

# coerce and try to infer the dtypes of the result
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
value.dtype):
dtype = value.dtype
elif is_scalar(value):
dtype, _ = _infer_dtype_from_scalar(value)
else:
dtype = 'infer'
values = self._try_coerce_and_cast_result(values, dtype)
block = self.make_block(transf(values), fastpath=True)

# may have to soft convert_objects here
if block.is_object and not self.is_object:
block = block.convert(numeric=False)

return block
except ValueError:
raise
except TypeError:
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return all([is_scalar(idx) for idx in indexer])
return False

# cast to the passed dtype if possible
# otherwise raise the original error
try:
# e.g. we are uint32 and our value is uint64
# this is for compat with older numpies
block = self.make_block(transf(values.astype(value.dtype)))
return block.setitem(indexer=indexer, value=value, mgr=mgr)
def _is_empty_indexer(indexer):
# return a boolean if we have an empty indexer

except:
pass

raise
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0
for idx in indexer)
return False

except Exception:
# empty indexers
# 8669 (empty)
if _is_empty_indexer(indexer):
pass

return [self]
# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
elif _is_scalar_indexer(indexer):
values[indexer] = value

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif (len(arr_value.shape) and
arr_value.shape[0] == values.shape[0] and
np.prod(arr_value.shape) == np.prod(values.shape)):
values[indexer] = value
values = values.astype(arr_value.dtype)

# set
else:
values[indexer] = value

# coerce and try to infer the dtypes of the result
values = self._try_coerce_and_cast_result(values, dtype)
block = self.make_block(transf(values), fastpath=True)
return block

def putmask(self, mask, new, align=True, inplace=False, axis=0,
transpose=False, mgr=None):
Expand Down Expand Up @@ -1255,6 +1256,7 @@ def func(cond, values, other):

values, values_mask, other, other_mask = self._try_coerce_args(
values, other)

try:
return self._try_coerce_result(expressions.where(
cond, values, other, raise_on_error=True))
Expand Down Expand Up @@ -1534,6 +1536,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
new = new[mask]

mask = _safe_reshape(mask, new_values.shape)

new_values[mask] = new
new_values = self._try_coerce_result(new_values)
return [self.make_block(values=new_values)]
Expand Down Expand Up @@ -1703,7 +1706,7 @@ def fillna(self, value, **kwargs):

# allow filling with integers to be
# interpreted as seconds
if not isinstance(value, np.timedelta64) and is_integer(value):
if not isinstance(value, np.timedelta64):
value = Timedelta(value, unit='s')
return super(TimeDeltaBlock, self).fillna(value, **kwargs)

Expand Down Expand Up @@ -1937,6 +1940,15 @@ def _maybe_downcast(self, blocks, downcast=None):
def _can_hold_element(self, element):
return True

def _try_coerce_args(self, values, other):
""" provide coercion to our input arguments """

if isinstance(other, ABCDatetimeIndex):
# to store DatetimeTZBlock as object
other = other.asobject.values

return values, False, other, False

def _try_cast(self, element):
return element

Expand Down Expand Up @@ -2276,8 +2288,6 @@ def _try_coerce_args(self, values, other):
"naive Block")
other_mask = isnull(other)
other = other.asm8.view('i8')
elif hasattr(other, 'dtype') and is_integer_dtype(other):
other = other.view('i8')
else:
try:
other = np.asarray(other)
Expand Down Expand Up @@ -2453,6 +2463,8 @@ def _try_coerce_args(self, values, other):
raise ValueError("incompatible or non tz-aware value")
other_mask = isnull(other)
other = other.value
else:
raise TypeError

return values, values_mask, other, other_mask

Expand Down
13 changes: 5 additions & 8 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np

from pandas.types.cast import (_infer_dtype_from_scalar,
_cast_scalar_to_array,
_possibly_cast_item)
from pandas.types.common import (is_integer, is_list_like,
is_string_like, is_scalar)
Expand Down Expand Up @@ -166,11 +167,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
copy = False
dtype = None
elif is_scalar(data) and all(x is not None for x in passed_axes):
if dtype is None:
dtype, data = _infer_dtype_from_scalar(data)
values = np.empty([len(x) for x in passed_axes], dtype=dtype)
values.fill(data)
mgr = self._init_matrix(values, passed_axes, dtype=dtype,
values = _cast_scalar_to_array([len(x) for x in passed_axes],
data, dtype=dtype)
mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
copy=False)
copy = False
else: # pragma: no cover
Expand Down Expand Up @@ -570,9 +569,7 @@ def __setitem__(self, key, value):
shape[1:], tuple(map(int, value.shape))))
mat = np.asarray(value)
elif is_scalar(value):
dtype, value = _infer_dtype_from_scalar(value)
mat = np.empty(shape[1:], dtype=dtype)
mat.fill(value)
mat = _cast_scalar_to_array(shape[1:], value)
else:
raise TypeError('Cannot set item of type: %s' % str(type(value)))

Expand Down
Loading

0 comments on commit 562f43a

Please sign in to comment.