From da5a72bef653916844fed1d71659f6ad24d3297d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 13 Oct 2019 18:50:17 -0700 Subject: [PATCH 1/7] passing --- pandas/_libs/groupby.pyx | 31 ++++++++++++++++++++++--------- pandas/core/groupby/ops.py | 8 +++++++- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 3069bbbf34bb7..304983327dc59 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -8,7 +8,7 @@ import numpy as np cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t) + uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) cnp.import_array() @@ -424,12 +424,18 @@ def group_any_all(uint8_t[:] out, # group_add, group_prod, group_var, group_mean, group_ohlc # ---------------------------------------------------------------------- +ctypedef fused complexfloating_t: + float64_t + float32_t + complex64_t + complex128_t + @cython.wraparound(False) @cython.boundscheck(False) -def _group_add(floating[:, :] out, +def _group_add(complexfloating_t[:, :] out, int64_t[:] counts, - floating[:, :] values, + complexfloating_t[:, :] values, const int64_t[:] labels, Py_ssize_t min_count=0): """ @@ -437,13 +443,14 @@ def _group_add(floating[:, :] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - floating val, count - floating[:, :] sumx, nobs + complexfloating_t val, count + complexfloating_t[:, :] sumx + int64_t[:, :] nobs if len(values) != len(labels): raise AssertionError("len(index) != len(labels)") - nobs = np.zeros_like(out) + nobs = np.zeros((len(out), out.shape[1]), dtype=np.int64) sumx = np.zeros_like(out) N, K = (values).shape @@ -461,7 +468,11 @@ def _group_add(floating[:, :] out, # not nan if val == val: nobs[lab, j] += 1 - sumx[lab, j] += val + if complexfloating_t is complex64_t or complexfloating_t is complex128_t: + # clang errors if we use += with these dtypes + sumx[lab, j] = sumx[lab, j] + val + else: + sumx[lab, j] += val for i in range(ncounts): for j in range(K): @@ -471,8 +482,10 @@ def _group_add(floating[:, :] out, out[i, j] = sumx[i, j] -group_add_float32 = _group_add['float'] -group_add_float64 = _group_add['double'] +group_add_float32 = _group_add['float32_t'] +group_add_float64 = _group_add['float64_t'] +group_add_complex64 = _group_add['float complex'] +group_add_complex128 = _group_add['double complex'] @cython.wraparound(False) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 40517eefe4d5d..a6dc38e48aa73 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -526,7 +526,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): func = self._get_cython_function(kind, how, values, is_numeric) except NotImplementedError: if is_numeric: - values = ensure_float64(values) + try: + values = ensure_float64(values) + except TypeError: + if lib.infer_dtype(values) == "complex": + values = values.astype(complex) + else: + raise func = self._get_cython_function(kind, how, values, is_numeric) else: raise From 0a3a4c180e172a8436c7508130f783c9d80ca089 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 16 Oct 2019 11:11:27 -0700 Subject: [PATCH 2/7] re-raise TypeError --- pandas/core/groupby/groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 92ea733cc3447..30b102ad6ce37 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1357,6 +1357,8 @@ def f(self, **kwargs): raise SpecificationError(str(e)) except DataError: pass + except TypeError: + raise except Exception: # TODO: the remaining test cases that get here are from: # - AttributeError from _cython_agg_blocks bug passing From ac6871a6f0e8c039a43e10233ab155bfa251b13a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 16 Oct 2019 11:33:11 -0700 Subject: [PATCH 3/7] catch some NotImplementedError --- pandas/core/groupby/groupby.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 30b102ad6ce37..c67ee61c28eec 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1359,6 +1359,11 @@ def f(self, **kwargs): pass except TypeError: raise + except NotImplementedError as err: + if "function is not implemented for this dtype" not in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + raise except Exception: # TODO: the remaining test cases that get here are from: # - AttributeError from _cython_agg_blocks bug passing From 66db887a5d54733b3af8d1a90bcdbd2651438488 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 10:44:40 -0700 Subject: [PATCH 4/7] Catch fewer cases --- pandas/core/groupby/groupby.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a5498d9413407..b27d5bb05ee8f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1340,16 +1340,17 @@ def f(self, **kwargs): # try a cython aggregation if we can try: return self._cython_agg_general(alias, alt=npfunc, **kwargs) - except AssertionError: - raise except DataError: pass - except TypeError: - raise except NotImplementedError as err: - if "function is not implemented for this dtype" not in str(err): + if "function is not implemented for this dtype" in str(err): # raised in _get_cython_function, in some cases can # be trimmed by implementing cython funcs for more dtypes + pass + elif "decimal does not support skipna=True" in str(err): + # FIXME: kludge for test_decimal:test_in_numeric_groupby + pass + else: raise # apply a non-cython aggregation From 948ac216a20d7ef830c85577370ad12ea596a050 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 12:16:45 -0700 Subject: [PATCH 5/7] lint fixup --- pandas/_libs/groupby.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 7e3afc2be5cc5..2797943343af2 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -421,6 +421,7 @@ def group_any_all(uint8_t[:] out, if values[i] == flag_val: out[lab] = flag_val + # ---------------------------------------------------------------------- # group_add, group_prod, group_var, group_mean, group_ohlc # ---------------------------------------------------------------------- @@ -469,7 +470,8 @@ def _group_add(complexfloating_t[:, :] out, # not nan if val == val: nobs[lab, j] += 1 - if complexfloating_t is complex64_t or complexfloating_t is complex128_t: + if (complexfloating_t is complex64_t or + complexfloating_t is complex128_t): # clang errors if we use += with these dtypes sumx[lab, j] = sumx[lab, j] + val else: From 672dbe012777546cb48812c8af55073622568221 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 12:17:27 -0700 Subject: [PATCH 6/7] specify skipna --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 167a08edb1a1b..e380cf5930f97 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -529,7 +529,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): try: values = ensure_float64(values) except TypeError: - if lib.infer_dtype(values) == "complex": + if lib.infer_dtype(values, skipna=False) == "complex": values = values.astype(complex) else: raise From 39ba0179ff67e0c6737d2048cefd206795d114ad Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 13:56:07 -0700 Subject: [PATCH 7/7] lint fixup --- pandas/_libs/groupby.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 2797943343af2..68c21139e7384 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -471,7 +471,7 @@ def _group_add(complexfloating_t[:, :] out, if val == val: nobs[lab, j] += 1 if (complexfloating_t is complex64_t or - complexfloating_t is complex128_t): + complexfloating_t is complex128_t): # clang errors if we use += with these dtypes sumx[lab, j] = sumx[lab, j] + val else: