From 4f4b2ced2b7ac5295111a454e1416e8644ee09fb Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 13 Dec 2019 00:01:12 -0800 Subject: [PATCH 1/2] Introduce _IndexerLike. (#1126) Simple refines. --- databricks/koalas/indexing.py | 92 ++++++++++------------------------- 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/databricks/koalas/indexing.py b/databricks/koalas/indexing.py index 3f2098d4df..9b62722998 100644 --- a/databricks/koalas/indexing.py +++ b/databricks/koalas/indexing.py @@ -85,7 +85,31 @@ def _unfold(key, kseries): return rows_sel, cols_sel -class AtIndexer(object): +class _IndexerLike(object): + + def __init__(self, kdf_or_kser): + from databricks.koalas.frame import DataFrame + from databricks.koalas.series import Series + assert isinstance(kdf_or_kser, (DataFrame, Series)), \ + 'unexpected argument type: {}'.format(type(kdf_or_kser)) + self._kdf_or_kser = kdf_or_kser + + @property + def _is_df(self): + from databricks.koalas.frame import DataFrame + return isinstance(self._kdf_or_kser, DataFrame) + + @property + def _is_series(self): + from databricks.koalas.series import Series + return isinstance(self._kdf_or_kser, Series) + + @property + def _internal(self): + return self._kdf_or_kser._internal + + +class AtIndexer(_IndexerLike): """ Access a single value for a row/column label pair. If the index is not unique, all matching pairs are returned as an array. 
@@ -122,26 +146,6 @@ class AtIndexer(object): >>> kdf.at[5, 'B'] array([ 4, 20]) """ - def __init__(self, kdf_or_kser): - from databricks.koalas.frame import DataFrame - from databricks.koalas.series import Series - assert isinstance(kdf_or_kser, (DataFrame, Series)), \ - 'unexpected argument type: {}'.format(type(kdf_or_kser)) - self._kdf_or_kser = kdf_or_kser - - @property - def _is_df(self): - from databricks.koalas.frame import DataFrame - return isinstance(self._kdf_or_kser, DataFrame) - - @property - def _is_series(self): - from databricks.koalas.series import Series - return isinstance(self._kdf_or_kser, Series) - - @property - def _internal(self): - return self._kdf_or_kser._internal def __getitem__(self, key): if self._is_df: @@ -181,7 +185,7 @@ def __getitem__(self, key): or len(values) > 1) else values[0] -class LocIndexer(object): +class LocIndexer(_IndexerLike): """ Access a group of rows and columns by label(s) or a boolean Series. @@ -357,27 +361,6 @@ class LocIndexer(object): 9 7 8 """ - def __init__(self, kdf_or_kser): - from databricks.koalas.frame import DataFrame - from databricks.koalas.series import Series - assert isinstance(kdf_or_kser, (DataFrame, Series)), \ - 'unexpected argument type: {}'.format(type(kdf_or_kser)) - self._kdf_or_kser = kdf_or_kser - - @property - def _is_df(self): - from databricks.koalas.frame import DataFrame - return isinstance(self._kdf_or_kser, DataFrame) - - @property - def _is_series(self): - from databricks.koalas.series import Series - return isinstance(self._kdf_or_kser, Series) - - @property - def _internal(self): - return self._kdf_or_kser._internal - def __getitem__(self, key): from databricks.koalas.frame import DataFrame from databricks.koalas.series import Series @@ -563,7 +546,7 @@ def __setitem__(self, key, value): self._kdf_or_kser[col_sel] = value -class ILocIndexer(object): +class ILocIndexer(_IndexerLike): """ Purely integer-location based indexing for selection by position. 
@@ -677,27 +660,6 @@ class ILocIndexer(object): 2 1000 3000 """ - def __init__(self, kdf_or_kser): - from databricks.koalas.frame import DataFrame - from databricks.koalas.series import Series - assert isinstance(kdf_or_kser, (DataFrame, Series)), \ - 'unexpected argument type: {}'.format(type(kdf_or_kser)) - self._kdf_or_kser = kdf_or_kser - - @property - def _is_df(self): - from databricks.koalas.frame import DataFrame - return isinstance(self._kdf_or_kser, DataFrame) - - @property - def _is_series(self): - from databricks.koalas.series import Series - return isinstance(self._kdf_or_kser, Series) - - @property - def _internal(self): - return self._kdf_or_kser._internal - def __getitem__(self, key): from databricks.koalas.frame import DataFrame from databricks.koalas.indexes import Index From f1ae487c8af2ac2abaf2f5e2aa3c1184dc0c8015 Mon Sep 17 00:00:00 2001 From: HaeJoon Lee <44108233+itholic@users.noreply.github.com> Date: Fri, 13 Dec 2019 17:33:44 +0900 Subject: [PATCH 2/2] 'isna' type functions should return proper message for MultiIndex (#1130) In pandas, `pd.MultiIndex.isna()`, `pd.MultiIndex.isnull()`, `pd.MultiIndex.notna()`, and `pd.MultiIndex.notnull()` all raise `NotImplementedError` with the same message, `isna is not defined for MultiIndex`, as shown below. ```python >>> pidx = pd.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)]) >>> pidx.isna() Traceback (most recent call last): ... NotImplementedError: isna is not defined for MultiIndex >>> pidx.isnull() Traceback (most recent call last): ... NotImplementedError: isna is not defined for MultiIndex >>> pidx.notna() Traceback (most recent call last): ... NotImplementedError: isna is not defined for MultiIndex >>> pidx.notnull() Traceback (most recent call last): ... NotImplementedError: isna is not defined for MultiIndex ``` Koalas should mimic this behavior in the corresponding functions. Note that in this patch `notna()`/`notnull()` raise `notna is not defined for MultiIndex`, which differs from the pandas `isna` message shown above.
--- databricks/koalas/base.py | 6 ++++++ databricks/koalas/tests/test_indexes.py | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index 165c631fb2..6c411a6b2b 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -558,6 +558,9 @@ def isnull(self): >>> ser.rename("a").to_frame().set_index("a").index.isna() Index([False, False, True], dtype='object', name='a') """ + from databricks.koalas.indexes import MultiIndex + if isinstance(self, MultiIndex): + raise NotImplementedError("isna is not defined for MultiIndex") if isinstance(self.spark_type, (FloatType, DoubleType)): return self._with_new_scol(self._scol.isNull() | F.isnan(self._scol)).rename(self.name) else: @@ -599,6 +602,9 @@ def notnull(self): >>> ser.rename("a").to_frame().set_index("a").index.notna() Index([True, True, False], dtype='object', name='a') """ + from databricks.koalas.indexes import MultiIndex + if isinstance(self, MultiIndex): + raise NotImplementedError("notna is not defined for MultiIndex") return (~self.isnull()).rename(self.name) notna = notnull diff --git a/databricks/koalas/tests/test_indexes.py b/databricks/koalas/tests/test_indexes.py index 3f2d2cf4d0..92a989aa1c 100644 --- a/databricks/koalas/tests/test_indexes.py +++ b/databricks/koalas/tests/test_indexes.py @@ -334,3 +334,26 @@ def test_index_fillna(self): with self.assertRaisesRegex(TypeError, "Unsupported type "): kidx.fillna([1, 2]) + + def test_multiindex_isna(self): + kidx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)]) + + with self.assertRaisesRegex( + NotImplementedError, + "isna is not defined for MultiIndex"): + kidx.isna() + + with self.assertRaisesRegex( + NotImplementedError, + "isna is not defined for MultiIndex"): + kidx.isnull() + + with self.assertRaisesRegex( + NotImplementedError, + "notna is not defined for MultiIndex"): + kidx.notna() + + with self.assertRaisesRegex( + 
NotImplementedError, + "notna is not defined for MultiIndex"): + kidx.notnull()