Skip to content

Commit

Permalink
Implement Index.dropna & MultiIndex.dropna (#938)
Browse files Browse the repository at this point in the history
pandas provides `Index.dropna` (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.dropna.html#pandas.Index.dropna), which returns the index without NA/NaN values.

This commit adds the same method to Koalas for both `Index` and `MultiIndex`, matching the pandas behavior.

```python
>>> df = ks.DataFrame([[1, 2], [4, 5], [7, 8]],
...                   index=['cobra', 'viper', None],
...                   columns=['max_speed', 'shield'])
>>> df
       max_speed  shield
cobra          1       2
viper          4       5
NaN            7       8

>>> df.index.dropna()
Index(['cobra', 'viper'], dtype='object')

>>> df.to_pandas().index.dropna()
Index(['cobra', 'viper'], dtype='object')
```
  • Loading branch information
itholic authored and HyukjinKwon committed Nov 25, 2019
1 parent bacd903 commit 29efe06
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 3 deletions.
58 changes: 57 additions & 1 deletion databricks/koalas/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Wrappers for Indexes to behave similar to pandas Index, MultiIndex.
"""

from functools import partial
from functools import partial, reduce
from typing import Any, List, Optional, Tuple, Union

import pandas as pd
Expand All @@ -39,6 +39,7 @@
from databricks.koalas.series import Series
from databricks.koalas.internal import _InternalFrame
from databricks.koalas.utils import name_like_string
from databricks.koalas.internal import _InternalFrame


class Index(IndexOpsMixin):
Expand Down Expand Up @@ -449,6 +450,61 @@ def is_object(self):
"""
return is_object_dtype(self.dtype)

def dropna(self):
    """
    Return Index or MultiIndex without NA/NaN values.

    For a MultiIndex, an entry is dropped when any of its levels is missing.

    Returns
    -------
    Index or MultiIndex
        A new index of the same kind as the caller, without the entries
        that contain missing values.

    Examples
    --------
    >>> df = ks.DataFrame([[1, 2], [4, 5], [7, 8]],
    ...                   index=['cobra', 'viper', None],
    ...                   columns=['max_speed', 'shield'])
    >>> df
           max_speed  shield
    cobra          1       2
    viper          4       5
    NaN            7       8

    >>> df.index.dropna()
    Index(['cobra', 'viper'], dtype='object')

    MultiIndex is supported as well.

    >>> midx = pd.MultiIndex([['lama', 'cow', 'falcon'],
    ...                       [None, 'weight', 'length']],
    ...                      [[0, 1, 1, 1, 1, 1, 2, 2, 2],
    ...                       [0, 1, 1, 0, 1, 2, 1, 1, 2]])
    >>> s = ks.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, None],
    ...               index=midx)
    >>> s
    lama    NaN        45.0
    cow     weight    200.0
            weight      1.2
            NaN        30.0
            weight    250.0
            length      1.5
    falcon  weight    320.0
            weight      1.0
            length      NaN
    Name: 0, dtype: float64

    >>> s.index.dropna()  # doctest: +SKIP
    MultiIndex([(   'cow', 'weight'),
                (   'cow', 'weight'),
                (   'cow', 'weight'),
                (   'cow', 'length'),
                ('falcon', 'weight'),
                ('falcon', 'weight'),
                ('falcon', 'length')],
               )
    """
    # Keep only the index columns and drop every row with a missing value in
    # any of them. The underlying frame is only read here (select/dropna
    # produce a new Spark DataFrame), so no defensive copy of the Koalas
    # DataFrame is made.
    index_sdf = self._kdf._internal.sdf.select(self._internal.index_scols).dropna()
    internal = _InternalFrame(sdf=index_sdf, index_map=self._internal.index_map)
    kdf = DataFrame(internal)
    # isinstance instead of an exact type comparison, so that only real
    # MultiIndex instances are wrapped as MultiIndex and everything else
    # stays a plain Index.
    return MultiIndex(kdf) if isinstance(self, MultiIndex) else Index(kdf)

def unique(self, level=None):
"""
Return unique values in the index.
Expand Down
2 changes: 0 additions & 2 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class _MissingPandasLikeIndex(object):
drop = unsupported_function('drop')
drop_duplicates = unsupported_function('drop_duplicates')
droplevel = unsupported_function('droplevel')
dropna = unsupported_function('dropna')
duplicated = unsupported_function('duplicated')
equals = unsupported_function('equals')
factorize = unsupported_function('factorize')
Expand Down Expand Up @@ -145,7 +144,6 @@ class _MissingPandasLikeMultiIndex(object):
drop = unsupported_function('drop')
drop_duplicates = unsupported_function('drop_duplicates')
droplevel = unsupported_function('droplevel')
dropna = unsupported_function('dropna')
duplicated = unsupported_function('duplicated')
equal_levels = unsupported_function('equal_levels')
equals = unsupported_function('equals')
Expand Down
8 changes: 8 additions & 0 deletions docs/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Missing Values
.. autosummary::
:toctree: api/

Index.dropna
Index.isna
Index.notna

Expand Down Expand Up @@ -96,6 +97,13 @@ MultiIndex Properties
MultiIndex.ndim
MultiIndex.T

MultiIndex Missing Values
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: api/

MultiIndex.dropna

MultiIndex Modifying and computations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
Expand Down

0 comments on commit 29efe06

Please sign in to comment.