Skip to content

Commit

Permalink
Fix fillna not to change index values. (#1241)
Browse files Browse the repository at this point in the history
`DataFrame.fillna()` should not change the index values. In pandas, only the data columns are filled and NaN values in the index are left untouched:

```py
>>> import pandas as pd
>>> import numpy as np
>>> pdf = pd.DataFrame({'x': [np.nan, 2, 3, 4, np.nan, 6],
...                     'y': [1, 2, np.nan, 4, np.nan, np.nan],
...                     'z': [1, 2, 3, 4, np.nan, np.nan]}).set_index(['x', 'y'])
>>>
>>> pdf.fillna(-1)
           z
x   y
NaN 1.0  1.0
2.0 2.0  2.0
3.0 NaN  3.0
4.0 4.0  4.0
NaN NaN -1.0
6.0 NaN -1.0
>>> pdf.fillna({'x': -1, 'y': -2, 'z': -5})
           z
x   y
NaN 1.0  1.0
2.0 2.0  2.0
3.0 NaN  3.0
4.0 4.0  4.0
NaN NaN -5.0
6.0 NaN -5.0
```

whereas Koalas currently also fills the NaN values in the index levels:

```py
>>> ks.from_pandas(pdf).fillna(-1)
             z
x    y
-1.0  1.0  1.0
 2.0  2.0  2.0
 3.0 -1.0  3.0
 4.0  4.0  4.0
-1.0 -1.0 -1.0
 6.0 -1.0 -1.0
>>> ks.from_pandas(pdf).fillna({'x': -1, 'y': -2, 'z': -5})
             z
x    y
-1.0  1.0  1.0
 2.0  2.0  2.0
 3.0 -2.0  3.0
 4.0  4.0  4.0
-1.0 -2.0 -5.0
 6.0 -2.0 -5.0
```
  • Loading branch information
ueshin authored Jan 30, 2020
1 parent 2e24a3d commit 4bdfe2d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
6 changes: 4 additions & 2 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4091,10 +4091,12 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None):
for v in value.values():
if not isinstance(v, (float, int, str, bool)):
raise TypeError("Unsupported type %s" % type(v))
value = {self._internal.column_name_for(key): value for key, value in value.items()}
value = {k if isinstance(k, tuple) else (k,): v for k, v in value.items()}
value = {self._internal.column_name_for(k): v for k, v in value.items()
if k in self._internal.column_index}
if limit is not None:
raise ValueError('limit parameter for value is not support now')
sdf = self._sdf.fillna(value)
sdf = self._sdf.fillna(value, subset=self._internal.data_columns)
kdf = DataFrame(self._internal.copy(
sdf=sdf,
column_scols=[scol_for(sdf, col) for col in self._internal.data_columns]))
Expand Down
12 changes: 9 additions & 3 deletions databricks/koalas/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,11 +609,10 @@ def test_fillna(self):
self.assert_eq(pdf.fillna(method='bfill'), kdf.fillna(method='bfill'))
self.assert_eq(pdf.fillna(method='bfill', limit=2), kdf.fillna(method='bfill', limit=2))

pdf = pd.DataFrame({'x': np.random.rand(6),
'y': np.random.rand(6),
'z': [1, 2, 3, 4, np.nan, np.nan]}).set_index(['x', 'y'])
pdf = pdf.set_index(['x', 'y'])
kdf = ks.from_pandas(pdf)
# check multi index
self.assert_eq(kdf.fillna(-1), pdf.fillna(-1))
self.assert_eq(pdf.fillna(method='bfill'), kdf.fillna(method='bfill'))
self.assert_eq(pdf.fillna(method='ffill'), kdf.fillna(method='ffill'))

Expand Down Expand Up @@ -656,6 +655,13 @@ def test_fillna(self):
self.assert_eq(pdf.fillna(method='bfill'), kdf.fillna(method='bfill'))
self.assert_eq(pdf.fillna(method='bfill', limit=2), kdf.fillna(method='bfill', limit=2))

# check multi index
pdf = pdf.set_index([('x', 'a'), ('x', 'b')])
kdf = ks.from_pandas(pdf)
self.assert_eq(kdf.fillna(-1), pdf.fillna(-1))
self.assert_eq(kdf.fillna({('x', 'a'): -1, ('x', 'b'): -2, ('y', 'c'): -5}),
pdf.fillna({('x', 'a'): -1, ('x', 'b'): -2, ('y', 'c'): -5}))

def test_isnull(self):
pdf = pd.DataFrame({'x': [1, 2, 3, 4, None, 6], 'y': list('abdabd')},
index=np.random.rand(6))
Expand Down

0 comments on commit 4bdfe2d

Please sign in to comment.