Skip to content

Commit

Permalink
fix(eda.data_array): handle empty df correctly
Browse files (browse the repository at this point in the history)
  • Loading branch information
dovahcrow committed Sep 9, 2020
1 parent a6b6e3b commit 97db86d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
9 changes: 8 additions & 1 deletion dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
for create_report(df) function."""

from typing import Any, Dict, List, Optional, Tuple, Union
+ from warnings import catch_warnings, filterwarnings

import dask
import dask.dataframe as dd
Expand Down Expand Up @@ -80,7 +81,13 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
# aggregate all computations
data, completions = basic_computations(df)

- (data,) = dask.compute(data)
+ with catch_warnings():
+     filterwarnings(
+         "ignore",
+         "invalid value encountered in true_divide",
+         category=RuntimeWarning,
+     )
+     (data,) = dask.compute(data)

# results dictionary
res: Dict[str, Any] = {}
Expand Down
13 changes: 8 additions & 5 deletions dataprep/eda/data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ def __init__(

if isinstance(df, dd.DataFrame):
is_pandas = False
- self._ddf = df.astype({col: np.object for col in cat_cols})
+ if cat_cols and df.shape[1] != 0:
+     df = df.astype({col: np.object for col in cat_cols})
+ self._ddf = df
elif isinstance(df, DataArray):
self._ddf = df._ddf
self._values = df._values
Expand Down Expand Up @@ -329,10 +331,11 @@ def __getitem__(self, indexer: Union[Sequence[str], str]) -> "DataArray":
df = DataArray(subdf)
df._values = self.values[:, cidx] # pylint: disable=W0212

- # coerce the array to its minimal type
- dtype = reduce(np.promote_types, df.dtypes.values)
- if df._values.dtype != dtype:
-     df._values = df._values.astype(dtype)
+ if df.shape[1] != 0:
+     # coerce the array to its minimal type
+     dtype = reduce(np.promote_types, df.dtypes.values)
+     if df._values.dtype != dtype:
+         df._values = df._values.astype(dtype)

df._nulls = self.nulls[:, cidx] # pylint: disable=W0212
if self._head is not None:
Expand Down

0 comments on commit 97db86d

Please sign in to comment.