diff --git a/dataprep/eda/create_report/formatter.py b/dataprep/eda/create_report/formatter.py index 2e3aa6064..c3dc0cdea 100644 --- a/dataprep/eda/create_report/formatter.py +++ b/dataprep/eda/create_report/formatter.py @@ -2,6 +2,7 @@ for create_report(df) function.""" from typing import Any, Dict, List, Optional, Tuple, Union +from warnings import catch_warnings, filterwarnings import dask import dask.dataframe as dd @@ -80,7 +81,13 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]: # aggregate all computations data, completions = basic_computations(df) - (data,) = dask.compute(data) + with catch_warnings(): + filterwarnings( + "ignore", + "invalid value encountered in true_divide", + category=RuntimeWarning, + ) + (data,) = dask.compute(data) # results dictionary res: Dict[str, Any] = {} diff --git a/dataprep/eda/data_array.py b/dataprep/eda/data_array.py index f10a689de..e49692e1d 100644 --- a/dataprep/eda/data_array.py +++ b/dataprep/eda/data_array.py @@ -196,7 +196,9 @@ def __init__( if isinstance(df, dd.DataFrame): is_pandas = False - self._ddf = df.astype({col: np.object for col in cat_cols}) + if cat_cols and df.shape[1] != 0: + df = df.astype({col: np.object for col in cat_cols}) + self._ddf = df elif isinstance(df, DataArray): self._ddf = df._ddf self._values = df._values @@ -329,10 +331,11 @@ def __getitem__(self, indexer: Union[Sequence[str], str]) -> "DataArray": df = DataArray(subdf) df._values = self.values[:, cidx] # pylint: disable=W0212 - # coerce the array to it's minimal type - dtype = reduce(np.promote_types, df.dtypes.values) - if df._values.dtype != dtype: - df._values = df._values.astype(dtype) + if df.shape[1] != 0: + # coerce the array to it's minimal type + dtype = reduce(np.promote_types, df.dtypes.values) + if df._values.dtype != dtype: + df._values = df._values.astype(dtype) df._nulls = self.nulls[:, cidx] # pylint: disable=W0212 if self._head is not None: