Skip to content

Commit

Permalink
refactor: re-arrange describe() logic into two helper methods (#1005)
Browse files Browse the repository at this point in the history
* refactor: re-arrange describe logic into two helper methods

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
sycai and gcf-owl-bot[bot] authored Sep 24, 2024
1 parent 4cb62fd commit 5c1a4c7
Showing 1 changed file with 35 additions and 39 deletions.
74 changes: 35 additions & 39 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2302,52 +2302,19 @@ def melt(
self._block.melt(id_col_ids, val_col_ids, var_name, value_name)
)

_NUMERIC_DESCRIBE_AGGS = (
"count",
"mean",
"std",
"min",
"25%",
"50%",
"75%",
"max",
)
_NON_NUMERIC_DESCRIBE_AGGS = ("count", "nunique")

def describe(self, include: None | Literal["all"] = None) -> DataFrame:

allowed_non_numeric_types = {
bigframes.dtypes.STRING_DTYPE,
bigframes.dtypes.BOOL_DTYPE,
bigframes.dtypes.BYTES_DTYPE,
}

if include is None:
numeric_df = self._drop_non_numeric(permissive=False)
if len(numeric_df.columns) == 0:
# Describe eligible non-numeric columns
result = self.select_dtypes(include=allowed_non_numeric_types).agg(
self._NON_NUMERIC_DESCRIBE_AGGS
)
else:
# Otherwise, only describe numeric columns
result = numeric_df.agg(self._NUMERIC_DESCRIBE_AGGS)
return typing.cast(DataFrame, result)
return self._describe_non_numeric()

elif include == "all":
numeric_result = typing.cast(
DataFrame,
self._drop_non_numeric(permissive=False).agg(
self._NUMERIC_DESCRIBE_AGGS
),
)
# Otherwise, only describe numeric columns
return self._describe_numeric()

non_numeric_result = typing.cast(
DataFrame,
self.select_dtypes(include=allowed_non_numeric_types).agg(
self._NON_NUMERIC_DESCRIBE_AGGS
),
)
elif include == "all":
numeric_result = self._describe_numeric()
non_numeric_result = self._describe_non_numeric()

if len(numeric_result.columns) == 0:
return non_numeric_result
Expand All @@ -2364,6 +2331,35 @@ def describe(self, include: None | Literal["all"] = None) -> DataFrame:
else:
raise ValueError(f"Unsupported include type: {include}")

def _describe_numeric(self) -> DataFrame:
    """Build the describe() summary for the numeric part of this frame.

    The frame is first passed through ``_drop_non_numeric(permissive=False)``
    and the result is aggregated with count, mean, std, min, the
    25%/50%/75% aggregations, and max.

    Returns:
        DataFrame: The output of ``agg`` over the statistics listed above.
    """
    numeric_only = self._drop_non_numeric(permissive=False)
    summary = numeric_only.agg(
        ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    )
    # ``agg`` is not statically typed as returning a DataFrame here, so the
    # result is narrowed for the type checker.
    return typing.cast(DataFrame, summary)

def _describe_non_numeric(self) -> DataFrame:
    """Build the describe() summary for the non-numeric part of this frame.

    Only columns whose dtype is the string, bool, or bytes dtype from
    ``bigframes.dtypes`` are selected; they are aggregated with
    ``count`` and ``nunique``.

    Returns:
        DataFrame: The output of ``agg`` over the selected columns.
    """
    describable_dtypes = {
        bigframes.dtypes.STRING_DTYPE,
        bigframes.dtypes.BOOL_DTYPE,
        bigframes.dtypes.BYTES_DTYPE,
    }
    selected = self.select_dtypes(include=describable_dtypes)
    summary = selected.agg(["count", "nunique"])
    # Narrow the aggregation result to DataFrame for the type checker.
    return typing.cast(DataFrame, summary)

def skew(self, *, numeric_only: bool = False):
if not numeric_only:
frame = self._raise_on_non_numeric("skew")
Expand Down

0 comments on commit 5c1a4c7

Please sign in to comment.