diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py new file mode 100644 index 0000000000..98f854ad72 --- /dev/null +++ b/bigframes/core/convert.py @@ -0,0 +1,49 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import pandas as pd + +import bigframes.core.indexes as index +import bigframes.series as series + + +def to_bf_series(obj, default_index: index.Index) -> series.Series: + if isinstance(obj, series.Series): + return obj + if isinstance(obj, pd.Series): + return series.Series(obj) + if isinstance(obj, index.Index): + return series.Series(obj, default_index) + if isinstance(obj, pd.Index): + return series.Series(obj, default_index) + if pd.api.types.is_list_like(obj): + return series.Series(obj, default_index) + else: + raise TypeError(f"Cannot interpret {obj} as series.") + + +def to_pd_series(obj, default_index: pd.Index) -> pd.Series: + if isinstance(obj, series.Series): + return obj.to_pandas() + if isinstance(obj, pd.Series): + return obj + if isinstance(obj, index.Index): + return pd.Series(obj.to_pandas(), default_index) + if isinstance(obj, pd.Index): + return pd.Series(obj, default_index) + if pd.api.types.is_list_like(obj): + return pd.Series(obj, default_index) + else: + raise TypeError(f"Cannot interpret {obj} as series.") diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 066b082490..97a100474a 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -50,6 +50,7 @@ from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks +import bigframes.core.convert import bigframes.core.expression as ex import bigframes.core.groupby as groupby import bigframes.core.guid @@ -663,22 +664,20 @@ def _apply_binop( how: str = "outer", reverse: bool = False, ): - if isinstance(other, (float, int)): + if isinstance(other, (float, int, bool)): return self._apply_scalar_binop(other, op, reverse=reverse) - elif isinstance(other, indexes.Index): - return self._apply_series_binop( - other.to_series(index=self.index), - op, - axis=axis, - how=how, - reverse=reverse, - ) - elif isinstance(other, bigframes.series.Series): - return self._apply_series_binop( - other, op, axis=axis, how=how, reverse=reverse - ) elif isinstance(other, DataFrame): return self._apply_dataframe_binop(other, op, how=how, reverse=reverse) + elif isinstance(other, pandas.DataFrame): + return self._apply_dataframe_binop( + DataFrame(other), op, how=how, reverse=reverse + ) + elif utils.get_axis_number(axis) == 0: + bf_series = bigframes.core.convert.to_bf_series(other, self.index) + return self._apply_series_binop_axis_0(bf_series, op, how, reverse) + elif utils.get_axis_number(axis) == 1: + pd_series = bigframes.core.convert.to_pd_series(other, self.columns) + return self._apply_series_binop_axis_1(pd_series, op, how, reverse) raise NotImplementedError( f"binary operation is not implemented on the second operand of type {type(other).__name__}." f"{constants.FEEDBACK_LINK}" @@ -700,22 +699,13 @@ def _apply_scalar_binop( block = block.drop_columns([column_id]) return DataFrame(block) - def _apply_series_binop( + def _apply_series_binop_axis_0( self, other: bigframes.series.Series, op: ops.BinaryOp, - axis: str | int = "columns", how: str = "outer", reverse: bool = False, ) -> DataFrame: - if axis not in ("columns", "index", 0, 1): - raise ValueError(f"Invalid input: axis {axis}.") - - if axis in ("columns", 1): - raise NotImplementedError( - f"Row Series operations haven't been supported. {constants.FEEDBACK_LINK}" - ) - block, (get_column_left, get_column_right) = self._block.join( other._block, how=how ) @@ -738,6 +728,63 @@ def _apply_series_binop( block = block.with_index_labels(self.index.names) return DataFrame(block) + def _apply_series_binop_axis_1( + self, + other: pandas.Series, + op: ops.BinaryOp, + how: str = "outer", + reverse: bool = False, + ) -> DataFrame: + # Somewhat different alignment than df-df so separate codepath for now. + if self.columns.equals(other.index): + columns, lcol_indexer, rcol_indexer = self.columns, None, None + else: + if not (self.columns.is_unique and other.index.is_unique): + raise ValueError("Cannot align non-unique indices") + columns, lcol_indexer, rcol_indexer = self.columns.join( + other.index, how=how, return_indexers=True + ) + + binop_result_ids = [] + + column_indices = zip( + lcol_indexer if (lcol_indexer is not None) else range(len(columns)), + rcol_indexer if (rcol_indexer is not None) else range(len(columns)), + ) + + block = self._block + for left_index, right_index in column_indices: + if left_index >= 0 and right_index >= 0: # -1 indices indicate missing + self_col_id = self._block.value_columns[left_index] + other_scalar = other.iloc[right_index] + expr = ( + op.as_expr(ex.const(other_scalar), self_col_id) + if reverse + else op.as_expr(self_col_id, ex.const(other_scalar)) + ) + elif left_index >= 0: + self_col_id = self._block.value_columns[left_index] + expr = ( + op.as_expr(ex.const(None), self_col_id) + if reverse + else op.as_expr(self_col_id, ex.const(None)) + ) + elif right_index >= 0: + other_scalar = other.iloc[right_index] + expr = ( + op.as_expr(ex.const(other_scalar), ex.const(None)) + if reverse + else op.as_expr(ex.const(None), ex.const(other_scalar)) + ) + else: + # Should not be possible + raise ValueError("No right or left index.") + block, result_col_id = block.project_expr(expr) + binop_result_ids.append(result_col_id) + + block = block.select_columns(binop_result_ids) + return DataFrame(block.with_column_labels(columns)) + def _apply_dataframe_binop( self, other: DataFrame, diff --git a/bigframes/typing.py b/bigframes/typing.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index cf907b02d6..ae80a088b5 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -27,6 +27,7 @@ import bigframes import bigframes._config.display_options as display_options +import bigframes.core.indexes as bf_indexes import bigframes.dataframe as dataframe import bigframes.series as series from tests.system.utils import ( @@ -2074,6 +2075,37 @@ def test_series_binop_axis_index( assert_pandas_df_equal(bf_result, pd_result) +@skip_legacy_pandas +@pytest.mark.parametrize( + ("input"), + [ + ((1000, 2000, 3000)), + (pd.Index([1000, 2000, 3000])), + (bf_indexes.Index([1000, 2000, 3000])), + (pd.Series((1000, 2000), index=["int64_too", "float64_col"])), + (series.Series((1000, 2000), index=["int64_too", "float64_col"])), + ], + ids=[ + "tuple", + "pd_index", + "bf_index", + "pd_series", + "bf_series", + ], +) +def test_listlike_binop_axis_1(scalars_dfs, input): + scalars_df, scalars_pandas_df = scalars_dfs + + df_columns = ["int64_col", "float64_col", "int64_too"] + + bf_result = scalars_df[df_columns].add(input, axis=1).to_pandas() + if hasattr(input, "to_pandas"): + input = input.to_pandas() + pd_result = scalars_pandas_df[df_columns].add(input, axis=1) + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + @pytest.mark.parametrize( ("left_labels", "right_labels"), [