-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Allow DataFrame binary ops to align on either axis and with loc… #544
Changes from all commits
1e0c39e
78369f7
bd5a8d1
3d74bef
b0624ba
ed4eef0
f00de99
f9e99a2
420ea03
8128b84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from __future__ import annotations | ||
|
||
import pandas as pd | ||
|
||
import bigframes.core.indexes as index | ||
import bigframes.series as series | ||
|
||
|
||
def to_bf_series(obj, default_index: index.Index) -> series.Series: | ||
if isinstance(obj, series.Series): | ||
return obj | ||
if isinstance(obj, pd.Series): | ||
return series.Series(obj) | ||
if isinstance(obj, index.Index): | ||
return series.Series(obj, default_index) | ||
if isinstance(obj, pd.Index): | ||
return series.Series(obj, default_index) | ||
if pd.api.types.is_list_like(obj): | ||
return series.Series(obj, default_index) | ||
else: | ||
raise TypeError(f"Cannot interpret {obj} as series.") | ||
|
||
|
||
def to_pd_series(obj, default_index: pd.Index) -> pd.Series: | ||
if isinstance(obj, series.Series): | ||
return obj.to_pandas() | ||
if isinstance(obj, pd.Series): | ||
return obj | ||
if isinstance(obj, index.Index): | ||
return pd.Series(obj.to_pandas(), default_index) | ||
if isinstance(obj, pd.Index): | ||
return pd.Series(obj, default_index) | ||
if pd.api.types.is_list_like(obj): | ||
return pd.Series(obj, default_index) | ||
else: | ||
raise TypeError(f"Cannot interpret {obj} as series.") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
|
||
import bigframes | ||
import bigframes._config.display_options as display_options | ||
import bigframes.core.indexes as bf_indexes | ||
import bigframes.dataframe as dataframe | ||
import bigframes.series as series | ||
from tests.system.utils import ( | ||
|
@@ -2074,6 +2075,37 @@ def test_series_binop_axis_index( | |
assert_pandas_df_equal(bf_result, pd_result) | ||
|
||
|
||
@skip_legacy_pandas | ||
@pytest.mark.parametrize( | ||
("input"), | ||
[ | ||
((1000, 2000, 3000)), | ||
(pd.Index([1000, 2000, 3000])), | ||
(bf_indexes.Index([1000, 2000, 3000])), | ||
(pd.Series((1000, 2000), index=["int64_too", "float64_col"])), | ||
(series.Series((1000, 2000), index=["int64_too", "float64_col"])), | ||
], | ||
ids=[ | ||
"tuple", | ||
"pd_index", | ||
"bf_index", | ||
"pd_series", | ||
"bf_series", | ||
], | ||
) | ||
def test_listlike_binop_axis_1(scalars_dfs, input): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add axis=1 to some existing tests for various binary ops? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. axis=1 completely changes alignment so the objects would just fail to align on this axis for existing tests. I'm not too worried about the different ops breaking, the actual scalar op compilation doesn't change for axis=1, the big worry is alignment failing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, right, the existing objects wouldn't wouldn't work. Okay, sounds good. |
||
scalars_df, scalars_pandas_df = scalars_dfs | ||
|
||
df_columns = ["int64_col", "float64_col", "int64_too"] | ||
|
||
bf_result = scalars_df[df_columns].add(input, axis=1).to_pandas() | ||
if hasattr(input, "to_pandas"): | ||
input = input.to_pandas() | ||
pd_result = scalars_pandas_df[df_columns].add(input, axis=1) | ||
|
||
assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("left_labels", "right_labels"), | ||
[ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
might need to add pandas.DataFrame to the type hint for other?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For what purpose? Mypy is able to infer based on the
isinstance
checks.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, gotcha, but should we remove the other type hints then? Just for consistency/cleanliness.