Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Remove index requirement from some dataframe APIs #1073

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,8 @@ def vector_search(
)
if index_col_ids is not None:
df = query._session.read_gbq(sql, index_col=index_col_ids)
df.index.names = index_labels
else:
df = query._session.read_gbq(sql)
df.index.names = index_labels

return df
15 changes: 9 additions & 6 deletions bigframes/core/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ def eval(df: dataframe.DataFrame, expr: str, target: Optional[dataframe.DataFram
Returns:
Result of evaluation.
"""
index_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(
df.index.get_level_values(level).to_series()
)
for level, name in enumerate(df.index.names)
}
if df._has_index:
index_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(
df.index.get_level_values(level).to_series()
)
for level, name in enumerate(df.index.names)
}
else:
index_resolver = {}
column_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(series)
for name, series in df.items()
Expand Down
3 changes: 2 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ def _apply_series_binop_axis_0(
reverse: bool = False,
) -> DataFrame:
bf_series = bigframes.core.convert.to_bf_series(
other, self.index, self._session
other, self.index if self._has_index else None, self._session
)
aligned_block, columns, expr_pairs = self._block._align_axis_0(
bf_series._block, how=how
Expand Down Expand Up @@ -3179,6 +3179,7 @@ def to_gbq(
clustering_columns: Union[pandas.Index, Iterable[typing.Hashable]] = (),
labels: dict[str, str] = {},
) -> str:
index = index and self._has_index
temp_table_ref = None

if destination_table is None:
Expand Down
53 changes: 53 additions & 0 deletions tests/system/small/test_null_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@
from tests.system.utils import skip_legacy_pandas


def test_null_index_to_gbq(session, scalars_df_null_index, dataset_id_not_created):
    """Check that the null-index fixture can be written with ``to_gbq``.

    The frame is written to an explicitly named table (clustered on
    ``int64_col``), then read back through ``session.read_gbq`` to confirm
    that rows actually landed in the destination.
    """
    dataset_id = dataset_id_not_created
    destination_table = f"{dataset_id}.scalars_df_unindexed"

    result_table = scalars_df_null_index.to_gbq(
        destination_table, clustering_columns=["int64_col"]
    )
    # A destination is always supplied in this test, so the returned table id
    # must be exactly the one requested; no "anonymous table" fallback applies.
    assert result_table == destination_table

    loaded_scalars_df = session.read_gbq(result_table)
    assert not loaded_scalars_df.empty


def test_null_index_materialize(scalars_df_null_index, scalars_pandas_df_default_index):
bf_result = scalars_df_null_index.to_pandas()
pd.testing.assert_frame_equal(
Expand Down Expand Up @@ -83,6 +100,23 @@ def test_null_index_aggregate(scalars_df_null_index, scalars_pandas_df_default_i
)


def test_null_index_binop_series_axis_0(
    scalars_df_null_index, scalars_pandas_df_default_index
):
    """Compare ``DataFrame.add(series, axis=0)`` on the null-index fixture with pandas.

    Two integer columns are selected from each fixture and the ``int64_col``
    series is added to them along axis 0. The frames are compared while
    ignoring dtype and index-type differences.
    """
    columns = ["int64_col", "int64_too"]

    # Result under test: the null-index frame, materialized to pandas.
    bf_frame = scalars_df_null_index[columns]
    bf_series = scalars_df_null_index["int64_col"]
    actual = bf_frame.add(bf_series, axis=0).to_pandas()

    # Reference result computed directly with pandas.
    pd_frame = scalars_pandas_df_default_index[columns]
    pd_series = scalars_pandas_df_default_index.int64_col
    expected = pd_frame.add(pd_series, axis=0)

    pd.testing.assert_frame_equal(
        actual, expected, check_dtype=False, check_index_type=False
    )


def test_null_index_groupby_aggregate(
scalars_df_null_index, scalars_pandas_df_default_index
):
Expand Down Expand Up @@ -139,6 +173,25 @@ def test_null_index_merge_left_null_index_object(
assert got.shape == expected.shape


@skip_legacy_pandas
@pytest.mark.parametrize(
    ("expr",),
    [
        ("new_col = int64_col + int64_too",),
        ("new_col = (rowindex > 3) | bool_col",),
        ("int64_too = bool_col\nnew_col2 = rowindex",),
    ],
)
def test_null_index_df_eval(
    scalars_df_null_index, scalars_pandas_df_default_index, expr
):
    """Check that ``DataFrame.eval`` on the null-index fixture matches pandas.

    Each parametrized assignment expression is evaluated on both fixtures.
    The materialized result is compared with the pandas result, ignoring
    index-type differences.
    """
    evaluated = scalars_df_null_index.eval(expr)
    actual = evaluated.to_pandas()
    expected = scalars_pandas_df_default_index.eval(expr)

    pd.testing.assert_frame_equal(actual, expected, check_index_type=False)


def test_null_index_merge_right_null_index_object(
scalars_df_null_index, scalars_df_default_index, scalars_pandas_df_default_index
):
Expand Down