From 01451fc23f9e0e15bdedb94b3558bcb338d126e0 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 14 Nov 2024 21:21:42 +0000 Subject: [PATCH 1/2] fix: Fix null index join with 'on' arg --- bigframes/dataframe.py | 20 ++++++++++++-------- tests/system/small/test_null_index.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0b639a5649..185a756fed 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2696,6 +2696,7 @@ def join( self, other: DataFrame, *, on: Optional[str] = None, how: str = "left" ) -> DataFrame: left, right = self, other + if not left.columns.intersection(right.columns).empty: raise NotImplementedError( f"Deduping column names is not implemented. {constants.FEEDBACK_LINK}" @@ -2721,11 +2722,13 @@ def join( ) # Switch left index with on column left_columns = left.columns - left_idx_original_names = left.index.names + left_idx_original_names = left.index.names if left._has_index else () left_idx_names_in_cols = [ - f"bigframes_left_idx_name_{i}" for i in range(len(left.index.names)) + f"bigframes_left_idx_name_{i}" + for i in range(len(left_idx_original_names)) ] - left.index.names = left_idx_names_in_cols + if left._has_index: + left.index.names = left_idx_names_in_cols left = left.reset_index(drop=False) left = left.set_index(on) @@ -2736,11 +2739,12 @@ def join( combined_df = combined_df.set_index(left_idx_names_in_cols) # To be consistent with Pandas - combined_df.index.names = ( - left_idx_original_names - if how in ("inner", "left") - else ([None] * len(combined_df.index.names)) - ) + if combined_df._has_index: + combined_df.index.names = ( + left_idx_original_names + if how in ("inner", "left") + else ([None] * len(combined_df.index.names)) + ) # Reorder columns combined_df = combined_df[list(left_columns) + list(right.columns)] diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index c5be49a56b..75dddc2cb3 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -268,6 +268,25 @@ def test_null_index_series_self_join( ) +def test_null_index_series_self_join_on( + scalars_df_null_index, scalars_pandas_df_default_index +): + # caller doesn't need index, but do need index on arg to join with 'on' + bf_result = scalars_df_null_index[["int64_col", "string_col"]].join( + scalars_df_null_index[["int64_too", "bool_col"]].set_index("int64_too"), + on="int64_col", + ) + pd_result = scalars_pandas_df_default_index[["int64_col", "string_col"]].join( + scalars_pandas_df_default_index[["int64_too", "bool_col"]].set_index( + "int64_too" + ), + on="int64_col", + ) + pd.testing.assert_frame_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + def test_null_index_series_self_aligns( scalars_df_null_index, scalars_pandas_df_default_index ): From bbc815d6415bbfa555add6e0b4ac6249829b4ea5 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 20 Nov 2024 01:20:48 +0000 Subject: [PATCH 2/2] add test --- tests/system/small/test_null_index.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index 75dddc2cb3..da9baa0069 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -341,6 +341,25 @@ def test_null_index_df_concat(scalars_df_null_index, scalars_pandas_df_default_i ) +def test_null_index_map_dict_input( + scalars_df_null_index, scalars_pandas_df_default_index +): + + local_map = dict() + # construct a local map, incomplete to cover behavior + for s in scalars_pandas_df_default_index.string_col[:-3]: + if isinstance(s, str): + local_map[s] = ord(s[0]) + + pd_result = scalars_pandas_df_default_index.string_col.map(local_map) + pd_result = pd_result.astype("Int64") # pandas type differences + bf_result = scalars_df_null_index.string_col.map(local_map) + + pd.testing.assert_series_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + def test_null_index_align_error(scalars_df_null_index): with pytest.raises(bigframes.exceptions.NullIndexError): _ = (