From 5b355efde122ed76b1cff39900ab8f94f5a13a30 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Wed, 20 Nov 2024 17:43:53 -0800 Subject: [PATCH] fix: Fix error loading local dataframes into bigquery (#1165) --- bigframes/session/loader.py | 2 ++ tests/system/small/test_dataframe.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 90c7d2dc9e..a6a2c88e18 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -158,10 +158,12 @@ def read_pandas_load_job( ordering_col = f"rowid_{suffix}" suffix += 1 + # Maybe we should just convert to pyarrow or parquet? pandas_dataframe_copy = pandas_dataframe.copy() pandas_dataframe_copy.index.names = new_idx_ids pandas_dataframe_copy.columns = pandas.Index(new_col_ids) pandas_dataframe_copy[ordering_col] = np.arange(pandas_dataframe_copy.shape[0]) + pandas_dataframe_copy = pandas_dataframe_copy.reset_index(drop=False) job_config = bigquery.LoadJobConfig() # Specify the datetime dtypes, which is auto-detected as timestamp types. diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index f69eb2eb4a..12ca13eb80 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -99,6 +99,20 @@ def test_df_construct_pandas_load_job(scalars_dfs_maybe_ordered): assert_dfs_equivalent(pd_result, bf_result) +def test_df_construct_structs(session): + pd_frame = pd.Series( + [ + {"version": 1, "project": "pandas"}, + {"version": 2, "project": "pandas"}, + {"version": 1, "project": "numpy"}, + ] + ).to_frame() + bf_series = session.read_pandas(pd_frame) + pd.testing.assert_frame_equal( + bf_series.to_pandas(), pd_frame, check_index_type=False, check_dtype=False + ) + + def test_df_construct_pandas_set_dtype(scalars_dfs): columns = [ "int64_too",