From f95bf754302ceaf79a5d379e8d357d3103b188d2 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 7 Oct 2024 23:44:38 +0000 Subject: [PATCH 1/2] fix: correct zero row count display in DataFrame from table view --- bigframes/core/nodes.py | 4 +++- tests/system/small/test_dataframe.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index e65040686e..9c30bc762b 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -508,6 +508,7 @@ class GbqTable: table_id: str = field() physical_schema: Tuple[bq.SchemaField, ...] = field() n_rows: int = field() + is_view: bool = field() cluster_cols: typing.Optional[Tuple[str, ...]] @staticmethod @@ -523,6 +524,7 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, n_rows=table.num_rows, + is_view=(table.table_type == "VIEW"), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), @@ -603,7 +605,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: - if self.source.sql_predicate is None: + if self.source.sql_predicate is None and not self.source.table.is_view: return self.source.table.n_rows return None diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index cfd6efe9bd..6793a2bb6c 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -1521,6 +1521,13 @@ def test_shape(scalars_dfs): assert bf_result == pd_result +def test_view_shape(session): + view_df = session.read_gbq("bigframes-dev.bigframes_tests_sys.base_table_view") + table_ref = session.read_gbq("bigframes-dev.bigframes_tests_sys.base_table") + + assert view_df.shape == table_ref.shape + + def test_len(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = len(scalars_df) From 24cd146ccacab6f203e931e4f48d784e5e75b951 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Tue, 8 Oct 2024 00:23:09 +0000 Subject: [PATCH 2/2] update logic and test --- bigframes/core/nodes.py | 6 +++--- tests/system/small/test_dataframe.py | 21 +++++++++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 9c30bc762b..d750ee63fb 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -508,7 +508,7 @@ class GbqTable: table_id: str = field() physical_schema: Tuple[bq.SchemaField, ...] = field() n_rows: int = field() - is_view: bool = field() + is_physical_table: bool = field() cluster_cols: typing.Optional[Tuple[str, ...]] @staticmethod @@ -524,7 +524,7 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, n_rows=table.num_rows, - is_view=(table.table_type == "VIEW"), + is_physical_table=(table.table_type == "TABLE"), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), @@ -605,7 +605,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: - if self.source.sql_predicate is None and not self.source.table.is_view: + if self.source.sql_predicate is None and self.source.table.is_physical_table: return self.source.table.n_rows return None diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 6793a2bb6c..6ee9fb8247 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -1521,11 +1521,24 @@ def test_shape(scalars_dfs): assert bf_result == pd_result -def test_view_shape(session): - view_df = session.read_gbq("bigframes-dev.bigframes_tests_sys.base_table_view") - table_ref = session.read_gbq("bigframes-dev.bigframes_tests_sys.base_table") +@pytest.mark.parametrize( + "reference_table, test_table", + [ + ( + "bigframes-dev.bigframes_tests_sys.base_table", + "bigframes-dev.bigframes_tests_sys.base_table_view", + ), + ( + "bigframes-dev.bigframes_tests_sys.csv_native_table", + "bigframes-dev.bigframes_tests_sys.csv_external_table", + ), + ], +) +def test_view_and_external_table_shape(session, reference_table, test_table): + reference_df = session.read_gbq(reference_table) + test_df = session.read_gbq(test_table) - assert view_df.shape == table_ref.shape + assert test_df.shape == reference_df.shape def test_len(scalars_dfs):