From b53607015abb79be0aa5666681f1c53b5b1bc2b5 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Mon, 7 Oct 2024 18:14:01 -0700 Subject: [PATCH] fix: correct zero row count in DataFrame from table view (#1062) * fix: correct zero row count display in DataFrame from table view * update logic and test --- bigframes/core/nodes.py | 4 +++- tests/system/small/test_dataframe.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index e65040686e..d750ee63fb 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -508,6 +508,7 @@ class GbqTable: table_id: str = field() physical_schema: Tuple[bq.SchemaField, ...] = field() n_rows: int = field() + is_physical_table: bool = field() cluster_cols: typing.Optional[Tuple[str, ...]] @staticmethod @@ -523,6 +524,7 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, n_rows=table.num_rows, + is_physical_table=(table.table_type == "TABLE"), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), @@ -603,7 +605,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: - if self.source.sql_predicate is None: + if self.source.sql_predicate is None and self.source.table.is_physical_table: return self.source.table.n_rows return None diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index cfd6efe9bd..6ee9fb8247 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -1521,6 +1521,26 @@ def test_shape(scalars_dfs): assert bf_result == pd_result +@pytest.mark.parametrize( + "reference_table, test_table", + [ + ( + "bigframes-dev.bigframes_tests_sys.base_table", + "bigframes-dev.bigframes_tests_sys.base_table_view", + ), + ( + "bigframes-dev.bigframes_tests_sys.csv_native_table", + "bigframes-dev.bigframes_tests_sys.csv_external_table", + ), + ], +) +def test_view_and_external_table_shape(session, reference_table, test_table): + reference_df = session.read_gbq(reference_table) + test_df = session.read_gbq(test_table) + + assert test_df.shape == reference_df.shape + + def test_len(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = len(scalars_df)