From be6eb3429f9fd6ad5839826983ba96a0e0a071d3 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Dec 2021 14:42:14 -0600
Subject: [PATCH] test: check extreme DATE/DATETIME values can be loaded from
 pandas DataFrame (#1078)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a
few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a
  [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose)
  before writing your code! That way we can discuss the change, evaluate
  designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Towards #1076 🦕 (edit: moved to
https://github.com/googleapis/python-db-dtypes-pandas/issues/45 )
---
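A minimal sketch of the round trip the new assertions exercise, for context.
It is an illustration only, assuming google-cloud-bigquery with pandas and
pyarrow installed (plus db-dtypes on recent releases), default credentials,
and an existing dataset; the table ID below is a placeholder.

    import datetime

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "your-project.your_dataset.extreme_datetimes"  # placeholder

    table_schema = (
        bigquery.SchemaField("row_num", "INTEGER"),
        bigquery.SchemaField("date_col", "DATE"),
        bigquery.SchemaField("dt_col", "DATETIME"),
        bigquery.SchemaField("ts_col", "TIMESTAMP"),
    )

    # dtype="object" keeps the values as Python date/datetime objects;
    # pandas' datetime64[ns] cannot represent years 1 or 9999.
    dataframe = pandas.DataFrame(
        {
            "row_num": [1, 2],
            "date_col": [datetime.date(1, 1, 1), datetime.date(9999, 12, 31)],
            "dt_col": [
                datetime.datetime(1, 1, 1, 0, 0, 0),
                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
            ],
            "ts_col": [
                datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
                datetime.datetime(
                    9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
                ),
            ],
        },
        dtype="object",
    )

    # An explicit schema avoids guessing DATE/DATETIME types from the
    # object-dtype columns.
    job_config = bigquery.LoadJobConfig(schema=table_schema)
    load_job = client.load_table_from_dataframe(
        dataframe, table_id, job_config=job_config
    )
    load_job.result()

    # Read the rows back and confirm the extreme values survived the trip.
    result = client.list_rows(table_id).to_dataframe()
    result = result.sort_values("row_num").reset_index(drop=True)
    assert result["date_col"][0] == datetime.date(1, 1, 1)
    assert result["date_col"][1] == datetime.date(9999, 12, 31)
    assert result["dt_col"][1] == datetime.datetime(
        9999, 12, 31, 23, 59, 59, 999999
    )
    assert result["ts_col"][0] == datetime.datetime(
        1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
    )

The dtype="object" argument matters here: without it, pandas coerces these
columns to datetime64[ns], which cannot hold years 1 or 9999.
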
 tests/system/test_pandas.py | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
index 1541dd3b9..f3534cd19 100644
--- a/tests/system/test_pandas.py
+++ b/tests/system/test_pandas.py
@@ -268,7 +268,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
     See: https://github.com/googleapis/google-cloud-python/issues/7370
     """
     # Schema with all scalar types.
-    scalars_schema = (
+    table_schema = (
         bigquery.SchemaField("bool_col", "BOOLEAN"),
         bigquery.SchemaField("bytes_col", "BYTES"),
         bigquery.SchemaField("date_col", "DATE"),
@@ -283,15 +283,6 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
         bigquery.SchemaField("ts_col", "TIMESTAMP"),
     )
 
-    table_schema = scalars_schema + (
-        # TODO: Array columns can't be read due to NULLABLE versus REPEATED
-        # mode mismatch. See:
-        # https://issuetracker.google.com/133415569#comment3
-        # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
-        # TODO: Support writing StructArrays to Parquet. See:
-        # https://jira.apache.org/jira/browse/ARROW-2587
-        # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
-    )
     num_rows = 100
     nulls = [None] * num_rows
     df_data = [
@@ -372,7 +363,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
     # See:
     # https://github.com/googleapis/python-bigquery/issues/61
     # https://issuetracker.google.com/issues/151765076
-    scalars_schema = (
+    table_schema = (
+        bigquery.SchemaField("row_num", "INTEGER"),
         bigquery.SchemaField("bool_col", "BOOLEAN"),
         bigquery.SchemaField("bytes_col", "BYTES"),
         bigquery.SchemaField("date_col", "DATE"),
@@ -387,17 +379,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
         bigquery.SchemaField("ts_col", "TIMESTAMP"),
     )
 
-    table_schema = scalars_schema + (
-        # TODO: Array columns can't be read due to NULLABLE versus REPEATED
-        # mode mismatch. See:
-        # https://issuetracker.google.com/133415569#comment3
-        # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
-        # TODO: Support writing StructArrays to Parquet. See:
-        # https://jira.apache.org/jira/browse/ARROW-2587
-        # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
-    )
-
     df_data = [
+        ("row_num", [1, 2, 3]),
         ("bool_col", [True, None, False]),
         ("bytes_col", [b"abc", None, b"def"]),
         ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
@@ -464,6 +447,22 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
     assert tuple(table.schema) == table_schema
     assert table.num_rows == 3
 
+    result = bigquery_client.list_rows(table).to_dataframe()
+    result.sort_values("row_num", inplace=True)
+
+    # Check that extreme DATE/DATETIME values are loaded correctly.
+    # https://github.com/googleapis/python-bigquery/issues/1076
+    assert result["date_col"][0] == datetime.date(1, 1, 1)
+    assert result["date_col"][2] == datetime.date(9999, 12, 31)
+    assert result["dt_col"][0] == datetime.datetime(1, 1, 1, 0, 0, 0)
+    assert result["dt_col"][2] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+    assert result["ts_col"][0] == datetime.datetime(
+        1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
+    )
+    assert result["ts_col"][2] == datetime.datetime(
+        9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+    )
+
 
 def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id):
     """Test that a DataFrame with struct datatype can be uploaded if a