test: check extreme DATE/DATETIME values can be loaded from pandas DataFrame (#1078)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Towards #1076 🦕
(edit: moved to googleapis/python-db-dtypes-pandas#45)
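
A minimal sketch of the round trip this test exercises: a DataFrame holding the extreme DATE/DATETIME bounds is loaded with an explicit schema and then read back. It assumes a configured client with default credentials, placeholder project/dataset/table names, and the pandas extras (`pyarrow`, `db-dtypes`) installed; it is an illustration of the scenario, not code from this commit.

```python
import datetime

import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials and project
table_id = "my-project.my_dataset.extreme_dates"  # hypothetical table ID

schema = [
    bigquery.SchemaField("row_num", "INTEGER"),
    bigquery.SchemaField("date_col", "DATE"),
    bigquery.SchemaField("dt_col", "DATETIME"),
]

# dtype="object" keeps values outside pandas' datetime64[ns] range as plain
# Python date/datetime objects instead of forcing a lossy conversion.
df = pandas.DataFrame(
    {
        "row_num": [1, 2],
        "date_col": [datetime.date(1, 1, 1), datetime.date(9999, 12, 31)],
        "dt_col": [
            datetime.datetime(1, 1, 1, 0, 0, 0),
            datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
        ],
    },
    dtype="object",
)

# Load with an explicit schema so the columns become DATE/DATETIME in BigQuery.
job = client.load_table_from_dataframe(
    df, table_id, job_config=bigquery.LoadJobConfig(schema=schema)
)
job.result()

# Read the rows back and check the extreme values survive the round trip.
result = client.list_rows(table_id).to_dataframe().sort_values("row_num")
assert result["date_col"].iloc[0] == datetime.date(1, 1, 1)
assert result["dt_col"].iloc[1] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
```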
tswast authored Dec 2, 2021
1 parent a135956 commit be6eb34
Showing 1 changed file with 20 additions and 21 deletions.
41 changes: 20 additions & 21 deletions tests/system/test_pandas.py
@@ -268,7 +268,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
See: https://github.com/googleapis/google-cloud-python/issues/7370
"""
# Schema with all scalar types.
- scalars_schema = (
+ table_schema = (
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("bytes_col", "BYTES"),
bigquery.SchemaField("date_col", "DATE"),
@@ -283,15 +283,6 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)

- table_schema = scalars_schema + (
- # TODO: Array columns can't be read due to NULLABLE versus REPEATED
- # mode mismatch. See:
- # https://issuetracker.google.com/133415569#comment3
- # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
- # TODO: Support writing StructArrays to Parquet. See:
- # https://jira.apache.org/jira/browse/ARROW-2587
- # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
- )
num_rows = 100
nulls = [None] * num_rows
df_data = [
@@ -372,7 +363,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
# See:
# https://github.com/googleapis/python-bigquery/issues/61
# https://issuetracker.google.com/issues/151765076
- scalars_schema = (
+ table_schema = (
+ bigquery.SchemaField("row_num", "INTEGER"),
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("bytes_col", "BYTES"),
bigquery.SchemaField("date_col", "DATE"),
@@ -387,17 +379,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)

- table_schema = scalars_schema + (
- # TODO: Array columns can't be read due to NULLABLE versus REPEATED
- # mode mismatch. See:
- # https://issuetracker.google.com/133415569#comment3
- # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
- # TODO: Support writing StructArrays to Parquet. See:
- # https://jira.apache.org/jira/browse/ARROW-2587
- # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
- )
-
df_data = [
("row_num", [1, 2, 3]),
("bool_col", [True, None, False]),
("bytes_col", [b"abc", None, b"def"]),
("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
@@ -464,6 +447,22 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
assert tuple(table.schema) == table_schema
assert table.num_rows == 3

+ result = bigquery_client.list_rows(table).to_dataframe()
+ result.sort_values("row_num", inplace=True)
+
+ # Check that extreme DATE/DATETIME values are loaded correctly.
+ # https://github.com/googleapis/python-bigquery/issues/1076
+ assert result["date_col"][0] == datetime.date(1, 1, 1)
+ assert result["date_col"][2] == datetime.date(9999, 12, 31)
+ assert result["dt_col"][0] == datetime.datetime(1, 1, 1, 0, 0, 0)
+ assert result["dt_col"][2] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+ assert result["ts_col"][0] == datetime.datetime(
+ 1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
+ )
+ assert result["ts_col"][2] == datetime.datetime(
+ 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ )
+

def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id):
"""Test that a DataFrame with struct datatype can be uploaded if a
