tests: add system tests for to_arrow with extreme values (#813)
* tests: add system tests for `to_arrow` with extreme values

* fix bad merge

* revert pandas tests

* revert pandas tests

* fix link to decimal types

Co-authored-by: Peter Lamut <[email protected]>

* use north and south pole as extreme geography points

* add another row of extreme values

* base64 encode bytes columns

Co-authored-by: Peter Lamut <[email protected]>
tswast and plamut authored Jul 27, 2021
1 parent da87fd9 commit c293e3c
Showing 6 changed files with 205 additions and 2 deletions.
2 changes: 2 additions & 0 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -93,6 +93,8 @@ def pyarrow_numeric():


def pyarrow_bignumeric():
# 77th digit is partial.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
return pyarrow.decimal256(76, 38)


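For context, BIGNUMERIC has a scale of 38 and a precision of 76.76 digits (the 77th digit is partial, per the linked docs), so `decimal256(76, 38)` is the widest Arrow type that still represents every value exactly. A minimal sketch (assuming pyarrow >= 3.0, which added `decimal256`) showing that the extreme values used in these tests fit:

```python
import decimal

import pyarrow

# The widest BIGNUMERIC-style values still fit in decimal256(76, 38).
bignumeric = pyarrow.decimal256(76, 38)
values = [
    decimal.Decimal("9" * 38 + "." + "9" * 38),  # 76 significant digits
    decimal.Decimal("-0." + "0" * 37 + "1"),     # smallest magnitude at scale 38
    None,                                        # NULLs round-trip as well
]
print(pyarrow.array(values, type=bignumeric))
```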
2 changes: 2 additions & 0 deletions tests/data/scalars.jsonl
@@ -0,0 +1,2 @@
{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
5 changes: 5 additions & 0 deletions tests/data/scalars_extreme.jsonl
@@ -0,0 +1,5 @@
{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
62 changes: 62 additions & 0 deletions tests/data/scalars_schema.json
@@ -0,0 +1,62 @@
[
{
"mode": "NULLABLE",
"name": "timestamp_col",
"type": "TIMESTAMP"
},
{
"mode": "NULLABLE",
"name": "time_col",
"type": "TIME"
},
{
"mode": "NULLABLE",
"name": "float64_col",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "datetime_col",
"type": "DATETIME"
},
{
"mode": "NULLABLE",
"name": "bignumeric_col",
"type": "BIGNUMERIC"
},
{
"mode": "NULLABLE",
"name": "numeric_col",
"type": "NUMERIC"
},
{
"mode": "NULLABLE",
"name": "geography_col",
"type": "GEOGRAPHY"
},
{
"mode": "NULLABLE",
"name": "date_col",
"type": "DATE"
},
{
"mode": "NULLABLE",
"name": "string_col",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "bool_col",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "bytes_col",
"type": "BYTES"
},
{
"mode": "NULLABLE",
"name": "int64_col",
"type": "INTEGER"
}
]
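This schema file is consumed by `Client.schema_from_json`, which parses it into `SchemaField` objects (see the fixtures in `conftest.py` below). A standalone sketch, assuming default credentials are configured:

```python
from google.cloud import bigquery

client = bigquery.Client()
schema = client.schema_from_json("tests/data/scalars_schema.json")
print(schema[0])  # SchemaField('timestamp_col', 'TIMESTAMP', 'NULLABLE', ...)
```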
48 changes: 46 additions & 2 deletions tests/system/conftest.py
@@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pathlib

import pytest
import test_utils.prefixer

from google.cloud import bigquery
from google.cloud.bigquery import enums
from . import helpers


prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system")

DATA_DIR = pathlib.Path(__file__).parent.parent / "data"


@pytest.fixture(scope="session", autouse=True)
def cleanup_datasets(bigquery_client: bigquery.Client):
@@ -36,6 +41,11 @@ def bigquery_client():
return bigquery.Client()


@pytest.fixture(scope="session")
def project_id(bigquery_client: bigquery.Client):
return bigquery_client.project


@pytest.fixture(scope="session")
def bqstorage_client(bigquery_client):
from google.cloud import bigquery_storage
@@ -54,3 +64,37 @@ def dataset_id(bigquery_client):
@pytest.fixture
def table_id(dataset_id):
return f"{dataset_id}.table_{helpers.temp_suffix()}"


@pytest.fixture(scope="session")
def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
full_table_id = f"{project_id}.{dataset_id}.scalars"
with open(DATA_DIR / "scalars.jsonl", "rb") as data_file:
job = bigquery_client.load_table_from_file(
data_file, full_table_id, job_config=job_config
)
job.result()
yield full_table_id
bigquery_client.delete_table(full_table_id)


@pytest.fixture(scope="session")
def scalars_extreme_table(
bigquery_client: bigquery.Client, project_id: str, dataset_id: str
):
schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
full_table_id = f"{project_id}.{dataset_id}.scalars_extreme"
with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file:
job = bigquery_client.load_table_from_file(
data_file, full_table_id, job_config=job_config
)
job.result()
yield full_table_id
bigquery_client.delete_table(full_table_id)
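The two fixtures are identical apart from the data file and the table name, so they could share a helper. A sketch of one possible refactoring inside this `conftest.py` (the `load_scalars_table` name is hypothetical, not part of this commit):

```python
def load_scalars_table(
    bigquery_client: bigquery.Client,
    project_id: str,
    dataset_id: str,
    data_path: str = "scalars.jsonl",
) -> str:
    # Derive the table name from the file name: scalars.jsonl -> scalars.
    table_name = data_path.replace(".jsonl", "")
    full_table_id = f"{project_id}.{dataset_id}.{table_name}"
    job_config = bigquery.LoadJobConfig()
    job_config.schema = bigquery_client.schema_from_json(
        DATA_DIR / "scalars_schema.json"
    )
    job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
    with open(DATA_DIR / data_path, "rb") as data_file:
        job = bigquery_client.load_table_from_file(
            data_file, full_table_id, job_config=job_config
        )
    job.result()
    return full_table_id
```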
88 changes: 88 additions & 0 deletions tests/system/test_arrow.py
@@ -0,0 +1,88 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""System tests for Arrow connector."""

import pytest

pyarrow = pytest.importorskip(
"pyarrow", minversion="3.0.0"
) # Needs decimal256 for BIGNUMERIC columns.


@pytest.mark.parametrize(
("max_results", "scalars_table_name"),
(
(None, "scalars_table"), # Use BQ Storage API.
(10, "scalars_table"), # Use REST API.
(None, "scalars_extreme_table"), # Use BQ Storage API.
(10, "scalars_extreme_table"), # Use REST API.
),
)
def test_list_rows_nullable_scalars_dtypes(
bigquery_client,
scalars_table,
scalars_extreme_table,
max_results,
scalars_table_name,
):
table_id = scalars_table
if scalars_table_name == "scalars_extreme_table":
table_id = scalars_extreme_table
arrow_table = bigquery_client.list_rows(
table_id, max_results=max_results,
).to_arrow()

schema = arrow_table.schema
bignumeric_type = schema.field("bignumeric_col").type
# 77th digit is partial.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
assert bignumeric_type.precision in {76, 77}
assert bignumeric_type.scale == 38

bool_type = schema.field("bool_col").type
assert bool_type.equals(pyarrow.bool_())

bytes_type = schema.field("bytes_col").type
assert bytes_type.equals(pyarrow.binary())

date_type = schema.field("date_col").type
assert date_type.equals(pyarrow.date32())

datetime_type = schema.field("datetime_col").type
assert datetime_type.unit == "us"
assert datetime_type.tz is None

float64_type = schema.field("float64_col").type
assert float64_type.equals(pyarrow.float64())

geography_type = schema.field("geography_col").type
assert geography_type.equals(pyarrow.string())

int64_type = schema.field("int64_col").type
assert int64_type.equals(pyarrow.int64())

numeric_type = schema.field("numeric_col").type
assert numeric_type.precision == 38
assert numeric_type.scale == 9

string_type = schema.field("string_col").type
assert string_type.equals(pyarrow.string())

time_type = schema.field("time_col").type
assert time_type.equals(pyarrow.time64("us"))

timestamp_type = schema.field("timestamp_col").type
assert timestamp_type.unit == "us"
assert timestamp_type.tz is not None

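Outside the test harness, the same code path can be exercised directly. A sketch with placeholder project, dataset, and table names:

```python
from google.cloud import bigquery

client = bigquery.Client()
# With max_results=None the client may use the BigQuery Storage API
# (when google-cloud-bigquery-storage is installed); a small
# max_results forces the REST API's tabledata.list path instead.
arrow_table = client.list_rows(
    "my-project.my_dataset.scalars", max_results=10
).to_arrow()
print(arrow_table.schema)
```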