tests: add system tests for to_arrow with extreme values #813

Merged 9 commits on Jul 27, 2021.
Changes from all commits
2 changes: 2 additions & 0 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -93,6 +93,8 @@ def pyarrow_numeric():


def pyarrow_bignumeric():
# 77th digit is partial.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
return pyarrow.decimal256(76, 38)


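BIGNUMERIC carries up to 76 full decimal digits (the 77th is partial), which is why decimal256(76, 38) is the narrowest Arrow type that fits. A minimal sketch, assuming pyarrow >= 3.0 is installed, round-tripping the largest value from the extreme-value fixture below:

import decimal

import pyarrow

# 76 significant digits, 38 on each side of the decimal point: the widest
# value that decimal256(76, 38) can represent exactly.
extreme = decimal.Decimal(
    "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37"
)
arr = pyarrow.array([extreme], type=pyarrow.decimal256(76, 38))
assert arr[0].as_py() == extreme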
2 changes: 2 additions & 0 deletions tests/data/scalars.jsonl
@@ -0,0 +1,2 @@
{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
5 changes: 5 additions & 0 deletions tests/data/scalars_extreme.jsonl
@@ -0,0 +1,5 @@
{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
62 changes: 62 additions & 0 deletions tests/data/scalars_schema.json
@@ -0,0 +1,62 @@
[
{
"mode": "NULLABLE",
"name": "timestamp_col",
"type": "TIMESTAMP"
},
{
"mode": "NULLABLE",
"name": "time_col",
"type": "TIME"
},
{
"mode": "NULLABLE",
"name": "float64_col",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "datetime_col",
"type": "DATETIME"
},
{
"mode": "NULLABLE",
"name": "bignumeric_col",
"type": "BIGNUMERIC"
},
{
"mode": "NULLABLE",
"name": "numeric_col",
"type": "NUMERIC"
},
{
"mode": "NULLABLE",
"name": "geography_col",
"type": "GEOGRAPHY"
},
{
"mode": "NULLABLE",
"name": "date_col",
"type": "DATE"
},
{
"mode": "NULLABLE",
"name": "string_col",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "bool_col",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "bytes_col",
"type": "BYTES"
},
{
"mode": "NULLABLE",
"name": "int64_col",
"type": "INTEGER"
}
]
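Client.schema_from_json, used by the fixtures below, turns each entry in this file into a SchemaField. As a sketch, the hand-built equivalent of one entry:

from google.cloud import bigquery

# Equivalent of the "bignumeric_col" entry above; schema_from_json builds
# the same SchemaField objects from the JSON file.
field = bigquery.SchemaField("bignumeric_col", "BIGNUMERIC", mode="NULLABLE")
assert field.field_type == "BIGNUMERIC"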
48 changes: 46 additions & 2 deletions tests/system/conftest.py
@@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pathlib

import pytest
import test_utils.prefixer

from google.cloud import bigquery
from google.cloud.bigquery import enums
from . import helpers


prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system")

DATA_DIR = pathlib.Path(__file__).parent.parent / "data"


@pytest.fixture(scope="session", autouse=True)
def cleanup_datasets(bigquery_client: bigquery.Client):
@@ -36,6 +41,11 @@ def bigquery_client():
return bigquery.Client()


@pytest.fixture(scope="session")
def project_id(bigquery_client: bigquery.Client):
return bigquery_client.project


@pytest.fixture(scope="session")
def bqstorage_client(bigquery_client):
from google.cloud import bigquery_storage
@@ -54,3 +64,37 @@ def dataset_id(bigquery_client):
@pytest.fixture
def table_id(dataset_id):
return f"{dataset_id}.table_{helpers.temp_suffix()}"


@pytest.fixture(scope="session")
def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
full_table_id = f"{project_id}.{dataset_id}.scalars"
with open(DATA_DIR / "scalars.jsonl", "rb") as data_file:
job = bigquery_client.load_table_from_file(
data_file, full_table_id, job_config=job_config
)
job.result()
yield full_table_id
bigquery_client.delete_table(full_table_id)


@pytest.fixture(scope="session")
def scalars_extreme_table(
bigquery_client: bigquery.Client, project_id: str, dataset_id: str
):
schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
full_table_id = f"{project_id}.{dataset_id}.scalars_extreme"
with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file:
job = bigquery_client.load_table_from_file(
data_file, full_table_id, job_config=job_config
)
job.result()
yield full_table_id
bigquery_client.delete_table(full_table_id)
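The two fixtures differ only in the data file and table name. A hypothetical shared helper (not part of this PR) that both could delegate to:

def _load_scalars_table(
    bigquery_client: bigquery.Client,
    project_id: str,
    dataset_id: str,
    data_filename: str,
    table_name: str,
) -> str:
    """Hypothetical helper: load a JSONL fixture into a per-session table."""
    schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
    job_config = bigquery.LoadJobConfig(
        schema=schema,
        source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
    )
    full_table_id = f"{project_id}.{dataset_id}.{table_name}"
    with open(DATA_DIR / data_filename, "rb") as data_file:
        bigquery_client.load_table_from_file(
            data_file, full_table_id, job_config=job_config
        ).result()
    return full_table_id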
88 changes: 88 additions & 0 deletions tests/system/test_arrow.py
@@ -0,0 +1,88 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""System tests for Arrow connector."""

import pytest

pyarrow = pytest.importorskip(
"pyarrow", minversion="3.0.0"
) # Needs decimal256 for BIGNUMERIC columns.


@pytest.mark.parametrize(
("max_results", "scalars_table_name"),
(
(None, "scalars_table"), # Use BQ Storage API.
(10, "scalars_table"), # Use REST API.
(None, "scalars_extreme_table"), # Use BQ Storage API.
(10, "scalars_extreme_table"), # Use REST API.
),
)
def test_list_rows_nullable_scalars_dtypes(
bigquery_client,
scalars_table,
scalars_extreme_table,
max_results,
scalars_table_name,
):
table_id = scalars_table
if scalars_table_name == "scalars_extreme_table":
table_id = scalars_extreme_table
arrow_table = bigquery_client.list_rows(
table_id, max_results=max_results,
).to_arrow()

schema = arrow_table.schema
bignumeric_type = schema.field("bignumeric_col").type
# 77th digit is partial.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
assert bignumeric_type.precision in {76, 77}
assert bignumeric_type.scale == 38

bool_type = schema.field("bool_col").type
assert bool_type.equals(pyarrow.bool_())

bytes_type = schema.field("bytes_col").type
assert bytes_type.equals(pyarrow.binary())

date_type = schema.field("date_col").type
assert date_type.equals(pyarrow.date32())

datetime_type = schema.field("datetime_col").type
assert datetime_type.unit == "us"
assert datetime_type.tz is None

float64_type = schema.field("float64_col").type
assert float64_type.equals(pyarrow.float64())

geography_type = schema.field("geography_col").type
assert geography_type.equals(pyarrow.string())

int64_type = schema.field("int64_col").type
assert int64_type.equals(pyarrow.int64())

numeric_type = schema.field("numeric_col").type
assert numeric_type.precision == 38
assert numeric_type.scale == 9

string_type = schema.field("string_col").type
assert string_type.equals(pyarrow.string())

time_type = schema.field("time_col").type
assert time_type.equals(pyarrow.time64("us"))

timestamp_type = schema.field("timestamp_col").type
assert timestamp_type.unit == "us"
assert timestamp_type.tz is not None
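Parametrizing on max_results exercises both download paths: with max_results=None the client can stream results through the BigQuery Storage API, while a set max_results forces the REST API. A usage sketch (the table ID is hypothetical):

from google.cloud import bigquery

client = bigquery.Client()
table_id = "my-project.my_dataset.scalars"  # hypothetical table ID

# No max_results: the client may download via the BigQuery Storage API.
arrow_all = client.list_rows(table_id).to_arrow()

# max_results set: rows come back through the REST API instead.
arrow_page = client.list_rows(table_id, max_results=10).to_arrow()

print(arrow_all.schema)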