diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d..b381fa5f7 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -93,6 +93,8 @@ def pyarrow_numeric(): def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types return pyarrow.decimal256(76, 38) diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl new file mode 100644 index 000000000..4419a6e9a --- /dev/null +++ b/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000..ceccd8dbc --- /dev/null +++ b/tests/data/scalars_extreme.jsonl @@ -0,0 +1,5 @@ +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", 
"date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json new file mode 100644 index 000000000..00bd150fd --- /dev/null +++ b/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": 
"numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 7b389013f..cc2c2a4dc 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +import pathlib -from google.cloud import bigquery +import pytest import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . import helpers + prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): @@ -36,6 +41,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -54,3 +64,37 @@ def dataset_id(bigquery_client): @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = 
enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + +@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py new file mode 100644 index 000000000..f97488e39 --- /dev/null +++ b/tests/system/test_arrow.py @@ -0,0 +1,88 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Arrow connector.""" + +import pytest + +pyarrow = pytest.importorskip( + "pyarrow", minversion="3.0.0" +) # Needs decimal256 for BIGNUMERIC columns. 
+
+
+@pytest.mark.parametrize(
+    ("max_results", "scalars_table_name"),
+    (
+        (None, "scalars_table"),  # Use BQ Storage API.
+        (10, "scalars_table"),  # Use REST API.
+        (None, "scalars_extreme_table"),  # Use BQ Storage API.
+        (10, "scalars_extreme_table"),  # Use REST API.
+    ),
+)
+def test_list_rows_nullable_scalars_dtypes(
+    bigquery_client,
+    scalars_table,
+    scalars_extreme_table,
+    max_results,
+    scalars_table_name,
+):
+    """Check the Arrow schema produced by ``list_rows(...).to_arrow()``.
+
+    Runs against both scalar test tables, with and without ``max_results``,
+    and asserts the Arrow data type reported for every column.
+    """
+    # Both table fixtures are requested up front; the parameter picks one.
+    use_extreme = scalars_table_name == "scalars_extreme_table"
+    source_table = scalars_extreme_table if use_extreme else scalars_table
+    row_iterator = bigquery_client.list_rows(
+        source_table, max_results=max_results
+    )
+    arrow_schema = row_iterator.to_arrow().schema
+
+    def arrow_type(column_name):
+        return arrow_schema.field(column_name).type
+
+    # Columns whose Arrow type must match one concrete type exactly.
+    exact_types = {
+        "bool_col": pyarrow.bool_(),
+        "bytes_col": pyarrow.binary(),
+        "date_col": pyarrow.date32(),
+        "float64_col": pyarrow.float64(),
+        "geography_col": pyarrow.string(),
+        "int64_col": pyarrow.int64(),
+        "string_col": pyarrow.string(),
+        "time_col": pyarrow.time64("us"),
+    }
+    for column_name, expected_type in exact_types.items():
+        assert arrow_type(column_name).equals(expected_type)
+
+    # 77th digit is partial.
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
+    bignumeric = arrow_type("bignumeric_col")
+    assert bignumeric.precision in {76, 77}
+    assert bignumeric.scale == 38
+
+    numeric = arrow_type("numeric_col")
+    assert (numeric.precision, numeric.scale) == (38, 9)
+
+    # DATETIME is expected to be time zone naive, TIMESTAMP time zone aware.
+    naive_datetime = arrow_type("datetime_col")
+    assert naive_datetime.unit == "us"
+    assert naive_datetime.tz is None
+
+    zoned_timestamp = arrow_type("timestamp_col")
+    assert zoned_timestamp.unit == "us"
+    assert zoned_timestamp.tz is not None