From 586a022276a337d0cb3f869966290f6f1eaab9b6 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 22 Jun 2021 18:08:20 -0400 Subject: [PATCH 01/21] Added support for dbi struct parameters with explicit types --- google/cloud/bigquery/dbapi/_helpers.py | 157 ++++++++++++++++++++++-- google/cloud/bigquery/dbapi/cursor.py | 12 +- tests/unit/test_dbapi__helpers.py | 122 ++++++++++++++++++ tests/unit/test_dbapi_cursor.py | 4 + 4 files changed, 282 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 3b0d8134c..bef1e6bec 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -18,9 +18,10 @@ import decimal import functools import numbers +import re from google.cloud import bigquery -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import table, enums, query from google.cloud.bigquery.dbapi import exceptions @@ -113,6 +114,135 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): return bigquery.ArrayQueryParameter(name, array_type, value) +complex_query_parameter_parse = re.compile( + r""" + \s* + (ARRAY|STRUCT|RECORD) # Type + \s* + <([A-Z0-9<> ,]+)> # Subtype(s) + \s*$ + """, + re.IGNORECASE | re.VERBOSE, + ).match +parse_struct_field = re.compile( + r""" + (?:(\w+)\s+) # field name + ([A-Z0-9<> ,]+) # Field type + $""", re.VERBOSE | re.IGNORECASE).match + + +def split_struct_fields(fields): + fields = fields.split(',') + while fields: + field = fields.pop(0) + while fields and field.count('<') != field.count('>'): + field += ',' + fields.pop(0) + yield field + + +def complex_query_parameter_type(name: str, type_: str, base: str): + type_ = type_.strip() + if '<' not in type_: + try: + type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + except AttributeError: + raise exceptions.ProgrammingError( + f"Invalid scalar type, {type_}, in {base}") + if name: + type_ = type_.with_name(name) + return type_ + + m = complex_query_parameter_parse(type_) + if not m: + raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") + tname, sub = m.groups() + tname = tname.upper() + sub = sub.strip() + if tname == 'ARRAY': + return query.ArrayQueryParameterType( + complex_query_parameter_type(None, sub, base), + name=name) + else: + fields = [] + for field_string in split_struct_fields(sub): + field_string = field_string.strip() + m = parse_struct_field(field_string) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field_string}, in {base}") + field_name, field_type = m.groups() + fields.append(complex_query_parameter_type( + field_name, field_type, base)) + + return query.StructQueryParameterType(*fields, name=name) + + +def complex_query_parameter(name, value, type_, base=None): + """ + Construct a query parameter for a complex type (array or struct record) + + or for a subtype, which may not be complex + """ + type_ = type_.strip() + base = base or type_ + if '>' not in type_: + try: + type_ = getattr(enums.SqlParameterScalarTypes, type_.upper())._type + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {type_}," + f" for {name} is not a valid BigQuery scalar type, in {base}." + ) + + return query.ScalarQueryParameter(name, type_, value) + + m = complex_query_parameter_parse(type_) + if not m: + raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") + tname, sub = m.groups() + tname = tname.upper() + sub = sub.strip() + if tname == 'ARRAY': + if not array_like(value): + raise exceptions.ProgrammingError( + f"Array type with non-array-like value" + f" with type {type(value).__name__}") + array_type = complex_query_parameter_type(name, sub, base) + if isinstance(array_type, query.ArrayQueryParameterType): + raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") + return query.ArrayQueryParameter( + name, + array_type, + [complex_query_parameter(None, v, sub, base) + for v in value] if '<' in sub else value, + ) + else: + fields = [] + if not isinstance(value, collections_abc.Mapping): + raise exceptions.ProgrammingError( + f"Non-mapping value for type {type_}") + value_keys = set(value) + for field_string in split_struct_fields(sub): + field_string = field_string.strip() + m = parse_struct_field(field_string) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field_string}, in {base or type_}") + field_name, field_type = m.groups() + if field_name not in value: + raise exceptions.ProgrammingError( + f"No field value for {field_name} in {type_}") + value_keys.remove(field_name) + fields.append( + complex_query_parameter( + field_name, value[field_name], field_type, base) + ) + if value_keys: + raise exceptions.ProgrammingError(f"Extra data keys for {type_}") + + return query.StructQueryParameter(name, *fields) + + def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. @@ -129,7 +259,9 @@ def to_query_parameters_list(parameters, parameter_types): result = [] for value, type_ in zip(parameters, parameter_types): - if isinstance(value, collections_abc.Mapping): + if type_ is not None and '<' in type_: + param = complex_query_parameter(None, value, type_) + elif isinstance(value, collections_abc.Mapping): raise NotImplementedError("STRUCT-like parameter values are not supported.") elif array_like(value): param = array_to_query_parameter(value, None, type_) @@ -157,20 +289,21 @@ def to_query_parameters_dict(parameters, query_parameter_types): result = [] for name, value in parameters.items(): - if isinstance(value, collections_abc.Mapping): + query_parameter_type = query_parameter_types.get(name) + if query_parameter_type is not None and '<' in query_parameter_type: + param = complex_query_parameter(name, value, query_parameter_type) + elif isinstance(value, collections_abc.Mapping): raise NotImplementedError( "STRUCT-like parameter values are not supported " "(parameter {}).".format(name) - ) - else: - query_parameter_type = query_parameter_types.get(name) - if array_like(value): - param = array_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type ) - else: - param = scalar_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type, + elif array_like(value): + param = array_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type + ) + else: + param = scalar_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type, ) result.append(param) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index c8fc49378..96889a3c9 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -483,7 +483,17 @@ def _format_operation(operation, parameters): def _extract_types( - operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub + operation, extra_type_sub=re.compile( + r""" + (%*) # Extra %s. We'll dal with these in the replacement code + % # Beginning of replacement, %s, %(...)s + (?:\( # Begin of optional name and/or type + ([^:)]*) # name + (?::([a-zA-Z0-9<>, ]+))? # type + \))? # End of optional name and/or type + s # End of replacement + """, + re.VERBOSE).sub ): """Remove type information from parameter placeholders. diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 250ba46d9..a6607181e 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -16,6 +16,7 @@ import decimal import math import operator as op +import re import unittest import pytest @@ -416,3 +417,124 @@ def test_to_query_parameters_list_w_types(): bigquery.ScalarQueryParameter(None, "STRING", None), bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ([], 'ARRAY', + {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}}, + 'parameterValue': {'arrayValues': []}, + }), + ([1, 2], 'ARRAY', + {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}}, + 'parameterValue': {'arrayValues': [{'value': '1'}, {'value': '2'}]}, + }), + (dict(name='par', + children=[ + dict(name='ch1', bdate=datetime.date(2021, 1, 1)), + dict(name='ch2', bdate=datetime.date(2021, 1, 2)), + ]), + 'struct>>', + { + 'parameterType': + {'structTypes': + [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'children', + 'type': {'arrayType': {'structTypes': [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'bdate', + 'type': {'type': 'DATE'}}], + 'type': 'STRUCT'}, + 'type': 'ARRAY'}}], + 'type': 'STRUCT'}, + 'parameterValue': + {'structValues': + {'children': + {'arrayValues': [{'structValues': {'bdate': {'value': '2021-01-01'}, + 'name': {'value': 'ch1'}}}, + {'structValues': {'bdate': {'value': '2021-01-02'}, + 'name': {'value': 'ch2'}}}]}, + 'name': {'value': 'par'}}}, + } + ), + ]) +def test_complex_query_parameter_type(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + param = complex_query_parameter("test", value, type_).to_api_repr() + assert param.pop('name') == 'test' + assert param == expect + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ([], 'ARRAY', "Invalid scalar type, INT, in ARRAY"), + ([], 'x', "Invalid parameter type, x"), + ({}, 'struct', "Invalid struct field, int, in struct"), + ({'x': 1}, 'struct', + "The given parameter type, int," + " for x is not a valid BigQuery scalar type, in struct."), + ([], 'x<', "Invalid parameter type, x<"), + (0, 'ARRAY', "Array type with non-array-like value with type int"), + ([], 'ARRAY>', + "Array can't contain an array in ARRAY>"), + ([], 'struct', "Non-mapping value for type struct"), + ({}, 'struct', "No field value for x in struct"), + ({'x': 1, 'y': 1}, 'struct', "Extra data keys for struct"), + ([], 'array>', "Invalid struct field, xxx, in array>"), + ([], 'array<<>>', "Invalid parameter type, <>"), + ]) +def test_complex_query_parameter_type_errors(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises( + exceptions.ProgrammingError, + match="^" + re.escape(expect) + "$", + ): + complex_query_parameter("test", value, type_) + + +@pytest.mark.parametrize( + "parameters,parameter_types,expect", + [ + ([[], dict(name='ch1', bdate=datetime.date(2021, 1, 1))], + ['ARRAY', 'struct'], + [ + {'parameterType': {'arrayType': {'type': 'INT64'}, + 'type': 'ARRAY'}, + 'parameterValue': {'arrayValues': []}}, + {'parameterType': {'structTypes': [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'bdate', + 'type': {'type': 'DATE'}}], + 'type': 'STRUCT'}, + 'parameterValue': {'structValues': {'bdate': {'value': '2021-01-01'}, + 'name': {'value': 'ch1'}}}}, + ]), + (dict(ids=[], child=dict(name='ch1', bdate=datetime.date(2021, 1, 1))), + dict(ids='ARRAY', child='struct'), + [ + {'name': 'ids', + 'parameterType': {'arrayType': {'type': 'INT64'}, + 'type': 'ARRAY'}, + 'parameterValue': {'arrayValues': []}}, + {'name': 'child', + 'parameterType': {'structTypes': [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'bdate', + 'type': {'type': 'DATE'}}], + 'type': 'STRUCT'}, + 'parameterValue': {'structValues': {'bdate': {'value': '2021-01-01'}, + 'name': {'value': 'ch1'}}}}, + ]), + ]) +def test_to_query_parameters_complex_types(parameters, parameter_types, expect): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + result = [ + p.to_api_repr() + for p in to_query_parameters(parameters, parameter_types) + ] + assert result == expect diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index a2d6693d0..9c8cbef2e 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,10 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), ], ) def test__extract_types(inp, expect): From 13f910713b8824817c7aae540ad94a0f72bfce7a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 09:52:02 -0400 Subject: [PATCH 02/21] Make the dataset_id fixture a session fixture --- tests/system/conftest.py | 7 ++++++- tests/system/test_pandas.py | 11 ++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4b5fcb543..60ed0a1fa 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -31,9 +31,14 @@ def bqstorage_client(bigquery_client): return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) -@pytest.fixture +@pytest.fixture(scope="session") def dataset_id(bigquery_client): dataset_id = f"bqsystem_{helpers.temp_suffix()}" bigquery_client.create_dataset(dataset_id) + print("fix create dataset", dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) + +@pytest.fixture +def table_id(dataset_id): + return f"{dataset_id}.table_{helpers.temp_suffix()}" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 1164e36da..ddf5eaf43 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -149,7 +149,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): """Test that a DataFrame containing column with None-type values and int64 datatype can be uploaded without specifying a schema. @@ -157,9 +157,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( https://github.com/googleapis/python-bigquery/issues/22 """ - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - bigquery_client.project, dataset_id - ) df_data = collections.OrderedDict( [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] ) @@ -511,7 +508,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): from google.cloud.bigquery.job import SourceFormat @@ -536,10 +533,6 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( ) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - bigquery_client.project, dataset_id - ) - job_config = bigquery.LoadJobConfig( schema=table_schema, source_format=SourceFormat.CSV ) From aea7baee1ae96af69de1044eb08e4995de09e177 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 09:53:00 -0400 Subject: [PATCH 03/21] Make the dataset_id fixture a session fixture --- tests/system/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 60ed0a1fa..c7e513b08 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -35,7 +35,6 @@ def bqstorage_client(bigquery_client): def dataset_id(bigquery_client): dataset_id = f"bqsystem_{helpers.temp_suffix()}" bigquery_client.create_dataset(dataset_id) - print("fix create dataset", dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) From 6f2613004cefd64d195e480563d45e138ebfc11c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 09:56:09 -0400 Subject: [PATCH 04/21] system test of the struct machinery --- tests/system/test_structs.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/system/test_structs.py diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py new file mode 100644 index 000000000..3ce00166d --- /dev/null +++ b/tests/system/test_structs.py @@ -0,0 +1,24 @@ +import datetime + +from google.cloud.bigquery.dbapi import connect + +def test_structs(bigquery_client, dataset_id): + person_type = ('struct>>') + table = dataset_id + ".test_struct" + conn = connect(bigquery_client) + cursor = conn.cursor() + cursor.execute(f"create table {table} (person {person_type})") + data = dict(name='par', + children=[ + dict(name='ch1', bdate=datetime.date(2021, 1, 1)), + dict(name='ch2', bdate=datetime.date(2021, 1, 2)), + ]) + cursor.execute( + f"insert into {table} (person) values (%(v:{person_type})s)", + dict(v=data), + ) + + cursor.execute(f"select * from {table}") + [[result]] = list(cursor) + assert result == data From c386448f0d8c3a661a0cde13296fd82107346acf Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 11:04:35 -0400 Subject: [PATCH 05/21] Verify that we can bind non-parameterized types (In fact, we can't bind with parametrized types.) --- tests/system/test_structs.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py index 3ce00166d..d5c03fc9f 100644 --- a/tests/system/test_structs.py +++ b/tests/system/test_structs.py @@ -1,24 +1,31 @@ import datetime +import pytest + from google.cloud.bigquery.dbapi import connect -def test_structs(bigquery_client, dataset_id): - person_type = ('struct>>') - table = dataset_id + ".test_struct" +person_type = ('struct>>') +person_type_sized = ('struct>>') + +@pytest.mark.parametrize( + "person_type_decl", [person_type, person_type_sized] + ) +def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): conn = connect(bigquery_client) cursor = conn.cursor() - cursor.execute(f"create table {table} (person {person_type})") + cursor.execute(f"create table {table_id} (person {person_type_decl})") data = dict(name='par', children=[ dict(name='ch1', bdate=datetime.date(2021, 1, 1)), dict(name='ch2', bdate=datetime.date(2021, 1, 2)), ]) cursor.execute( - f"insert into {table} (person) values (%(v:{person_type})s)", + f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), ) - cursor.execute(f"select * from {table}") + cursor.execute(f"select * from {table_id}") [[result]] = list(cursor) assert result == data From 71c86143bd092fdae31b73c6560939ede7be7f8f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 11:45:49 -0400 Subject: [PATCH 06/21] Parse and remove type parameters from explcit types. --- google/cloud/bigquery/dbapi/_helpers.py | 23 ++++++++++++-- google/cloud/bigquery/dbapi/cursor.py | 18 +++++++++-- tests/unit/test_dbapi__helpers.py | 40 +++++++++++++++++++++++-- tests/unit/test_dbapi_cursor.py | 19 ++++++++++++ 4 files changed, 94 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index bef1e6bec..0cc32943c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -28,9 +28,20 @@ _NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") +type_parameters_re = re.compile(r""" + \( + \s*[0-9]+\s* + (, + \s*[0-9]+\s* + )* + \) + """, re.VERBOSE) + def _parameter_type(name, value, query_parameter_type=None, value_doc=""): if query_parameter_type: + # Strip type parameters + query_parameter_type = type_parameters_re.sub('', query_parameter_type) try: parameter_type = getattr( enums.SqlParameterScalarTypes, query_parameter_type.upper() @@ -119,7 +130,7 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): \s* (ARRAY|STRUCT|RECORD) # Type \s* - <([A-Z0-9<> ,]+)> # Subtype(s) + <([A-Z0-9<> ,()]+)> # Subtype(s) \s*$ """, re.IGNORECASE | re.VERBOSE, @@ -127,7 +138,7 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): parse_struct_field = re.compile( r""" (?:(\w+)\s+) # field name - ([A-Z0-9<> ,]+) # Field type + ([A-Z0-9<> ,()]+) # Field type $""", re.VERBOSE | re.IGNORECASE).match @@ -143,6 +154,10 @@ def split_struct_fields(fields): def complex_query_parameter_type(name: str, type_: str, base: str): type_ = type_.strip() if '<' not in type_: + # Scalar + + # Strip type parameters + type_ = type_parameters_re.sub('', type_).strip() try: type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) except AttributeError: @@ -186,6 +201,10 @@ def complex_query_parameter(name, value, type_, base=None): type_ = type_.strip() base = base or type_ if '>' not in type_: + # Scalar + + # Strip type parameters + type_ = type_parameters_re.sub('', type_).strip() try: type_ = getattr(enums.SqlParameterScalarTypes, type_.upper())._type except AttributeError: diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 96889a3c9..d9b9086b0 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -485,12 +485,26 @@ def _format_operation(operation, parameters): def _extract_types( operation, extra_type_sub=re.compile( r""" - (%*) # Extra %s. We'll dal with these in the replacement code + (%*) # Extra %s. We'll deal with these in the replacement code + % # Beginning of replacement, %s, %(...)s + (?:\( # Begin of optional name and/or type ([^:)]*) # name - (?::([a-zA-Z0-9<>, ]+))? # type + (?:: # ':' introduces type + ( # start of type group + [a-zA-Z0-9<>, ]+ # First part, no parens + + (?: # start sets of parens + non-paren text + \([0-9 ,]+\) # comma-separated groups of digits in parens + # (e.g. string(10)) + (?=[, >)]) # Must be followed by ,>) or space + [a-zA-Z0-9<>, ]* # Optional non-paren chars + )* # Can be zero or more of parens and following text + ) # end of type group + )? # close type clause ":type" \))? # End of optional name and/or type + s # End of replacement """, re.VERBOSE).sub diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index a6607181e..5e16cfa66 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -395,11 +395,13 @@ def test_to_query_parameters_dict_w_types(): assert sorted( _helpers.to_query_parameters( - dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + dict(i=1, x=1.2, y=None, q="hi", z=[]), + dict(x="numeric", y="string", q="string(9)", z="float64") ), key=lambda p: p.name, ) == [ bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("q", "STRING", "hi"), bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), bigquery.ScalarQueryParameter("y", "STRING", None), bigquery.ArrayQueryParameter("z", "FLOAT64", []), @@ -410,11 +412,12 @@ def test_to_query_parameters_list_w_types(): from google.cloud import bigquery assert _helpers.to_query_parameters( - [1, 1.2, None, []], [None, "numeric", "string", "float64"] + [1, 1.2, None, 'hi', []], [None, "numeric", "string", "string(9)", "float64"] ) == [ bigquery.ScalarQueryParameter(None, "INT64", 1), bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ScalarQueryParameter(None, "STRING", "hi"), bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] @@ -458,6 +461,39 @@ def test_to_query_parameters_list_w_types(): 'name': {'value': 'par'}}}, } ), + (dict(name='par', + children=[ + dict(name='ch1', bdate=datetime.date(2021, 1, 1)), + dict(name='ch2', bdate=datetime.date(2021, 1, 2)), + ]), + 'struct>>', + { + 'parameterType': + {'structTypes': + [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'children', + 'type': {'arrayType': {'structTypes': [{'name': 'name', + 'type': {'type': 'STRING'}}, + {'name': 'bdate', + 'type': {'type': 'DATE'}}], + 'type': 'STRUCT'}, + 'type': 'ARRAY'}}], + 'type': 'STRUCT'}, + 'parameterValue': + {'structValues': + {'children': + {'arrayValues': [{'structValues': {'bdate': {'value': '2021-01-01'}, + 'name': {'value': 'ch1'}}}, + {'structValues': {'bdate': {'value': '2021-01-02'}, + 'name': {'value': 'ch2'}}}]}, + 'name': {'value': 'par'}}}, + } + ), + (['1', 'hi'], 'ARRAY', + {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'STRING'}}, + 'parameterValue': {'arrayValues': [{'value': '1'}, {'value': 'hi'}]}, + }), ]) def test_complex_query_parameter_type(type_, value, expect): from google.cloud.bigquery.dbapi._helpers import complex_query_parameter diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 9c8cbef2e..90dddd517 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -813,6 +813,25 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:struct)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", + dict(foo="struct")), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", + dict(foo="struct")), + ), + ( + "values(%(foo:string(10))s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", + dict(foo="string(10)")), + ), ], ) def test__extract_types(inp, expect): From 6f5b345a14f2bed007903f59a5f60fef84a8513d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 12:07:45 -0400 Subject: [PATCH 07/21] Document passing struct data. --- docs/dbapi.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/dbapi.rst b/docs/dbapi.rst index 41ec85833..299f71911 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -37,7 +37,14 @@ colon, as in:: insert into people (name, income) values (%(name:string)s, %(income:numeric)s) -For unnamed parameters, use the named syntax with a type, but now +For unnamed parameters, use the named syntax with a type, but no name, as in:: insert into people (name, income) values (%(:string)s, %(:numeric)s) + +Providing type information is the *only* way to pass `struct` data:: + + cursor.execute( + "insert into points (point) values (%(:struct)s)", + [{"x": 10, "y": 20}], + ) From 411c336191ce1d15879dd8fc3fec3975df54c22c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 12:11:01 -0400 Subject: [PATCH 08/21] blacken --- google/cloud/bigquery/dbapi/_helpers.py | 82 +++--- google/cloud/bigquery/dbapi/cursor.py | 6 +- tests/system/conftest.py | 1 + tests/system/test_structs.py | 30 +-- tests/unit/test_dbapi__helpers.py | 343 +++++++++++++++--------- tests/unit/test_dbapi_cursor.py | 15 +- 6 files changed, 289 insertions(+), 188 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 0cc32943c..2ccd197c6 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -28,20 +28,23 @@ _NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") -type_parameters_re = re.compile(r""" +type_parameters_re = re.compile( + r""" \( \s*[0-9]+\s* (, \s*[0-9]+\s* )* \) - """, re.VERBOSE) + """, + re.VERBOSE, +) def _parameter_type(name, value, query_parameter_type=None, value_doc=""): if query_parameter_type: # Strip type parameters - query_parameter_type = type_parameters_re.sub('', query_parameter_type) + query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( enums.SqlParameterScalarTypes, query_parameter_type.upper() @@ -134,35 +137,38 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): \s*$ """, re.IGNORECASE | re.VERBOSE, - ).match +).match parse_struct_field = re.compile( r""" (?:(\w+)\s+) # field name ([A-Z0-9<> ,()]+) # Field type - $""", re.VERBOSE | re.IGNORECASE).match + $""", + re.VERBOSE | re.IGNORECASE, +).match def split_struct_fields(fields): - fields = fields.split(',') + fields = fields.split(",") while fields: field = fields.pop(0) - while fields and field.count('<') != field.count('>'): - field += ',' + fields.pop(0) + while fields and field.count("<") != field.count(">"): + field += "," + fields.pop(0) yield field def complex_query_parameter_type(name: str, type_: str, base: str): type_ = type_.strip() - if '<' not in type_: + if "<" not in type_: # Scalar # Strip type parameters - type_ = type_parameters_re.sub('', type_).strip() + type_ = type_parameters_re.sub("", type_).strip() try: type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) except AttributeError: raise exceptions.ProgrammingError( - f"Invalid scalar type, {type_}, in {base}") + f"Invalid scalar type, {type_}, in {base}" + ) if name: type_ = type_.with_name(name) return type_ @@ -173,10 +179,10 @@ def complex_query_parameter_type(name: str, type_: str, base: str): tname, sub = m.groups() tname = tname.upper() sub = sub.strip() - if tname == 'ARRAY': + if tname == "ARRAY": return query.ArrayQueryParameterType( - complex_query_parameter_type(None, sub, base), - name=name) + complex_query_parameter_type(None, sub, base), name=name + ) else: fields = [] for field_string in split_struct_fields(sub): @@ -184,10 +190,10 @@ def complex_query_parameter_type(name: str, type_: str, base: str): m = parse_struct_field(field_string) if not m: raise exceptions.ProgrammingError( - f"Invalid struct field, {field_string}, in {base}") + f"Invalid struct field, {field_string}, in {base}" + ) field_name, field_type = m.groups() - fields.append(complex_query_parameter_type( - field_name, field_type, base)) + fields.append(complex_query_parameter_type(field_name, field_type, base)) return query.StructQueryParameterType(*fields, name=name) @@ -200,18 +206,18 @@ def complex_query_parameter(name, value, type_, base=None): """ type_ = type_.strip() base = base or type_ - if '>' not in type_: + if ">" not in type_: # Scalar # Strip type parameters - type_ = type_parameters_re.sub('', type_).strip() + type_ = type_parameters_re.sub("", type_).strip() try: type_ = getattr(enums.SqlParameterScalarTypes, type_.upper())._type except AttributeError: raise exceptions.ProgrammingError( f"The given parameter type, {type_}," f" for {name} is not a valid BigQuery scalar type, in {base}." - ) + ) return query.ScalarQueryParameter(name, type_, value) @@ -221,41 +227,43 @@ def complex_query_parameter(name, value, type_, base=None): tname, sub = m.groups() tname = tname.upper() sub = sub.strip() - if tname == 'ARRAY': + if tname == "ARRAY": if not array_like(value): raise exceptions.ProgrammingError( f"Array type with non-array-like value" - f" with type {type(value).__name__}") + f" with type {type(value).__name__}" + ) array_type = complex_query_parameter_type(name, sub, base) if isinstance(array_type, query.ArrayQueryParameterType): raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") return query.ArrayQueryParameter( name, array_type, - [complex_query_parameter(None, v, sub, base) - for v in value] if '<' in sub else value, - ) + [complex_query_parameter(None, v, sub, base) for v in value] + if "<" in sub + else value, + ) else: fields = [] if not isinstance(value, collections_abc.Mapping): - raise exceptions.ProgrammingError( - f"Non-mapping value for type {type_}") + raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") value_keys = set(value) for field_string in split_struct_fields(sub): field_string = field_string.strip() m = parse_struct_field(field_string) if not m: raise exceptions.ProgrammingError( - f"Invalid struct field, {field_string}, in {base or type_}") + f"Invalid struct field, {field_string}, in {base or type_}" + ) field_name, field_type = m.groups() if field_name not in value: raise exceptions.ProgrammingError( - f"No field value for {field_name} in {type_}") + f"No field value for {field_name} in {type_}" + ) value_keys.remove(field_name) fields.append( - complex_query_parameter( - field_name, value[field_name], field_type, base) - ) + complex_query_parameter(field_name, value[field_name], field_type, base) + ) if value_keys: raise exceptions.ProgrammingError(f"Extra data keys for {type_}") @@ -278,7 +286,7 @@ def to_query_parameters_list(parameters, parameter_types): result = [] for value, type_ in zip(parameters, parameter_types): - if type_ is not None and '<' in type_: + if type_ is not None and "<" in type_: param = complex_query_parameter(None, value, type_) elif isinstance(value, collections_abc.Mapping): raise NotImplementedError("STRUCT-like parameter values are not supported.") @@ -309,21 +317,21 @@ def to_query_parameters_dict(parameters, query_parameter_types): for name, value in parameters.items(): query_parameter_type = query_parameter_types.get(name) - if query_parameter_type is not None and '<' in query_parameter_type: + if query_parameter_type is not None and "<" in query_parameter_type: param = complex_query_parameter(name, value, query_parameter_type) elif isinstance(value, collections_abc.Mapping): raise NotImplementedError( "STRUCT-like parameter values are not supported " "(parameter {}).".format(name) - ) + ) elif array_like(value): param = array_to_query_parameter( value, name=name, query_parameter_type=query_parameter_type - ) + ) else: param = scalar_to_query_parameter( value, name=name, query_parameter_type=query_parameter_type, - ) + ) result.append(param) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index d9b9086b0..587598d5f 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -483,7 +483,8 @@ def _format_operation(operation, parameters): def _extract_types( - operation, extra_type_sub=re.compile( + operation, + extra_type_sub=re.compile( r""" (%*) # Extra %s. We'll deal with these in the replacement code @@ -507,7 +508,8 @@ def _extract_types( s # End of replacement """, - re.VERBOSE).sub + re.VERBOSE, + ).sub, ): """Remove type information from parameter placeholders. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index c7e513b08..4eef60e92 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -38,6 +38,7 @@ def dataset_id(bigquery_client): yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) + @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py index d5c03fc9f..20740f614 100644 --- a/tests/system/test_structs.py +++ b/tests/system/test_structs.py @@ -4,27 +4,27 @@ from google.cloud.bigquery.dbapi import connect -person_type = ('struct>>') -person_type_sized = ('struct>>') +person_type = "struct>>" +person_type_sized = ( + "struct>>" +) -@pytest.mark.parametrize( - "person_type_decl", [person_type, person_type_sized] - ) + +@pytest.mark.parametrize("person_type_decl", [person_type, person_type_sized]) def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): conn = connect(bigquery_client) cursor = conn.cursor() cursor.execute(f"create table {table_id} (person {person_type_decl})") - data = dict(name='par', - children=[ - dict(name='ch1', bdate=datetime.date(2021, 1, 1)), - dict(name='ch2', bdate=datetime.date(2021, 1, 2)), - ]) + data = dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ) cursor.execute( - f"insert into {table_id} (person) values (%(v:{person_type})s)", - dict(v=data), - ) + f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + ) cursor.execute(f"select * from {table_id}") [[result]] = list(cursor) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 5e16cfa66..f5b02d760 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -396,7 +396,7 @@ def test_to_query_parameters_dict_w_types(): assert sorted( _helpers.to_query_parameters( dict(i=1, x=1.2, y=None, q="hi", z=[]), - dict(x="numeric", y="string", q="string(9)", z="float64") + dict(x="numeric", y="string", q="string(9)", z="float64"), ), key=lambda p: p.name, ) == [ @@ -412,7 +412,7 @@ def test_to_query_parameters_list_w_types(): from google.cloud import bigquery assert _helpers.to_query_parameters( - [1, 1.2, None, 'hi', []], [None, "numeric", "string", "string(9)", "float64"] + [1, 1.2, None, "hi", []], [None, "numeric", "string", "string(9)", "float64"] ) == [ bigquery.ScalarQueryParameter(None, "INT64", 1), bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), @@ -421,156 +421,243 @@ def test_to_query_parameters_list_w_types(): bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] + @pytest.mark.parametrize( "value,type_,expect", [ - ([], 'ARRAY', - {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}}, - 'parameterValue': {'arrayValues': []}, - }), - ([1, 2], 'ARRAY', - {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}}, - 'parameterValue': {'arrayValues': [{'value': '1'}, {'value': '2'}]}, - }), - (dict(name='par', - children=[ - dict(name='ch1', bdate=datetime.date(2021, 1, 1)), - dict(name='ch2', bdate=datetime.date(2021, 1, 2)), - ]), - 'struct>>', - { - 'parameterType': - {'structTypes': - [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'children', - 'type': {'arrayType': {'structTypes': [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'bdate', - 'type': {'type': 'DATE'}}], - 'type': 'STRUCT'}, - 'type': 'ARRAY'}}], - 'type': 'STRUCT'}, - 'parameterValue': - {'structValues': - {'children': - {'arrayValues': [{'structValues': {'bdate': {'value': '2021-01-01'}, - 'name': {'value': 'ch1'}}}, - {'structValues': {'bdate': {'value': '2021-01-02'}, - 'name': {'value': 'ch2'}}}]}, - 'name': {'value': 'par'}}}, - } - ), - (dict(name='par', - children=[ - dict(name='ch1', bdate=datetime.date(2021, 1, 1)), - dict(name='ch2', bdate=datetime.date(2021, 1, 2)), - ]), - 'struct>>', - { - 'parameterType': - {'structTypes': - [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'children', - 'type': {'arrayType': {'structTypes': [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'bdate', - 'type': {'type': 'DATE'}}], - 'type': 'STRUCT'}, - 'type': 'ARRAY'}}], - 'type': 'STRUCT'}, - 'parameterValue': - {'structValues': - {'children': - {'arrayValues': [{'structValues': {'bdate': {'value': '2021-01-01'}, - 'name': {'value': 'ch1'}}}, - {'structValues': {'bdate': {'value': '2021-01-02'}, - 'name': {'value': 'ch2'}}}]}, - 'name': {'value': 'par'}}}, - } - ), - (['1', 'hi'], 'ARRAY', - {'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'STRING'}}, - 'parameterValue': {'arrayValues': [{'value': '1'}, {'value': 'hi'}]}, - }), - ]) + ( + [], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": []}, + }, + ), + ( + [1, 2], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + ["1", "hi"], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "hi"}]}, + }, + ), + ], +) def test_complex_query_parameter_type(type_, value, expect): from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + param = complex_query_parameter("test", value, type_).to_api_repr() - assert param.pop('name') == 'test' + assert param.pop("name") == "test" assert param == expect @pytest.mark.parametrize( "value,type_,expect", [ - ([], 'ARRAY', "Invalid scalar type, INT, in ARRAY"), - ([], 'x', "Invalid parameter type, x"), - ({}, 'struct', "Invalid struct field, int, in struct"), - ({'x': 1}, 'struct', - "The given parameter type, int," - " for x is not a valid BigQuery scalar type, in struct."), - ([], 'x<', "Invalid parameter type, x<"), - (0, 'ARRAY', "Array type with non-array-like value with type int"), - ([], 'ARRAY>', - "Array can't contain an array in ARRAY>"), - ([], 'struct', "Non-mapping value for type struct"), - ({}, 'struct', "No field value for x in struct"), - ({'x': 1, 'y': 1}, 'struct', "Extra data keys for struct"), - ([], 'array>', "Invalid struct field, xxx, in array>"), - ([], 'array<<>>', "Invalid parameter type, <>"), - ]) + ([], "ARRAY", "Invalid scalar type, INT, in ARRAY"), + ([], "x", "Invalid parameter type, x"), + ({}, "struct", "Invalid struct field, int, in struct"), + ( + {"x": 1}, + "struct", + "The given parameter type, int," + " for x is not a valid BigQuery scalar type, in struct.", + ), + ([], "x<", "Invalid parameter type, x<"), + (0, "ARRAY", "Array type with non-array-like value with type int"), + ( + [], + "ARRAY>", + "Array can't contain an array in ARRAY>", + ), + ([], "struct", "Non-mapping value for type struct"), + ({}, "struct", "No field value for x in struct"), + ({"x": 1, "y": 1}, "struct", "Extra data keys for struct"), + ([], "array>", "Invalid struct field, xxx, in array>"), + ([], "array<<>>", "Invalid parameter type, <>"), + ], +) def test_complex_query_parameter_type_errors(type_, value, expect): from google.cloud.bigquery.dbapi._helpers import complex_query_parameter from google.cloud.bigquery.dbapi import exceptions with pytest.raises( - exceptions.ProgrammingError, - match="^" + re.escape(expect) + "$", - ): + exceptions.ProgrammingError, match="^" + re.escape(expect) + "$", + ): complex_query_parameter("test", value, type_) @pytest.mark.parametrize( "parameters,parameter_types,expect", [ - ([[], dict(name='ch1', bdate=datetime.date(2021, 1, 1))], - ['ARRAY', 'struct'], - [ - {'parameterType': {'arrayType': {'type': 'INT64'}, - 'type': 'ARRAY'}, - 'parameterValue': {'arrayValues': []}}, - {'parameterType': {'structTypes': [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'bdate', - 'type': {'type': 'DATE'}}], - 'type': 'STRUCT'}, - 'parameterValue': {'structValues': {'bdate': {'value': '2021-01-01'}, - 'name': {'value': 'ch1'}}}}, - ]), - (dict(ids=[], child=dict(name='ch1', bdate=datetime.date(2021, 1, 1))), - dict(ids='ARRAY', child='struct'), - [ - {'name': 'ids', - 'parameterType': {'arrayType': {'type': 'INT64'}, - 'type': 'ARRAY'}, - 'parameterValue': {'arrayValues': []}}, - {'name': 'child', - 'parameterType': {'structTypes': [{'name': 'name', - 'type': {'type': 'STRING'}}, - {'name': 'bdate', - 'type': {'type': 'DATE'}}], - 'type': 'STRUCT'}, - 'parameterValue': {'structValues': {'bdate': {'value': '2021-01-01'}, - 'name': {'value': 'ch1'}}}}, - ]), - ]) + ( + [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], + [ + { + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ( + dict(ids=[], child=dict(name="ch1", bdate=datetime.date(2021, 1, 1))), + dict(ids="ARRAY", child="struct"), + [ + { + "name": "ids", + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "name": "child", + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ], +) def test_to_query_parameters_complex_types(parameters, parameter_types, expect): from google.cloud.bigquery.dbapi._helpers import to_query_parameters - result = [ - p.to_api_repr() - for p in to_query_parameters(parameters, parameter_types) - ] + result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] assert result == expect diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 90dddd517..026810aaf 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -819,18 +819,21 @@ def test__format_operation_no_placeholders(self): ), ( "values(%(foo:struct)s, %(bar)s)", - ("values(%(foo)s, %(bar)s)", - dict(foo="struct")), + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), ), ( "values(%(foo:struct)s, %(bar)s)", - ("values(%(foo)s, %(bar)s)", - dict(foo="struct")), + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), ), ( "values(%(foo:string(10))s, %(bar)s)", - ("values(%(foo)s, %(bar)s)", - dict(foo="string(10)")), + ("values(%(foo)s, %(bar)s)", dict(foo="string(10)")), ), ], ) From ef2b323ee11f89b1f9c1ffff58bdb976f46d7af5 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 15:27:17 -0400 Subject: [PATCH 09/21] using match.groups() throws off pytypes, also fix some type hints. --- google/cloud/bigquery/dbapi/_helpers.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 2ccd197c6..cb392ef67 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,7 @@ import functools import numbers import re +import typing from google.cloud import bigquery from google.cloud.bigquery import table, enums, query @@ -156,7 +157,9 @@ def split_struct_fields(fields): yield field -def complex_query_parameter_type(name: str, type_: str, base: str): +def complex_query_parameter_type( + name: typing.Optional[str], type_: str, base: str +): type_ = type_.strip() if "<" not in type_: # Scalar @@ -176,7 +179,7 @@ def complex_query_parameter_type(name: str, type_: str, base: str): m = complex_query_parameter_parse(type_) if not m: raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") - tname, sub = m.groups() + tname, sub = m.group(1, 2) tname = tname.upper() sub = sub.strip() if tname == "ARRAY": @@ -198,7 +201,9 @@ def complex_query_parameter_type(name: str, type_: str, base: str): return query.StructQueryParameterType(*fields, name=name) -def complex_query_parameter(name, value, type_, base=None): +def complex_query_parameter( + name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None +): """ Construct a query parameter for a complex type (array or struct record) @@ -224,7 +229,7 @@ def complex_query_parameter(name, value, type_, base=None): m = complex_query_parameter_parse(type_) if not m: raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") - tname, sub = m.groups() + tname, sub = m.group(1, 2) tname = tname.upper() sub = sub.strip() if tname == "ARRAY": From 654b108e2059da4f0050611abbc9f984bc6f8e39 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 23 Jun 2021 19:28:50 +0000 Subject: [PATCH 10/21] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md --- google/cloud/bigquery/dbapi/_helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index cb392ef67..5aa07fd3c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -157,9 +157,7 @@ def split_struct_fields(fields): yield field -def complex_query_parameter_type( - name: typing.Optional[str], type_: str, base: str -): +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): type_ = type_.strip() if "<" not in type_: # Scalar From 525b8fd9c23fb78064296fcbc6e36d7731c78718 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 15:35:45 -0400 Subject: [PATCH 11/21] blacken --- google/cloud/bigquery/dbapi/_helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index cb392ef67..5aa07fd3c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -157,9 +157,7 @@ def split_struct_fields(fields): yield field -def complex_query_parameter_type( - name: typing.Optional[str], type_: str, base: str -): +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): type_ = type_.strip() if "<" not in type_: # Scalar From 462f2eb2ac15da1c4f3c5af91ca0ae5408bd84eb Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 15:57:21 -0400 Subject: [PATCH 12/21] remove type hints -- maybe they broke docs? --- google/cloud/bigquery/dbapi/_helpers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 5aa07fd3c..75ef12b7e 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -157,7 +157,7 @@ def split_struct_fields(fields): yield field -def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): +def complex_query_parameter_type(name, type_, base): type_ = type_.strip() if "<" not in type_: # Scalar @@ -199,9 +199,7 @@ def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: s return query.StructQueryParameterType(*fields, name=name) -def complex_query_parameter( - name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None -): +def complex_query_parameter(name, value, type_, base=None): """ Construct a query parameter for a complex type (array or struct record) From a6393e695936914a2cfeea50fa586ac65fc85501 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 16:17:34 -0400 Subject: [PATCH 13/21] Revert "remove type hints -- maybe they broke docs?" This reverts commit 462f2eb2ac15da1c4f3c5af91ca0ae5408bd84eb. --- google/cloud/bigquery/dbapi/_helpers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 75ef12b7e..5aa07fd3c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -157,7 +157,7 @@ def split_struct_fields(fields): yield field -def complex_query_parameter_type(name, type_, base): +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): type_ = type_.strip() if "<" not in type_: # Scalar @@ -199,7 +199,9 @@ def complex_query_parameter_type(name, type_, base): return query.StructQueryParameterType(*fields, name=name) -def complex_query_parameter(name, value, type_, base=None): +def complex_query_parameter( + name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None +): """ Construct a query parameter for a complex type (array or struct record) From e63e8b7a06df91cfeae2160fd9ec4dcb55ba878c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 23 Jun 2021 16:41:53 -0400 Subject: [PATCH 14/21] pin gcp-sphinx-docfx-yaml==0.2.0 so docfx doesn't fail. --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..2bc2afde1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From 91b002851809527ec8e37b4cb3fdb16b1570059a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 24 Jun 2021 13:06:45 -0400 Subject: [PATCH 15/21] Review comments: examples, and guard against large number of fields --- google/cloud/bigquery/dbapi/_helpers.py | 36 +++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 5aa07fd3c..064bf42c4 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -148,16 +148,35 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): ).match -def split_struct_fields(fields): +def _split_struct_fields(fields): + # Split a string of struct fields. They're defined by commas, but + # we have to avoid splitting on commas interbal to fields. For + # example: + # name string, children array> + # + # only has 2 top-level fields. fields = fields.split(",") + fields = list(reversed(fields)) # in the off chance that there are very many while fields: - field = fields.pop(0) + field = fields.pop() while fields and field.count("<") != field.count(">"): - field += "," + fields.pop(0) + field += "," + fields.pop() yield field def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): + """Construct a parameter type (`StructQueryParameterType`) for a complex type + + or a non-complex type that's part of a complex type. + + Examples: + + array> + + struct>> + + This is used for computing array types. + """ type_ = type_.strip() if "<" not in type_: # Scalar @@ -186,7 +205,7 @@ def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: s ) else: fields = [] - for field_string in split_struct_fields(sub): + for field_string in _split_struct_fields(sub): field_string = field_string.strip() m = parse_struct_field(field_string) if not m: @@ -206,6 +225,13 @@ def complex_query_parameter( Construct a query parameter for a complex type (array or struct record) or for a subtype, which may not be complex + + Examples: + + array> + + struct>> + """ type_ = type_.strip() base = base or type_ @@ -251,7 +277,7 @@ def complex_query_parameter( if not isinstance(value, collections_abc.Mapping): raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") value_keys = set(value) - for field_string in split_struct_fields(sub): + for field_string in _split_struct_fields(sub): field_string = field_string.strip() m = parse_struct_field(field_string) if not m: From 35555aa69c10ea56fa2934fa63a1c6a71494241d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 24 Jun 2021 17:08:21 +0000 Subject: [PATCH 16/21] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md --- google/cloud/bigquery/dbapi/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 064bf42c4..1718a700c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -156,7 +156,7 @@ def _split_struct_fields(fields): # # only has 2 top-level fields. fields = fields.split(",") - fields = list(reversed(fields)) # in the off chance that there are very many + fields = list(reversed(fields)) # in the off chance that there are very many while fields: field = fields.pop() while fields and field.count("<") != field.count(">"): From 55543013d236c4e2a302c9e9478f6b75d63b8910 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 25 Jun 2021 09:57:29 -0400 Subject: [PATCH 17/21] Factored some repeated code in handling complex parameters --- google/cloud/bigquery/dbapi/_helpers.py | 193 ++++++++++++------------ tests/unit/test_dbapi__helpers.py | 7 +- 2 files changed, 104 insertions(+), 96 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 1718a700c..090c986f4 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -129,26 +129,17 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): return bigquery.ArrayQueryParameter(name, array_type, value) -complex_query_parameter_parse = re.compile( - r""" - \s* - (ARRAY|STRUCT|RECORD) # Type - \s* - <([A-Z0-9<> ,()]+)> # Subtype(s) - \s*$ - """, - re.IGNORECASE | re.VERBOSE, -).match -parse_struct_field = re.compile( - r""" - (?:(\w+)\s+) # field name - ([A-Z0-9<> ,()]+) # Field type - $""", - re.VERBOSE | re.IGNORECASE, -).match - - -def _split_struct_fields(fields): +def _parse_struct_fields( + fields, + base, + parse_struct_field=re.compile( + r""" + (?:(\w+)\s+) # field name + ([A-Z0-9<> ,()]+) # Field type + $""", + re.VERBOSE | re.IGNORECASE, + ).match, +): # Split a string of struct fields. They're defined by commas, but # we have to avoid splitting on commas interbal to fields. For # example: @@ -161,23 +152,33 @@ def _split_struct_fields(fields): field = fields.pop() while fields and field.count("<") != field.count(">"): field += "," + fields.pop() - yield field - -def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): - """Construct a parameter type (`StructQueryParameterType`) for a complex type - - or a non-complex type that's part of a complex type. - - Examples: - - array> - - struct>> - - This is used for computing array types. - """ - type_ = type_.strip() + m = parse_struct_field(field.strip()) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field}, in {base}" + ) + yield m.group(1, 2) + + +SCALAR, ARRAY, STRUCT = "sar" + + +def _parse_type( + type_, + name, + base, + complex_query_parameter_parse=re.compile( + r""" + \s* + (ARRAY|STRUCT|RECORD) # Type + \s* + <([A-Z0-9<> ,()]+)> # Subtype(s) + \s*$ + """, + re.IGNORECASE | re.VERBOSE, + ).match, +): if "<" not in type_: # Scalar @@ -187,35 +188,57 @@ def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: s type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) except AttributeError: raise exceptions.ProgrammingError( - f"Invalid scalar type, {type_}, in {base}" + f"The given parameter type, {type_}," + f"{' for ' + name if name else ''}" + f" is not a valid BigQuery scalar type, in {base}." ) if name: type_ = type_.with_name(name) - return type_ + return SCALAR, type_ m = complex_query_parameter_parse(type_) if not m: raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") tname, sub = m.group(1, 2) - tname = tname.upper() - sub = sub.strip() - if tname == "ARRAY": - return query.ArrayQueryParameterType( - complex_query_parameter_type(None, sub, base), name=name - ) + if tname.upper() == "ARRAY": + sub_type = complex_query_parameter_type(None, sub, base) + if isinstance(sub_type, query.ArrayQueryParameterType): + raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") + sub_type._complex__src = sub + return ARRAY, sub_type else: - fields = [] - for field_string in _split_struct_fields(sub): - field_string = field_string.strip() - m = parse_struct_field(field_string) - if not m: - raise exceptions.ProgrammingError( - f"Invalid struct field, {field_string}, in {base}" - ) - field_name, field_type = m.groups() - fields.append(complex_query_parameter_type(field_name, field_type, base)) + return STRUCT, _parse_struct_fields(sub, base) - return query.StructQueryParameterType(*fields, name=name) + +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): + """Construct a parameter type (`StructQueryParameterType`) for a complex type + + or a non-complex type that's part of a complex type. + + Examples: + + array> + + struct>> + + This is used for computing array types. + """ + + type_type, sub_type = _parse_type(type_, name, base) + if type_type == SCALAR: + type_ = sub_type + elif type_type == ARRAY: + type_ = query.ArrayQueryParameterType(sub_type, name=name) + elif type_type == STRUCT: + fields = [ + complex_query_parameter_type(field_name, field_type, base) + for field_name, field_type in sub_type + ] + type_ = query.StructQueryParameterType(*fields, name=name) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return type_ def complex_query_parameter( @@ -233,58 +256,34 @@ def complex_query_parameter( struct>> """ - type_ = type_.strip() base = base or type_ - if ">" not in type_: - # Scalar - - # Strip type parameters - type_ = type_parameters_re.sub("", type_).strip() - try: - type_ = getattr(enums.SqlParameterScalarTypes, type_.upper())._type - except AttributeError: - raise exceptions.ProgrammingError( - f"The given parameter type, {type_}," - f" for {name} is not a valid BigQuery scalar type, in {base}." - ) - return query.ScalarQueryParameter(name, type_, value) + type_type, sub_type = _parse_type(type_, name, base) - m = complex_query_parameter_parse(type_) - if not m: - raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") - tname, sub = m.group(1, 2) - tname = tname.upper() - sub = sub.strip() - if tname == "ARRAY": + if type_type == SCALAR: + param = query.ScalarQueryParameter(name, sub_type._type, value) + elif type_type == ARRAY: if not array_like(value): raise exceptions.ProgrammingError( f"Array type with non-array-like value" f" with type {type(value).__name__}" ) - array_type = complex_query_parameter_type(name, sub, base) - if isinstance(array_type, query.ArrayQueryParameterType): - raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") - return query.ArrayQueryParameter( + param = query.ArrayQueryParameter( name, - array_type, - [complex_query_parameter(None, v, sub, base) for v in value] - if "<" in sub - else value, + sub_type, + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ], ) - else: - fields = [] + elif type_type == STRUCT: if not isinstance(value, collections_abc.Mapping): raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") value_keys = set(value) - for field_string in _split_struct_fields(sub): - field_string = field_string.strip() - m = parse_struct_field(field_string) - if not m: - raise exceptions.ProgrammingError( - f"Invalid struct field, {field_string}, in {base or type_}" - ) - field_name, field_type = m.groups() + fields = [] + for field_name, field_type in sub_type: if field_name not in value: raise exceptions.ProgrammingError( f"No field value for {field_name} in {type_}" @@ -296,7 +295,11 @@ def complex_query_parameter( if value_keys: raise exceptions.ProgrammingError(f"Extra data keys for {type_}") - return query.StructQueryParameter(name, *fields) + param = query.StructQueryParameter(name, *fields) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return param def to_query_parameters_list(parameters, parameter_types): diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index f5b02d760..841bf3e03 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -566,7 +566,12 @@ def test_complex_query_parameter_type(type_, value, expect): @pytest.mark.parametrize( "value,type_,expect", [ - ([], "ARRAY", "Invalid scalar type, INT, in ARRAY"), + ( + [], + "ARRAY", + "The given parameter type, INT," + " is not a valid BigQuery scalar type, in ARRAY.", + ), ([], "x", "Invalid parameter type, x"), ({}, "struct", "Invalid struct field, int, in struct"), ( From 8830113ac2e169f763db5f13831a9836c7071384 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 25 Jun 2021 10:55:21 -0400 Subject: [PATCH 18/21] Improved the error for dict (structish) parameter values without explicit struct type and also factored some repeated code --- google/cloud/bigquery/dbapi/_helpers.py | 62 ++++++++++--------------- tests/unit/test_dbapi__helpers.py | 29 +++++++++++- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 090c986f4..c0e65f74e 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -302,6 +302,24 @@ def complex_query_parameter( return param +def _dispatch_parameter(type_, value, name=None): + if type_ is not None and "<" in type_: + param = complex_query_parameter(name, value, type_) + elif isinstance(value, collections_abc.Mapping): + raise NotImplementedError( + f"STRUCT-like parameter values are not supported" + f"{' (parameter ' + name + ')' if name else ''}," + f" unless an explicit type is give in the parameter placeholder" + f" (e.g. '%({name if name else ''}:struct<...>)s')." + ) + elif array_like(value): + param = array_to_query_parameter(value, name, type_) + else: + param = scalar_to_query_parameter(value, name, type_) + + return param + + def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. @@ -315,21 +333,9 @@ def to_query_parameters_list(parameters, parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of query parameters. """ - result = [] - - for value, type_ in zip(parameters, parameter_types): - if type_ is not None and "<" in type_: - param = complex_query_parameter(None, value, type_) - elif isinstance(value, collections_abc.Mapping): - raise NotImplementedError("STRUCT-like parameter values are not supported.") - elif array_like(value): - param = array_to_query_parameter(value, None, type_) - else: - param = scalar_to_query_parameter(value, None, type_) - - result.append(param) - - return result + return [_dispatch_parameter(type_, value) + for value, type_ in zip(parameters, parameter_types) + ] def to_query_parameters_dict(parameters, query_parameter_types): @@ -345,29 +351,9 @@ def to_query_parameters_dict(parameters, query_parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of named query parameters. """ - result = [] - - for name, value in parameters.items(): - query_parameter_type = query_parameter_types.get(name) - if query_parameter_type is not None and "<" in query_parameter_type: - param = complex_query_parameter(name, value, query_parameter_type) - elif isinstance(value, collections_abc.Mapping): - raise NotImplementedError( - "STRUCT-like parameter values are not supported " - "(parameter {}).".format(name) - ) - elif array_like(value): - param = array_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type - ) - else: - param = scalar_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type, - ) - - result.append(param) - - return result + return [_dispatch_parameter(query_parameter_types.get(name), value, name) + for name, value in parameters.items() + ] def to_query_parameters(parameters, parameter_types): diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 841bf3e03..f616b55d5 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -563,6 +563,10 @@ def test_complex_query_parameter_type(type_, value, expect): assert param == expect +def _expected_error_match(expect): + return "^" + re.escape(expect) + "$" + + @pytest.mark.parametrize( "value,type_,expect", [ @@ -599,11 +603,10 @@ def test_complex_query_parameter_type_errors(type_, value, expect): from google.cloud.bigquery.dbapi import exceptions with pytest.raises( - exceptions.ProgrammingError, match="^" + re.escape(expect) + "$", + exceptions.ProgrammingError, match=_expected_error_match(expect), ): complex_query_parameter("test", value, type_) - @pytest.mark.parametrize( "parameters,parameter_types,expect", [ @@ -666,3 +669,25 @@ def test_to_query_parameters_complex_types(parameters, parameter_types, expect): result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] assert result == expect + +def test_to_query_parameters_struct_error(): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported, " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(:struct<...>)s').") + ): + to_query_parameters([dict(x=1)], [None]) + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported (parameter foo), " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(foo:struct<...>)s').") + ): + to_query_parameters(dict(foo=dict(x=1)), {}) + From a72cafba0d3cf05734f9544886f6d68e50bbd6b3 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 25 Jun 2021 10:56:33 -0400 Subject: [PATCH 19/21] blacken --- google/cloud/bigquery/dbapi/_helpers.py | 16 +++++++++------- tests/unit/test_dbapi__helpers.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index c0e65f74e..e44ecb2a7 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -311,7 +311,7 @@ def _dispatch_parameter(type_, value, name=None): f"{' (parameter ' + name + ')' if name else ''}," f" unless an explicit type is give in the parameter placeholder" f" (e.g. '%({name if name else ''}:struct<...>)s')." - ) + ) elif array_like(value): param = array_to_query_parameter(value, name, type_) else: @@ -333,9 +333,10 @@ def to_query_parameters_list(parameters, parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of query parameters. """ - return [_dispatch_parameter(type_, value) - for value, type_ in zip(parameters, parameter_types) - ] + return [ + _dispatch_parameter(type_, value) + for value, type_ in zip(parameters, parameter_types) + ] def to_query_parameters_dict(parameters, query_parameter_types): @@ -351,9 +352,10 @@ def to_query_parameters_dict(parameters, query_parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of named query parameters. """ - return [_dispatch_parameter(query_parameter_types.get(name), value, name) - for name, value in parameters.items() - ] + return [ + _dispatch_parameter(query_parameter_types.get(name), value, name) + for name, value in parameters.items() + ] def to_query_parameters(parameters, parameter_types): diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index f616b55d5..b33203354 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -607,6 +607,7 @@ def test_complex_query_parameter_type_errors(type_, value, expect): ): complex_query_parameter("test", value, type_) + @pytest.mark.parametrize( "parameters,parameter_types,expect", [ @@ -670,6 +671,7 @@ def test_to_query_parameters_complex_types(parameters, parameter_types, expect): result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] assert result == expect + def test_to_query_parameters_struct_error(): from google.cloud.bigquery.dbapi._helpers import to_query_parameters @@ -678,8 +680,9 @@ def test_to_query_parameters_struct_error(): match=_expected_error_match( "STRUCT-like parameter values are not supported, " "unless an explicit type is give in the parameter placeholder " - "(e.g. '%(:struct<...>)s').") - ): + "(e.g. '%(:struct<...>)s')." + ), + ): to_query_parameters([dict(x=1)], [None]) with pytest.raises( @@ -687,7 +690,7 @@ def test_to_query_parameters_struct_error(): match=_expected_error_match( "STRUCT-like parameter values are not supported (parameter foo), " "unless an explicit type is give in the parameter placeholder " - "(e.g. '%(foo:struct<...>)s').") - ): + "(e.g. '%(foo:struct<...>)s')." + ), + ): to_query_parameters(dict(foo=dict(x=1)), {}) - From cba9697d5b545198b271430643c78ea56a73a1c4 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 25 Jun 2021 11:00:35 -0400 Subject: [PATCH 20/21] removed repeated word --- docs/dbapi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dbapi.rst b/docs/dbapi.rst index 299f71911..81f000bc7 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -25,7 +25,7 @@ and using named parameters:: Providing explicit type information ----------------------------------- -BigQuery requires type information for parameters. The The BigQuery +BigQuery requires type information for parameters. The BigQuery DB-API can usually determine parameter types for parameters based on provided values. Sometimes, however, types can't be determined (for example when `None` is passed) or are determined incorrectly (for From 12bd9419f59a2443605e79f194adfa786cf93cd7 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 29 Jun 2021 13:59:34 -0400 Subject: [PATCH 21/21] Update google/cloud/bigquery/dbapi/_helpers.py Co-authored-by: Tim Swast --- google/cloud/bigquery/dbapi/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index e44ecb2a7..9c134b47c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -141,7 +141,7 @@ def _parse_struct_fields( ).match, ): # Split a string of struct fields. They're defined by commas, but - # we have to avoid splitting on commas interbal to fields. For + # we have to avoid splitting on commas internal to fields. For # example: # name string, children array> #