diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index c07f148f0..2567653c0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest + digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml index 2b6451c19..e54051157 100644 --- a/.github/.OwlBot.yaml +++ b/.github/.OwlBot.yaml @@ -13,7 +13,7 @@ # limitations under the License. docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest deep-remove-regex: - /owl-bot-staging diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3cb6bee..d531ec477 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) + + +### Bug Fixes + +* remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) + +## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) + + +### Features + +* Add py.typed for PEP 561 compliance ([#976](https://www.github.com/googleapis/python-bigquery/issues/976)) ([96e6bee](https://www.github.com/googleapis/python-bigquery/commit/96e6beef3c63b663b7e5879b1458f4dd1a47a5b5)) +* include key metadata in Job representation ([#964](https://www.github.com/googleapis/python-bigquery/issues/964)) ([acca1cb](https://www.github.com/googleapis/python-bigquery/commit/acca1cb7baaa3b00508246c994ade40314d421c3)) + + +### Bug Fixes + +* Arrow extension-type metadata was not set when calling the REST API or when there are no rows ([#946](https://www.github.com/googleapis/python-bigquery/issues/946)) ([864383b](https://www.github.com/googleapis/python-bigquery/commit/864383bc01636b3774f7da194587b8b7edd0383d)) +* disambiguate missing policy tags from explicitly unset policy tags ([#983](https://www.github.com/googleapis/python-bigquery/issues/983)) ([f83c00a](https://www.github.com/googleapis/python-bigquery/commit/f83c00acead70fc0ce9959eefb133a672d816277)) +* remove default timeout ([#974](https://www.github.com/googleapis/python-bigquery/issues/974)) ([1cef0d4](https://www.github.com/googleapis/python-bigquery/commit/1cef0d4664bf448168b26487a71795144b7f4d6b)) + + +### Documentation + +* simplify destination table sample with f-strings ([#966](https://www.github.com/googleapis/python-bigquery/issues/966)) ([ab6e76f](https://www.github.com/googleapis/python-bigquery/commit/ab6e76f9489262fd9c1876a1c4f93d7e139aa999)) + ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) diff --git a/README.rst b/README.rst index 8454cf9c0..d0ad059a2 100644 --- 
a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ processing power of Google's infrastructure. - `Product Documentation`_ .. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg - :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#general-availability + :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg diff --git a/docs/conf.py b/docs/conf.py index b8ddbd8c8..9545d06ed 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -363,7 +363,7 @@ "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/reference.rst b/docs/reference.rst index 128dee718..8fff2e68f 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -128,6 +128,7 @@ Schema :toctree: generated schema.SchemaField + schema.PolicyTagList Query diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 660a660b4..ec8f68af0 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -87,6 +87,7 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType +from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.standard_sql import StandardSqlDataType from google.cloud.bigquery.standard_sql import StandardSqlField @@ -145,6 +146,7 @@ "RoutineReference", # Shared helpers "SchemaField", + "PolicyTagList", "UDFResource", "ExternalConfig", "BigtableOptions", diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index e7f5bd59b..f400f9b70 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -604,8 +604,9 @@ def _get_sub_prop(container, keys, default=None): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to get the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to get the value for. If ``keys`` is a + string, it is treated as a sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. default (Optional[object]): Returns: object: The value if present or the default. """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys: if key not in sub_val: @@ -647,8 +651,9 @@ def _set_sub_prop(container, keys, value): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to set the value for. Each item in - the sequence represents a deeper nesting. 
The first key is for + A sequence of keys to attempt to set the value for. If ``keys`` is a + string, it is treated as a sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. value (object): Value to set within the container. @@ -675,6 +680,9 @@ >>> container {'key': {'subkey': 'new'}} """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys[:-1]: if key not in sub_val: diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a627f5226..b034c0fd1 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -183,6 +183,13 @@ def pyarrow_timestamp(): # the type ID matters, and it's the same for all decimal256 instances. pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", } +BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, +} def bq_to_arrow_struct_data_type(field): @@ -233,7 +240,12 @@ def bq_to_arrow_field(bq_field, array_type=None): if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" - return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( + bq_field.field_type.upper() if bq_field.field_type else "" + ) + return pyarrow.field( + bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) return None diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index e5fc592a6..698181092 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -357,6 +357,11 @@ def reservation_usage(self): def transaction_info(self) -> Optional[TransactionInfo]: """Information of the multi-statement transaction if this job is part of one. + Since a scripting query job can execute multiple transactions, this + property is only expected on child jobs. Use the + :meth:`google.cloud.bigquery.client.Client.list_jobs` method with the + ``parent_job`` parameter to iterate over child jobs. + .. versionadded:: 2.24.0 """ info = self._properties.get("statistics", {}).get("transactionInfo") @@ -722,6 +727,14 @@ def cancelled(self): and self.error_result.get("reason") == _STOPPED_REASON ) + def __repr__(self): + result = ( + f"{self.__class__.__name__}<" + f"project={self.project}, location={self.location}, id={self.job_id}" + ">" + ) + return result + class _JobConfig(object): """Abstract base class for job configuration objects. diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 18b7b13ec..2f8dfbb8b 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -174,6 +174,20 @@ def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """ return self._properties.get("labelColumns", []) + @property + def best_trial_id(self) -> Optional[int]: + """The best trial_id across all training runs. + + .. deprecated:: + This property is deprecated! + + Read-only. 
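Reviewer note on the property added here: the BigQuery REST API serializes int64 fields as JSON strings, which is why the body just below casts with `int()` before returning. A self-contained sketch of that pattern — `ModelSketch` is a made-up stand-in, not the library's `Model` class:

```python
from typing import Any, Dict, Optional

class ModelSketch:
    """Illustrative stand-in for google.cloud.bigquery.model.Model."""

    def __init__(self, properties: Dict[str, Any]) -> None:
        self._properties = properties

    @property
    def best_trial_id(self) -> Optional[int]:
        # The REST API encodes int64 values as JSON strings, so coerce to
        # int when the key is present; absent keys read as None.
        value = self._properties.get("bestTrialId")
        return int(value) if value is not None else None

assert ModelSketch({"bestTrialId": "123"}).best_trial_id == 123
assert ModelSketch({}).best_trial_id is None
```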
+ """ + value = self._properties.get("bestTrialId") + if value is not None: + value = int(value) + return value + @property def expires(self) -> Optional[datetime.datetime]: """The datetime when this model expires. diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 830582322..8a86973cd 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -60,7 +60,7 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ -DEFAULT_TIMEOUT = 5.0 * 60.0 +DEFAULT_TIMEOUT = None """The default API timeout. This is the time to wait per request. To adjust the total wait time, set a diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b52e288f4..f221e65a8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,13 +15,13 @@ """Schemas for BigQuery tables / queries.""" import collections -from typing import Optional +import enum +from typing import Iterable, Union from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames -_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -50,47 +50,62 @@ """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" +class _DefaultSentinel(enum.Enum): + """Object used as 'sentinel' indicating default value should be used. + + Uses enum so that pytype/mypy knows that this is the only possible value. + https://stackoverflow.com/a/60605919/101923 + + Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. + https://docs.python.org/3/library/typing.html#typing.Literal + """ + + DEFAULT_VALUE = object() + + +_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE + + class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): The name of the field. + name: The name of the field. - field_type (str): The type of the field. See + field_type: + The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (Optional[str]): The mode of the field. See + mode: + Defaults to ``'NULLABLE'``. The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): Description for the field. + description: Description for the field. - fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): - Subfields (requires ``field_type`` of 'RECORD'). + fields: Subfields (requires ``field_type`` of 'RECORD'). - policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + policy_tags: The policy tag list for the field. - precision (Optional[int]): + precision: Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. - scale (Optional[int]): + scale: Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - max_length (Optional[int]): - Maximim length of fields with STRING or BYTES type. - + max_length: Maximum length of fields with STRING or BYTES type. 
""" def __init__( self, - name, - field_type, - mode="NULLABLE", - description=_DEFAULT_VALUE, - fields=(), - policy_tags=None, - precision=_DEFAULT_VALUE, - scale=_DEFAULT_VALUE, - max_length=_DEFAULT_VALUE, + name: str, + field_type: str, + mode: str = "NULLABLE", + description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, + fields: Iterable["SchemaField"] = (), + policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, + precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): self._properties = { "name": name, @@ -106,28 +121,12 @@ def __init__( self._properties["scale"] = scale if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length + if policy_tags is not _DEFAULT_VALUE: + self._properties["policyTags"] = ( + policy_tags.to_api_repr() if policy_tags is not None else None + ) self._fields = tuple(fields) - self._policy_tags = self._determine_policy_tags(field_type, policy_tags) - - @staticmethod - def _determine_policy_tags( - field_type: str, given_policy_tags: Optional["PolicyTagList"] - ) -> Optional["PolicyTagList"]: - """Return the given policy tags, or their suitable representation if `None`. - - Args: - field_type: The type of the schema field. - given_policy_tags: The policy tags to maybe ajdust. - """ - if given_policy_tags is not None: - return given_policy_tags - - if field_type is not None and field_type.upper() in _STRUCT_TYPES: - return None - - return PolicyTagList() - @staticmethod def __get_int(api_repr, name): v = api_repr.get(name, _DEFAULT_VALUE) @@ -153,10 +152,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) - policy_tags = cls._determine_policy_tags( - field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) - ) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: + policy_tags = PolicyTagList.from_api_repr(policy_tags) return cls( field_type=field_type, @@ -231,7 +230,8 @@ def policy_tags(self): """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list definition for this field. """ - return self._policy_tags + resource = self._properties.get("policyTags") + return PolicyTagList.from_api_repr(resource) if resource is not None else None def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -245,10 +245,6 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - else: - # Explicitly include policy tag definition (we must not do it for RECORD - # fields, because those are not leaf fields). - answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. 
return answer @@ -273,7 +269,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c44289324..75901afb4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -64,6 +64,7 @@ import pandas import geopandas from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference _NO_PANDAS_ERROR = ( @@ -117,45 +118,95 @@ def _view_use_legacy_sql_getter(table): return True -class TableReference(object): +class _TableBase: + """Base class for Table-related classes with common functionality.""" + + _PROPERTY_TO_API_FIELD = { + "dataset_id": ["tableReference", "datasetId"], + "project": ["tableReference", "projectId"], + "table_id": ["tableReference", "tableId"], + } + + def __init__(self): + self._properties = {} + + @property + def project(self) -> str: + """Project bound to the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) + + @property + def dataset_id(self) -> str: + """ID of dataset containing the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) + + @property + def table_id(self) -> str: + """The table ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) + + @property + def path(self) -> str: + """URL path for the table's APIs.""" + return ( + f"/projects/{self.project}/datasets/{self.dataset_id}" + f"/tables/{self.table_id}" + ) + + def __eq__(self, other): + if isinstance(other, _TableBase): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + + +class TableReference(_TableBase): """TableReferences are pointers to tables. 
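Stepping back from the class bodies for a moment: `_TableBase` has to serve two storage layouts, which is why the `_helpers` string-key change earlier in this diff is needed. A quick sketch of both lookups (these are private helpers, shown purely for illustration):

```python
from google.cloud.bigquery import _helpers

# Table and TableListItem keep the IDs nested under "tableReference" ...
nested = {"tableReference": {"projectId": "p", "datasetId": "d", "tableId": "t"}}
assert _helpers._get_sub_prop(nested, ["tableReference", "projectId"]) == "p"

# ... while TableReference stores them flat, so its _PROPERTY_TO_API_FIELD
# maps to plain strings, which _get_sub_prop now accepts directly.
flat = {"projectId": "p", "datasetId": "d", "tableId": "t"}
assert _helpers._get_sub_prop(flat, "projectId") == "p"
```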
See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference Args: - dataset_ref (google.cloud.bigquery.dataset.DatasetReference): + dataset_ref: A pointer to the dataset - table_id (str): The ID of the table + table_id: + The ID of the table """ - def __init__(self, dataset_ref, table_id): - self._project = dataset_ref.project - self._dataset_id = dataset_ref.dataset_id - self._table_id = table_id - - @property - def project(self): - """str: Project bound to the table""" - return self._project - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._dataset_id + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "table_id": "tableId", + } - @property - def table_id(self): - """str: The table ID.""" - return self._table_id + def __init__(self, dataset_ref: "DatasetReference", table_id: str): + self._properties = {} - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self._project, - self._dataset_id, - self._table_id, + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, ) @classmethod @@ -216,6 +267,7 @@ def from_api_repr(cls, resource: dict) -> "TableReference": project = resource["projectId"] dataset_id = resource["datasetId"] table_id = resource["tableId"] + return cls(DatasetReference(project, dataset_id), table_id) def to_api_repr(self) -> dict: @@ -224,11 +276,7 @@ def to_api_repr(self) -> dict: Returns: Dict[str, object]: Table reference represented as an API resource """ - return { - "projectId": self._project, - "datasetId": self._dataset_id, - "tableId": self._table_id, - } + return copy.deepcopy(self._properties) def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. @@ -248,54 +296,25 @@ def to_bqstorage(self) -> str: str: A reference to this table in the BigQuery Storage API. """ - table_id, _, _ = self._table_id.partition("@") + table_id, _, _ = self.table_id.partition("@") table_id, _, _ = table_id.partition("$") - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, + table_ref = ( + f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}" ) - return table_ref - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - Tuple[str]: The contents of this :class:`DatasetReference`. 
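Although the `_key`/`__eq__`/`__hash__` plumbing is deleted below in favor of the base class, the public surface of `TableReference` is unchanged. A small usage check, assuming this release of google-cloud-bigquery and hypothetical IDs:

```python
from google.cloud import bigquery

dataset_ref = bigquery.DatasetReference("my-project", "my_dataset")
table_ref = bigquery.TableReference(dataset_ref, "my_table")

# The same values now come out of _properties instead of ad-hoc attributes.
assert table_ref.path == "/projects/my-project/datasets/my_dataset/tables/my_table"
assert table_ref.to_api_repr() == {
    "projectId": "my-project",
    "datasetId": "my_dataset",
    "tableId": "my_table",
}
assert str(table_ref) == "my-project.my_dataset.my_table"
```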
- """ - return (self._project, self._dataset_id, self._table_id) - - def __eq__(self, other): - if isinstance(other, (Table, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - elif isinstance(other, TableReference): - return self._key() == other._key() - else: - return NotImplemented - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference(self._project, self._dataset_id) - return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"TableReference({dataset_ref!r}, '{self.table_id}')" -class Table(object): +class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. See @@ -316,9 +335,9 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", - "dataset_id": ["tableReference", "datasetId"], "description": "description", "encryption_configuration": "encryptionConfiguration", "etag": "etag", @@ -337,14 +356,12 @@ class Table(object): "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", - "project": ["tableReference", "projectId"], "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", @@ -359,38 +376,8 @@ def __init__(self, table_ref, schema=None): if schema is not None: self.schema = schema - @property - def project(self): - """str: Project bound to the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["project"] - ) - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] - ) - - @property - def table_id(self): - """str: ID of the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"] - ) - reference = property(_reference_getter) - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self.project, - self.dataset_id, - self.table_id, - ) - @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -1031,29 +1018,11 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) - def __eq__(self, other): - if isinstance(other, Table): - return ( - self._properties["tableReference"] - == other._properties["tableReference"] - ) - elif isinstance(other, (TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def __repr__(self): return 
"Table({})".format(repr(self.reference)) -class TableListItem(object): +class TableListItem(_TableBase): """A read-only table resource from a list operation. For performance reasons, the BigQuery API only includes some of the table @@ -1117,21 +1086,6 @@ def expires(self): 1000.0 * float(expiration_time) ) - @property - def project(self): - """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] - - @property - def table_id(self): - """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] - reference = property(_reference_getter) @property @@ -1267,19 +1221,6 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def __eq__(self, other): - if isinstance(other, (Table, TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. @@ -1783,10 +1724,14 @@ def to_arrow( if owns_bqstorage_client: bqstorage_client._transport.grpc_channel.close() - if record_batches: + if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) else: - # No records, use schema based on BigQuery schema. + # No records (not record_batches), use schema based on BigQuery schema + # **or** + # we used the REST API (bqstorage_client is None), + # which doesn't add arrow extension metadata, so we let + # `bq_to_arrow_schema` do it. arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) @@ -2225,7 +2170,10 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + + # Since an empty GeoDataFrame has no geometry column, we do not CRS on it, + # because that's deprecated. + return geopandas.GeoDataFrame() def to_dataframe_iterable( self, diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 1f7d79ab9..3e5c77ede 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.26.0" +__version__ = "2.27.1" diff --git a/noxfile.py b/noxfile.py index dbf6a163c..d41573407 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,6 +16,7 @@ import pathlib import os +import re import shutil import nox @@ -97,6 +98,13 @@ def unit(session): @nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" + + # Install optional dependencies that are out-of-date. + # https://github.com/googleapis/python-bigquery/issues/933 + # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.install("pyarrow==1.0.0") + default(session, install_extras=False) @@ -205,9 +213,31 @@ def prerelease_deps(session): # PyArrow prerelease packages are published to an alternative PyPI host. 
# https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( - "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", + ) + session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", + "--pre", + "--upgrade", + "pandas", + ) + + session.install( + "--pre", + "--upgrade", + "google-api-core", + "google-cloud-bigquery-storage", + "google-cloud-core", + "google-resumable-media", + "grpcio", ) - session.install("--pre", "grpcio", "pandas") session.install( "freezegun", "google-cloud-datacatalog", @@ -219,7 +249,30 @@ def prerelease_deps(session): "pytest", "pytest-cov", ) - session.install("-e", ".[all]") + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. session.run("python", "-c", "import grpc; print(grpc.__version__)") diff --git a/owlbot.py b/owlbot.py index be493957e..dd9255d2f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,7 +30,7 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "pandas": "http://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", }, ) @@ -42,6 +42,7 @@ "noxfile.py", "docs/multiprocessing.rst", ".coveragerc", + ".github/CODEOWNERS", # Include custom SNIPPETS_TESTS job for performance. 
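Backtracking to the constraints parsing added in the `prerelease_deps` session above: a standalone sketch with made-up constraints text (the regex is the one from the diff; the lookahead keeps only `==`-pinned names while skipping comments and leading whitespace):

```python
import re

constraints_text = """\
# a comment line, ignored by the lookahead
google-api-core==1.29.0
  grpcio==1.38.1
pandas==0.24.2
"""

# (?===\S+) asserts the name is followed by "==<version>" without consuming it.
deps = [
    match.group(1)
    for match in re.finditer(
        r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE
    )
]
assert deps == ["google-api-core", "grpcio", "pandas"]
```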
# https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8fb578018..46162762c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.2.0 cachetools==4.2.2 certifi==2021.5.30 cffi==1.14.6 -charset-normalizer==2.0.4 +charset-normalizer==2.0.6 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 @@ -11,17 +11,17 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 -google-auth==2.0.2 -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-auth==2.2.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-cloud-core==2.0.0 -google-crc32c==1.1.2 -google-resumable-media==2.0.2 +google-crc32c==1.2.0 +google-resumable-media==2.0.3 googleapis-common-protos==1.53.0 -grpcio==1.39.0 +grpcio==1.41.0 idna==3.2 importlib-metadata==4.8.1 -libcst==0.3.20 +libcst==0.3.21 munch==2.5.0 mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" @@ -29,8 +29,6 @@ numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5; python_version < '3.7' pandas==1.3.2; python_version >= '3.7' -proto-plus==1.19.0 -protobuf==3.17.3 pyarrow==5.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -47,5 +45,5 @@ Shapely==1.7.1 six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 -urllib3==1.26.6 +urllib3==1.26.7 zipp==3.5.0 diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index a8d90501a..ed08b279a 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -38,12 +38,12 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [START bigquery_query_natality_tutorial] dataset = bigquery.Dataset(dataset_id_full) @@ -51,15 +51,13 @@ def run_natality_tutorial(override_values={}): # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) - # In the new BigQuery dataset, create a reference to a new table for - # storing the query results. - table_ref = dataset.table("regression_input") - # Configure the query job. job_config = bigquery.QueryJobConfig() - # Set the destination table to the table reference created above. - job_config.destination = table_ref + # Set the destination table to where you want to store query results. + # As of google-cloud-bigquery 1.11.0, a fully qualified table ID can be + # used in place of a TableReference. + job_config.destination = f"{dataset_id_full}.regression_input" # Set up a query in Standard SQL, which is the default for the BigQuery # Python client library. 
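For readers unfamiliar with the string form the updated sample now uses: a compact sketch, assuming application default credentials and an already-created `natality_regression` dataset:

```python
from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(
    # A fully qualified table ID string works here (since
    # google-cloud-bigquery 1.11.0), so no TableReference is needed.
    destination=f"{client.project}.natality_regression.regression_input"
)

sql = """
    SELECT weight_pounds
    FROM `bigquery-public-data.samples.natality`
    LIMIT 10
"""
client.query(sql, job_config=job_config).result()  # wait for the job to finish
```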
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e096af157..f9b9d023c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 -grpcio==1.39.0 +grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' diff --git a/tests/system/conftest.py b/tests/system/conftest.py index cc2c2a4dc..7eec76a32 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import pathlib +import re import pytest import test_utils.prefixer @@ -61,6 +62,17 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture() +def dataset_client(bigquery_client, dataset_id): + import google.cloud.bigquery.job + + return bigquery.Client( + default_query_job_config=google.cloud.bigquery.job.QueryJobConfig( + default_dataset=f"{bigquery_client.project}.{dataset_id}", + ) + ) + + @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" @@ -98,3 +110,8 @@ def scalars_extreme_table( job.result() yield full_table_id bigquery_client.delete_table(full_table_id) + + +@pytest.fixture +def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): + return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f36dc0944..092562b3c 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -106,3 +106,62 @@ def test_list_rows_nullable_scalars_dtypes( timestamp_type = schema.field("timestamp_col").type assert timestamp_type.unit == "us" assert timestamp_type.tz is not None + + +@pytest.mark.parametrize("do_insert", [True, False]) +def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( + dataset_client, test_table_name, do_insert +): + types = dict( + astring=("STRING", "'x'"), + astring9=("STRING(9)", "'x'"), + abytes=("BYTES", "b'x'"), + abytes9=("BYTES(9)", "b'x'"), + anumeric=("NUMERIC", "42"), + anumeric9=("NUMERIC(9)", "42"), + anumeric92=("NUMERIC(9,2)", "42"), + abignumeric=("BIGNUMERIC", "42e30"), + abignumeric49=("BIGNUMERIC(37)", "42e30"), + abignumeric492=("BIGNUMERIC(37,2)", "42e30"), + abool=("BOOL", "true"), + adate=("DATE", "'2021-09-06'"), + adatetime=("DATETIME", "'2021-09-06T09:57:26'"), + ageography=("GEOGRAPHY", "ST_GEOGFROMTEXT('point(0 0)')"), + # Can't get arrow data for interval :( + # ainterval=('INTERVAL', "make_interval(1, 2, 3, 4, 5, 6)"), + aint64=("INT64", "42"), + afloat64=("FLOAT64", "42.0"), + astruct=("STRUCT", "struct(42)"), + atime=("TIME", "'1:2:3'"), + atimestamp=("TIMESTAMP", "'2021-09-06T09:57:26'"), + ) + columns = ", ".join(f"{k} {t[0]}" for k, t in types.items()) + dataset_client.query(f"create table {test_table_name} ({columns})").result() + if do_insert: + names = list(types) + values = ", ".join(types[name][1] for name in names) + names = ", ".join(names) + dataset_client.query( + f"insert into {test_table_name} ({names}) values ({values})" + ).result() + at = dataset_client.query(f"select * from {test_table_name}").result().to_arrow() + storage_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + at = ( + dataset_client.query(f"select * from 
{test_table_name}") + .result() + .to_arrow(create_bqstorage_client=False) + ) + rest_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + + assert rest_api_metadata == storage_api_metadata + assert rest_api_metadata["adatetime"] == { + b"ARROW:extension:name": b"google:sqlType:datetime" + } + assert rest_api_metadata["ageography"] == { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + } diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 120e10f45..4884112ac 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -665,14 +665,15 @@ def test_unset_table_schema_attributes(self): mode=old_field.mode, description=None, fields=old_field.fields, - policy_tags=None, + policy_tags=PolicyTagList(), ) table.schema = new_schema updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertFalse(updated_table.schema[1].description) # Empty string or None. - self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + # policyTags key expected to be missing from response. + self.assertIsNone(updated_table.schema[1].policy_tags) def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) @@ -1578,9 +1579,15 @@ def test_transaction_info(self): query_job = Config.CLIENT.query(sql) query_job.result() - # Transaction ID set by the server should be accessible - assert query_job.transaction_info is not None - assert query_job.transaction_info.transaction_id != "" + child_jobs = Config.CLIENT.list_jobs(parent_job=query_job) + begin_transaction_job = next(iter(child_jobs)) + + # Transaction ID set by the server should be accessible on the child + # job responsible for `BEGIN TRANSACTION`. It is not expected to be + # present on the parent job itself. + # https://github.com/googleapis/python-bigquery/issues/975 + assert begin_transaction_job.transaction_info is not None + assert begin_transaction_job.transaction_info.transaction_id != "" def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 075d3b680..cbf4dff27 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -26,8 +26,8 @@ import pytest from google.cloud import bigquery -from google.cloud.bigquery import enums from google.cloud import bigquery_storage +from google.cloud.bigquery import enums from . import helpers diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index feba65aa5..c2ae78eaa 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -54,3 +54,8 @@ def disable_add_server_timeout_header(request): noop_add_server_timeout_header, ): yield + + +def pytest_configure(config): + # Explicitly register custom test markers to avoid warnings. 
+ config.addinivalue_line("markers", "enable_add_server_timeout_header") diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index aa8e9c045..e320c72cb 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1043,6 +1043,12 @@ def test_cancelled_w_error_result_w_stopped(self): self.assertTrue(job.cancelled()) + def test_repr(self): + client = _make_client(project="project-foo") + job = self._make_one("job-99", client) + job._properties.setdefault("jobReference", {})["location"] = "ABC" + assert repr(job) == "_AsyncJob" + class Test_JobConfig(unittest.TestCase): JOB_TYPE = "testing" diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index cbe087dac..5a0c5a83f 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -484,13 +484,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index c5f9b77c1..3cc1dd4c4 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -99,6 +99,7 @@ def test_from_api_repr(target_class): ), }, ], + "bestTrialId": "123", "featureColumns": [], "encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}, } @@ -119,6 +120,7 @@ def test_from_api_repr(target_class): assert got.model_type == "LOGISTIC_REGRESSION" assert got.labels == {"greeting": "こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME + assert got.best_trial_id == 123 assert got.training_runs[0]["trainingOptions"]["initialLearnRate"] == 1.0 assert ( google.cloud._helpers._rfc3339_to_datetime(got.training_runs[0]["startTime"]) @@ -161,6 +163,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert len(got.training_runs) == 0 assert len(got.feature_columns) == 0 assert len(got.label_columns) == 0 + assert got.best_trial_id is None def test_from_api_repr_w_unknown_fields(target_class): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 035f04456..2377be79c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1143,9 +1143,13 @@ def test_w_empty_container_default_default(self): def test_w_missing_key_explicit_default(self): self.assertEqual(self._call_fut({"key2": 2}, ["key1"], default=1), 1) - def test_w_matching_single_key(self): + def test_w_matching_single_key_in_sequence(self): self.assertEqual(self._call_fut({"key1": 1}, ["key1"]), 1) + def test_w_matching_single_string_key(self): + data = {"k": {"e": {"y": "foo"}}, "key": "bar"} + self.assertEqual(self._call_fut(data, "key"), "bar") + def test_w_matching_first_key_missing_second_key(self): self.assertIsNone(self._call_fut({"key1": {"key3": 3}}, ["key1", "key2"])) @@ -1159,11 +1163,16 @@ def _call_fut(self, container, keys, value): return _set_sub_prop(container, keys, value) - def test_w_empty_container_single_key(self): + def 
test_w_empty_container_single_key_in_sequence(self): container = {} self._call_fut(container, ["key1"], "value") self.assertEqual(container, {"key1": "value"}) + def test_w_empty_container_single_string_key(self): + container = {} + self._call_fut(container, "key", "value") + self.assertEqual(container, {"key": "value"}) + def test_w_empty_container_nested_keys(self): container = {} self._call_fut(container, ["key1", "key2", "key3"], "value") diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0140beb77..36becf182 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1610,3 +1610,25 @@ def test_bq_to_arrow_field_type_override(module_under_test): ).type == pyarrow.binary() ) + + +@pytest.mark.parametrize( + "field_type, metadata", + [ + ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}), + ( + "geography", + { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + ), + ], +) +def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", field_type) + ).metadata + == metadata + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 9b12128c6..2ddf98077 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -940,18 +940,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -985,18 +975,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -1919,6 +1899,7 @@ def test_update_routine(self): def test_update_table(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( @@ -1945,7 +1926,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -1956,7 +1936,15 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + # Explicly setting policyTags to no names should be included in the sent resource. 
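The distinction this test exercises is easier to see outside the mock-heavy harness. A short sketch against the new schema behavior (assuming this version of the library): omitting `policy_tags` leaves the key out of the request entirely, while an explicitly empty `PolicyTagList` sends an empty list, which tells the backend to clear existing tags.

```python
from google.cloud.bigquery.schema import PolicyTagList, SchemaField

# Omitted: the policyTags key is absent from the serialized resource,
# so an update cannot accidentally clobber tags set elsewhere.
field_default = SchemaField("full_name", "STRING", mode="REQUIRED")
assert "policyTags" not in field_default.to_api_repr()

# Explicitly empty: an empty names list is sent, requesting removal.
field_cleared = SchemaField(
    "full_name", "STRING", mode="REQUIRED", policy_tags=PolicyTagList(names=())
)
assert field_cleared.to_api_repr()["policyTags"] == {"names": []}
```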
+ # https://github.com/googleapis/python-bigquery/issues/981 + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description=None, + policy_tags=PolicyTagList(names=()), + ), SchemaField( "age", "INTEGER", mode="REQUIRED", description="New field description" ), @@ -1994,7 +1982,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2113,21 +2100,14 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", - "policyTags": {"names": []}, - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE", - "policyTags": {"names": []}, }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6706,7 +6686,13 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) - assert field["policyTags"]["names"] == [] + # Avoid accidentally updating policy tags when not explicitly included. + # https://github.com/googleapis/python-bigquery/issues/981 + # Also, avoid 403 if someone has permission to write to table but + # not update policy tags by omitting policy tags we might have + # received from a get table request. + # https://github.com/googleapis/python-bigquery/pull/557 + assert "policyTags" not in field # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field @@ -7904,21 +7890,18 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -7951,21 +7934,18 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 1f49dba5d..3dc9dd179 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,14 +78,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index edc05494c..c845d08c1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -44,15 +44,40 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, PolicyTagList()) + 
self.assertIsNone(field.policy_tags) def test_constructor_explicit(self): - field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) + self.assertEqual( + field.policy_tags, + PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) + + def test_constructor_explicit_none(self): + field = self._make_one("test", "STRING", description=None, policy_tags=None) + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -68,20 +93,6 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) - def test_constructor_with_policy_tags(self): - from google.cloud.bigquery.schema import PolicyTagList - - policy = PolicyTagList(names=("foo", "bar")) - field = self._make_one( - "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy - ) - self.assertEqual(field.name, "test") - self.assertEqual(field.field_type, "STRING") - self.assertEqual(field.mode, "REQUIRED") - self.assertEqual(field.description, "Testing") - self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, policy) - def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -90,17 +101,28 @@ def test_to_api_repr(self): policy.to_api_repr(), {"names": ["foo", "bar"]}, ) - field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) + field = self._make_one( + "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + ) self.assertEqual( field.to_api_repr(), { "mode": "NULLABLE", "name": "foo", "type": "INTEGER", + "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, }, ) + def test_to_api_repr_omits_unset_properties(self): + # Prevent accidentally modifying fields that aren't explicitly set. + # https://github.com/googleapis/python-bigquery/issues/981 + field = self._make_one("foo", "INTEGER") + resource = field.to_api_repr() + self.assertNotIn("description", resource) + self.assertNotIn("policyTags", resource) + def test_to_api_repr_with_subfield(self): for record_type in ("RECORD", "STRUCT"): subfield = self._make_one("bar", "INTEGER", "NULLABLE") @@ -108,14 +130,7 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "policyTags": {"names": []}, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -165,9 +180,15 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, "foo") self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") - self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) + # Keys not present in API representation shouldn't be included in + # _properties. 
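Equivalently, outside the test harness — a sketch of the round-trip this test pins down (`_properties` is private and inspected here only to show that absent keys stay absent):

```python
from google.cloud.bigquery.schema import SchemaField

field = SchemaField.from_api_repr({"name": "foo", "type": "RECORD"})
assert field.mode == "NULLABLE"                 # defaulted when the key is absent
assert field.description is None                # reads as None ...
assert "description" not in field._properties   # ... but is not stored
assert field.policy_tags is None                # likewise for policy tags
```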
+ self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) + self.assertNotIn("description", field._properties) + self.assertNotIn("policyTags", field._properties) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -583,22 +604,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -624,7 +633,6 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, - "policyTags": {"names": []}, }, ) self.assertEqual( @@ -634,7 +642,6 @@ def test_w_description(self): "type": "INTEGER", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, ) @@ -650,13 +657,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -665,18 +666,8 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) @@ -888,83 +879,43 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="NUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict( - name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="STRING", 
mode="NULLABLE"), ), ( dict(name="n", field_type="STRING", max_length=9), - dict( - name="n", - type="STRING", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="BYTES", mode="NULLABLE"), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict( - name="n", - type="BYTES", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), ), ], ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ed9ed5d0f..a34b0d56b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -93,6 +93,189 @@ def test_ctor_with_key(self): self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) +class TestTableBase: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import _TableBase + + return _TableBase + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + instance = self._make_one() + assert instance._properties == {} + + def test_project(self): + instance = self._make_one() + instance._properties = {"tableReference": {"projectId": "p_1"}} + assert instance.project == "p_1" + + def test_dataset_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"datasetId": "ds_1"}} + assert instance.dataset_id == "ds_1" + + def test_table_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"tableId": "tbl_1"}} + assert instance.table_id == "tbl_1" + + def test_path(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance.path == "/projects/p_1/datasets/ds_1/tables/tbl_1" + + def test___eq___wrong_type(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + class TableWannabe: + pass + + wannabe_other = TableWannabe() + wannabe_other._properties = instance._properties + wannabe_other.project = "p_1" + wannabe_other.dataset_id = "ds_1" + wannabe_other.table_id = "tbl_1" + + assert instance != wannabe_other # Can't fake it. + assert instance == mock.ANY # ...but delegation to other object works. 
+
+    def test___eq___project_mismatch(self):
+        instance = self._make_one()
+        instance._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        other = self._make_one()
+        other._properties = {
+            "tableReference": {
+                "projectId": "p_2",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        assert instance != other
+
+    def test___eq___dataset_mismatch(self):
+        instance = self._make_one()
+        instance._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        other = self._make_one()
+        other._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_2",
+                "tableId": "tbl_1",
+            }
+        }
+        assert instance != other
+
+    def test___eq___table_mismatch(self):
+        instance = self._make_one()
+        instance._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        other = self._make_one()
+        other._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_2",
+            }
+        }
+        assert instance != other
+
+    def test___eq___equality(self):
+        instance = self._make_one()
+        instance._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        other = self._make_one()
+        other._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+        assert instance == other
+
+    def test___hash__set_equality(self):
+        instance_1 = self._make_one()
+        instance_1._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+
+        instance_2 = self._make_one()
+        instance_2._properties = {
+            "tableReference": {
+                "projectId": "p_2",
+                "datasetId": "ds_2",
+                "tableId": "tbl_2",
+            }
+        }
+
+        set_one = {instance_1, instance_2}
+        set_two = {instance_1, instance_2}
+        assert set_one == set_two
+
+    def test___hash__sets_not_equal(self):
+        instance_1 = self._make_one()
+        instance_1._properties = {
+            "tableReference": {
+                "projectId": "p_1",
+                "datasetId": "ds_1",
+                "tableId": "tbl_1",
+            }
+        }
+
+        instance_2 = self._make_one()
+        instance_2._properties = {
+            "tableReference": {
+                "projectId": "p_2",
+                "datasetId": "ds_2",
+                "tableId": "tbl_2",
+            }
+        }
+
+        set_one = {instance_1}
+        set_two = {instance_2}
+        assert set_one != set_two
+
+
 class TestTableReference(unittest.TestCase):
     @staticmethod
     def _get_target_class():
@@ -187,55 +370,6 @@ def test_from_string_ignores_default_project(self):
         self.assertEqual(got.dataset_id, "string_dataset")
         self.assertEqual(got.table_id, "string_table")

-    def test___eq___wrong_type(self):
-        dataset_ref = DatasetReference("project_1", "dataset_1")
-        table = self._make_one(dataset_ref, "table_1")
-        other = object()
-        self.assertNotEqual(table, other)
-        self.assertEqual(table, mock.ANY)
-
-    def test___eq___project_mismatch(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        other_dataset = DatasetReference("project_2", "dataset_1")
-        table = self._make_one(dataset, "table_1")
-        other = self._make_one(other_dataset, "table_1")
-        self.assertNotEqual(table, other)
-
-    def test___eq___dataset_mismatch(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        other_dataset = DatasetReference("project_1", "dataset_2")
-        table = self._make_one(dataset, "table_1")
-        other = self._make_one(other_dataset, "table_1")
-        self.assertNotEqual(table, other)
-
-    def test___eq___table_mismatch(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        table = self._make_one(dataset, "table_1")
-        other = self._make_one(dataset, "table_2")
-        self.assertNotEqual(table, other)
-
-    def test___eq___equality(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        table = self._make_one(dataset, "table_1")
-        other = self._make_one(dataset, "table_1")
-        self.assertEqual(table, other)
-
-    def test___hash__set_equality(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        table1 = self._make_one(dataset, "table1")
-        table2 = self._make_one(dataset, "table2")
-        set_one = {table1, table2}
-        set_two = {table1, table2}
-        self.assertEqual(set_one, set_two)
-
-    def test___hash__not_equals(self):
-        dataset = DatasetReference("project_1", "dataset_1")
-        table1 = self._make_one(dataset, "table1")
-        table2 = self._make_one(dataset, "table2")
-        set_one = {table1}
-        set_two = {table2}
-        self.assertNotEqual(set_one, set_two)
-
     def test___repr__(self):
         dataset = DatasetReference("project1", "dataset1")
         table1 = self._make_one(dataset, "table1")
@@ -549,44 +683,6 @@ def test_num_rows_getter(self):
         with self.assertRaises(ValueError):
             getattr(table, "num_rows")

-    def test__eq__wrong_type(self):
-        table = self._make_one("project_foo.dataset_bar.table_baz")
-
-        class TableWannabe:
-            pass
-
-        not_a_table = TableWannabe()
-        not_a_table._properties = table._properties
-
-        assert table != not_a_table  # Can't fake it.
- - def test__eq__same_table(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource) - table_2 = self._make_one(resource) - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_ref_resource = { "projectId": "project_foo", @@ -1622,40 +1680,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - resource_1 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource_1) - - resource_2 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_quux", - } - } - table_2 = self._make_one(resource_2) - - assert table_1 != table_2 - - def test_hashable(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_item = self._make_one(resource) - table_item_2 = self._make_one(resource) - - assert hash(table_item) == hash(table_item_2) - class TestTableClassesInterchangeability: @staticmethod @@ -1850,8 +1874,7 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(df.crs.srs, "EPSG:4326") - self.assertEqual(df.crs.name, "WGS 84") + self.assertIsNone(df.crs) class TestRowIterator(unittest.TestCase): @@ -3890,8 +3913,14 @@ def test_to_geodataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "geometry") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") self.assertEqual(df.crs.name, "WGS 84") self.assertEqual(df.geog.crs.srs, "EPSG:4326") @@ -3962,8 +3991,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual(df.geog.dtype.name, "geometry") self.assertEqual(df.geog2.dtype.name, "object") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. 
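+            # (GeoPandas advises re-projecting to a projected CRS with
+            # to_crs() before computing areas; these tests only compare the
+            # raw values.)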
+ warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] ) @@ -3973,10 +4008,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual( [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] ) + # and can easily be converted to a GeoSeries - self.assertEqual( - list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), + ["0.0", "0.0", "0.0"], + ) @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") @@ -4028,8 +4067,14 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.g.dtype.name, "geometry") self.assertIsInstance(df.g, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0"]) - self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"])