From 3b70891135f5fe32dcd12210ff4faa51ac53742d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 13:04:10 -0500 Subject: [PATCH 01/89] chore: protect v3.x.x branch (#816) * chore: protect v3.x.x branch In preparation for breaking changes. * force pattern to be a string * simplify branch name --- .github/sync-repo-settings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index b18fb9c29..2697f214c 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,7 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: master +- pattern: '{master,v3}' requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 3c1be149e76b1d1d8879fdcf0924ddb1c1839e94 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 20:08:37 +0200 Subject: [PATCH 02/89] fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. --- google/cloud/bigquery/table.py | 30 +++++-- tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++-- 2 files changed, 179 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..daade1ac6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1552,11 +1552,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False if self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) return False try: @@ -1604,6 +1599,25 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows + def _maybe_warn_max_results( + self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + ): + """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. + + This helper method should be used directly in the relevant top-level public + methods, so that the warning is issued for the correct line in user code. + + Args: + bqstorage_client: + The BigQuery Storage client intended to use for downloading result rows. 
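+
+        Example:
+            A minimal sketch, assuming ``rows`` is a ``RowIterator`` with
+            ``max_results`` set. Because of ``stacklevel=3``, the warning is
+            attributed to the user's call site below, not to this helper::
+
+                rows.to_dataframe(bqstorage_client=bqstorage_client)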
+ """ + if bqstorage_client is not None and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=3, + ) + def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1700,6 +1714,8 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1790,6 +1806,8 @@ def to_dataframe_iterable( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1896,6 +1914,8 @@ def to_dataframe( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..4b1fd833b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import time +import types import unittest import warnings @@ -1862,6 +1863,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_max_results_set(self): + iterator = self._make_one( + max_results=10, first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached @@ -2105,7 +2115,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_arrow_max_results_w_create_bqstorage_warning(self): + def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2119,6 +2129,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2129,7 +2140,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_arrow(create_bqstorage_client=True) + row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2139,6 +2150,49 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + 
SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -2372,7 +2426,6 @@ def test_to_arrow_w_pyarrow_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2415,7 +2468,6 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2527,6 +2579,61 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=iterator_schema, + max_results=25, + ) + + with warnings.catch_warnings(record=True) as warned: + dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + + # Was a warning emitted? + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + assert len(matches) == 1, "User warning was not emitted." + assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel" + + # Basic check of what we got as a result. 
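+        # The REST fallback yields one DataFrame per response page; the
+        # mocked api_request above returns two pages, hence two frames.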
+ dataframes = list(dfs) + assert len(dataframes) == 2 + assert isinstance(dataframes[0], pandas.DataFrame) + assert isinstance(dataframes[1], pandas.DataFrame) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2926,7 +3033,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2940,6 +3047,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2950,7 +3058,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_dataframe(create_bqstorage_client=True) + row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2960,6 +3068,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") From fe7a902e8b3e723ace335c9b499aea6d180a025b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:14:09 +0000 Subject: [PATCH 03/89] feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. 
(#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 --- google/cloud/bigquery_v2/types/model.py | 104 ++++++++++++++++-- .../bigquery_v2/types/table_reference.py | 12 ++ 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 17e101d25..706418401 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -96,6 +96,8 @@ class Model(proto.Message): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. """ class ModelType(proto.Enum): @@ -113,6 +115,7 @@ class ModelType(proto.Enum): ARIMA = 11 AUTOML_REGRESSOR = 12 AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -151,6 +154,7 @@ class DataFrequency(proto.Enum): WEEKLY = 5 DAILY = 6 HOURLY = 7 + PER_MINUTE = 8 class HolidayRegion(proto.Enum): r"""Type of supported holiday regions for time series forecasting @@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message): median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. + R^2 score. This corresponds to r2_score in ML.EVALUATE. """ mean_absolute_error = proto.Field( @@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message): Mean of squared distances between each sample to its cluster centroid. clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - [Beta] Information for all clusters. + Information for all clusters. """ class Cluster(proto.Message): @@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. 
""" non_seasonal_order = proto.Field( @@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=3,) time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", @@ -901,7 +934,7 @@ class TrainingRun(proto.Message): """ class TrainingOptions(proto.Message): - r""" + r"""Options used in model training. Attributes: max_iterations (int): The maximum number of iterations in training. @@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message): num_clusters (int): Number of clusters for clustering models. model_uri (str): - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. @@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message): If a valid value is specified, then holiday effects modeling is enabled. time_series_id_column (str): - The id column that will be used to indicate - different time series to forecast in parallel. + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. horizon (int): The number of periods ahead that need to be forecasted. @@ -1042,6 +1079,15 @@ class TrainingOptions(proto.Message): output feature name is A.b. auto_arima_max_order (int): The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. """ max_iterations = proto.Field(proto.INT64, number=1,) @@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message): proto.ENUM, number=42, enum="Model.HolidayRegion", ) time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) horizon = proto.Field(proto.INT64, number=44,) preserve_input_structs = proto.Field(proto.BOOL, number=45,) auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + ) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. 
@@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -1237,11 +1312,21 @@ class ArimaModelInfo(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=4,) time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) arima_model_info = proto.RepeatedField( proto.MESSAGE, @@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message): label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) + best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index a0a8ee4c9..d56e5b09f 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -36,11 +36,23 @@ class TableReference(proto.Message): maximum length is 1,024 characters. Certain operations allow suffixing of the table ID with a partition decorator, such as ``sample_table$20190123``. + project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. 
""" project_id = proto.Field(proto.STRING, number=1,) dataset_id = proto.Field(proto.STRING, number=2,) table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) __all__ = tuple(sorted(__protobuf__.manifest)) From 02bbdaebb40be771124d397cb45545f1bf697548 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:50:04 -0500 Subject: [PATCH 04/89] chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbc5d4da..966a8744a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) + + +### Features + +* Update proto definitions for bigquery/v2 to support new proto fields for BQML. ([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b)) + + +### Bug Fixes + +* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94)) +* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11)) + + +### Documentation + +* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) + ### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index dbc524478..416bf20ed 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.22.1" +__version__ = "2.23.0" From 42b66d34b979c87cc98b8984a8abe74edda753ac Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 16:30:44 +0200 Subject: [PATCH 05/89] chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3a83eda64..0f9c3a2e3 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ffa689a9e..81ef4df2f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d9378af13add879118a1d004529b811f72c325d6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 28 Jul 2021 17:18:18 +0200 Subject: [PATCH 06/89] fix: `insert_rows()` accepts float column values as strings again (#824) --- google/cloud/bigquery/_helpers.py | 12 +++++++----- tests/unit/test__helpers.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..0a1f71444 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +from typing import Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -338,14 +339,15 @@ def _int_to_json(value): return value -def _float_to_json(value): +def _float_to_json(value) -> Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None - elif math.isnan(value) or math.isinf(value): - return str(value) - else: - return float(value) + + if isinstance(value, str): + value = float(value) + + return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def _decimal_to_json(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..f8d00e67d 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -690,21 +690,45 @@ def _call_fut(self, value): def test_w_none(self): self.assertEqual(self._call_fut(None), None) + def test_w_non_numeric(self): + with self.assertRaises(TypeError): + self._call_fut(object()) + + def test_w_integer(self): + result = self._call_fut(123) + self.assertIsInstance(result, float) + self.assertEqual(result, 123.0) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_float_as_string(self): + self.assertEqual(self._call_fut("1.23"), 1.23) + def test_w_nan(self): result = self._call_fut(float("nan")) self.assertEqual(result.lower(), "nan") + def test_w_nan_as_string(self): + result = self._call_fut("NaN") + self.assertEqual(result.lower(), "nan") + def test_w_infinity(self): result = self._call_fut(float("inf")) self.assertEqual(result.lower(), "inf") + def test_w_infinity_as_string(self): + result = self._call_fut("inf") + self.assertEqual(result.lower(), "inf") + def test_w_negative_infinity(self): result = 
self._call_fut(float("-inf")) self.assertEqual(result.lower(), "-inf") + def test_w_negative_infinity_as_string(self): + result = self._call_fut("-inf") + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From a505440e810d377dbb97e33412580089d67db9ba Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 28 Jul 2021 10:45:37 -0500 Subject: [PATCH 07/89] chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 966a8744a..be4eab769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) + + +### Bug Fixes + +* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6)) + ## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 416bf20ed..0195d572c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.23.1" From c541c69355cd4c3f37576b4f22955a1f8ebc82f0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 13:03:13 -0500 Subject: [PATCH 08/89] chore: add second protection rule for v3 branch (#828) --- .github/sync-repo-settings.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 2697f214c..cc69b2551 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,16 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. 
# Defaults to `master` -- pattern: '{master,v3}' +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' +- pattern: v3 requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 48e8a3535a13abe97ccc76e1fa42ca3a179ba496 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 21:43:43 +0200 Subject: [PATCH 09/89] chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0f9c3a2e3..6f6e670ab 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 81ef4df2f..dd36b5fe4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d8c25ac139d53d0e689ee77ba46560dc63b4d9fa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 03:59:03 -0500 Subject: [PATCH 10/89] test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) --- tests/system/test_pandas.py | 40 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..821b375e1 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -21,6 +21,7 @@ import io import operator +import google.api_core.retry import pkg_resources import pytest import pytz @@ -41,6 +42,10 @@ PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") +class MissingDataError(Exception): + pass + + def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): """Test that a DataFrame with dtypes that map well to BigQuery types can be uploaded without specifying a schema. @@ -666,19 +671,6 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): ) for errors in chunk_errors: assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - bigquery_client.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. @@ -686,7 +678,27 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): for data_row in dataframe.itertuples(index=False) ] - assert len(row_tuples) == len(expected) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer "within a few seconds". 
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(MissingDataError) + ) + def get_rows(): + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + if len(rows) != len(expected): + raise MissingDataError() + return rows + + rows = get_rows() + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] for row, expected_row in zip(row_tuples, expected): assert ( From 8149d9e3116e6f5340b9a15eb2c46deaaa24920b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:02 +0200 Subject: [PATCH 11/89] chore(deps): update dependency pyarrow to v5 (#834) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dd36b5fe4..73badd1f3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.1 +pyarrow==5.0.0 pytz==2021.1 From b9349adb2b54e26a45dbb69c10a948f5fc015a3c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:38 +0200 Subject: [PATCH 12/89] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6f6e670ab..eca0275a5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 73badd1f3..8f4ea0406 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.4 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From 80e3a61c60419fb19b70b664c6415cd01ba82f5b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 29 Jul 2021 16:42:35 +0200 Subject: [PATCH 13/89] deps: expand pyarrow pins to support 5.x releases (#833) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0ca19b576..e9deaf117 100644 --- a/setup.py +++ b/setup.py @@ -54,10 +54,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", + "pyarrow >= 1.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", From 40ef77f376db0db9be23de1a3657be9571f5b48f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Jul 2021 10:04:05 -0500 Subject: [PATCH 14/89] chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be4eab769..0c08e7910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) + + +### Dependencies + +* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) + ### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0195d572c..0460e7bb9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.23.1" +__version__ = "2.23.2" From 55687b89cc5ab04d1ff5ffeb31e6a4bf3b9eff79 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 19:57:59 +0200 Subject: [PATCH 15/89] chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8f4ea0406..d7a99a8bd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.2 -google-auth-oauthlib==0.4.4 +google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 85ce81cfd2e7199fa9016065c7329acb6079528c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 21:36:10 +0200 Subject: [PATCH 16/89] chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index eca0275a5..5aa967b24 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d7a99a8bd..4f2eaf90b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 20df24b70e8934196200d0335c7f5afbdd08ea37 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 Aug 2021 03:14:34 +0200 Subject: [PATCH 17/89] chore(deps): update dependency google-cloud-testutils to v1 (#845) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9e9d4e40f..b8dee50d0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==0.3.0 +google-cloud-testutils==1.0.0 pytest==6.2.4 mock==4.0.3 From 7016f69b6064be101a359bc093ea74fc2a305ac7 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 2 Aug 2021 19:20:21 -0600 Subject: [PATCH 18/89] chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 --- .github/sync-repo-settings.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index cc69b2551..8634a3043 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -4,6 +4,8 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. 
# Defaults to `master` - pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' @@ -13,6 +15,8 @@ branchProtectionRules: - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From cf0b0d862e01e9309407b2ac1a48f0bfe23d520d Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Thu, 5 Aug 2021 08:59:15 -0600 Subject: [PATCH 19/89] chore: add api-bigquery as a samples owner (#852) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae570eb01..76112476b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners From 30770fd0575fbd5aaa70c14196a4cc54627aecd2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Aug 2021 12:14:24 -0500 Subject: [PATCH 20/89] fix: increase default retry deadline to 10 minutes (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 --- google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 2df4de08b..bab28aacb 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -47,7 +47,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, From e2cbcaa75a5da2bcd520d9116ead90b02d7326fd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Aug 2021 22:34:42 +0200 Subject: [PATCH 21/89] process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. 
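GitHub gives precedence to the last matching CODEOWNERS pattern, so the
`/samples/` rule below still overrides the repository-wide default; this
change only broadens the set of teams whose approval can satisfy a
required code-owner review for samples:

    /samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python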
--- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76112476b..6763f258c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python From 9694a4dd1544e06209d091d9a36d086ea794b3b0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:24:20 -0500 Subject: [PATCH 22/89] chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c08e7910..856f1ecd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) + + +### Bug Fixes + +* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) + ### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0460e7bb9..df992a051 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.2" +__version__ = "2.23.3" From 9c6614f939604d3ac99b2945c802df277b629d1b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 Aug 2021 20:10:11 +0200 Subject: [PATCH 23/89] chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery

### [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-07-27)

[Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)

</details>
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5aa967b24..d55d0f254 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4f2eaf90b..69f537de4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 7f7b1a808d50558772a0deb534ca654da65d629e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 10 Aug 2021 19:21:41 +0200 Subject: [PATCH 24/89] feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 ++ google/cloud/bigquery/job/__init__.py | 2 ++ google/cloud/bigquery/job/base.py | 29 +++++++++++++++++++++++ tests/system/test_client.py | 34 +++++++++++++++++++++++++++ tests/unit/job/helpers.py | 1 + tests/unit/job/test_base.py | 14 +++++++++++ tests/unit/job/test_query.py | 29 +++++++++++++++++++++++ 8 files changed, 112 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index 8a5bff9a4..5ac596370 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -68,6 +68,7 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption + job.TransactionInfo Dataset diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..a7a0da3dd 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob +from google.cloud.bigquery.job import TransactionInfo from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -149,6 +150,7 @@ "GoogleSheetsOptions", "ParquetOptions", "ScriptOptions", + "TransactionInfo", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4c16d0e20..f51311b0b 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import 
ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import TransactionInfo from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig @@ -81,5 +82,6 @@ "QueryPriority", "SchemaUpdateOption", "SourceFormat", + "TransactionInfo", "WriteDisposition", ] diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 20ad81c0b..e5fc592a6 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -19,6 +19,7 @@ import http import threading import typing +from typing import Dict, Optional from google.api_core import exceptions import google.api_core.future.polling @@ -88,6 +89,22 @@ def _error_result_to_exception(error_result): ) +class TransactionInfo(typing.NamedTuple): + """[Alpha] Information of a multi-statement transaction. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo + + .. versionadded:: 2.24.0 + """ + + transaction_id: str + """Output only. ID of the transaction.""" + + @classmethod + def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo": + return cls(transaction_info["transactionId"]) + + class _JobReference(object): """A reference to a job. @@ -336,6 +353,18 @@ def reservation_usage(self): for usage in usage_stats_raw ] + @property + def transaction_info(self) -> Optional[TransactionInfo]: + """Information of the multi-statement transaction if this job is part of one. + + .. versionadded:: 2.24.0 + """ + info = self._properties.get("statistics", {}).get("transactionInfo") + if info is None: + return None + else: + return TransactionInfo.from_api_repr(info) + @property def error_result(self): """Error information about the job as a whole. diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..f540611a6 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1557,6 +1557,40 @@ def test_dml_statistics(self): assert query_job.dml_stats.updated_row_count == 0 assert query_job.dml_stats.deleted_row_count == 3 + def test_transaction_info(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics" + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. 
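+        # BEGIN/COMMIT wrap the INSERT and UPDATE in one multi-statement
+        # transaction, so the server assigns a transaction ID that the new
+        # `transaction_info` property exposes on the job.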
+ sql = f""" + BEGIN TRANSACTION; + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + COMMIT TRANSACTION; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + # Transaction ID set by the server should be accessible + assert query_job.transaction_info is not None + assert query_job.transaction_info.transaction_id != "" + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index ea071c5ac..c792214e7 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job): self.assertIsNone(job.created) self.assertIsNone(job.started) self.assertIsNone(job.ended) + self.assertIsNone(job.transaction_info) # derived from resource['status'] self.assertIsNone(job.error_result) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 405ad6ee5..0ac1d05b5 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -227,6 +227,20 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_transaction_info(self): + from google.cloud.bigquery.job.base import TransactionInfo + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + assert job.transaction_info is None + + statistics = job._properties["statistics"] = {} + assert job.transaction_info is None + + statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"} + assert isinstance(job.transaction_info, TransactionInfo) + assert job.transaction_info.transaction_id == "123-abc-xyz" + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 482f7f3af..d41370520 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -128,6 +128,18 @@ def _verify_dml_stats_resource_properties(self, job, resource): else: assert job.dml_stats is None + def _verify_transaction_info_resource_properties(self, job, resource): + resource_stats = resource.get("statistics", {}) + + if "transactionInfo" in resource_stats: + resource_transaction_info = resource_stats["transactionInfo"] + job_transaction_info = job.transaction_info + assert job_transaction_info.transaction_id == resource_transaction_info.get( + "transactionId" + ) + else: + assert job.transaction_info is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -137,6 +149,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) self._verify_dml_stats_resource_properties(job, resource) + self._verify_transaction_info_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -325,6 +338,22 @@ def test_from_api_repr_with_dml_stats(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_transaction_info(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + 
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}}, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption From 443b8ab28c19bdd0bd3cad39db33cb7bc8ad8741 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 10 Aug 2021 20:02:10 +0200 Subject: [PATCH 25/89] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage ### [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d55d0f254..d3e599101 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 69f537de4..1545ed96e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From aee814c6a48758325609b6fdfc35e2378461786e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 12:29:39 +0200 Subject: [PATCH 26/89] chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 5 ++++- samples/snippets/noxfile.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9ee60f7e4..649877dc4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) 
+INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # From c1a3d4435739a21d25aa154145e36d3a7c42eeb6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 11 Aug 2021 16:28:43 +0200 Subject: [PATCH 27/89] feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. --- google/cloud/bigquery/table.py | 42 +++++- tests/unit/test_table.py | 225 ++++++++++++++++++++++++++++++--- 2 files changed, 244 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index daade1ac6..d23885ebf 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -255,9 +255,16 @@ def _key(self): return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): - if not isinstance(other, TableReference): + if isinstance(other, (Table, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + elif isinstance(other, TableReference): + return self._key() == other._key() + else: return NotImplemented - return self._key() == other._key() def __ne__(self, other): return not self == other @@ -1011,6 +1018,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + def __eq__(self, other): + if isinstance(other, Table): + return ( + self._properties["tableReference"] + == other._properties["tableReference"] + ) + elif isinstance(other, (TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def __repr__(self): return "Table({})".format(repr(self.reference)) @@ -1229,6 +1254,19 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if isinstance(other, (Table, TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4b1fd833b..a5badc66c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -115,8 +115,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -124,8 +122,6 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -137,7 +133,6 @@ def test_to_api_repr(self): ) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference dataset_ref = DatasetReference("project_1", "dataset_1") @@ -204,8 +199,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset_ref, "table_1") other = object() @@ -213,8 +206,6 @@ def test___eq___wrong_type(self): self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_2", "dataset_1") table = self._make_one(dataset, "table_1") @@ -222,8 +213,6 @@ def test___eq___project_mismatch(self): self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_1", "dataset_2") table = self._make_one(dataset, "table_1") @@ -231,24 +220,18 @@ def test___eq___dataset_mismatch(self): self.assertNotEqual(table, other) def test___eq___table_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -257,8 +240,6 @@ def test___hash__set_equality(self): self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -294,8 +275,6 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): - from google.cloud.bigquery.dataset import DatasetReference - if len(args) == 0: dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -581,6 +560,68 @@ def 
test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") + def test__eq__wrong_type(self): + table = self._make_one("project_foo.dataset_bar.table_baz") + + class TableWannabe: + pass + + not_a_table = TableWannabe() + not_a_table._properties = table._properties + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table_basic(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + assert table_1 == table_2 + + def test__eq__same_table_multiple_properties(self): + from google.cloud.bigquery import SchemaField + + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.require_partition_filter = True + table_1.labels = {"first": "one", "second": "two"} + + table_1.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.require_partition_filter = True + table_2.labels = {"first": "one", "second": "two"} + table_2.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is table baz" + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.description = "This is also table baz" + + assert table_1 == table_2 # Still equal, only table reference is important. + + def test__eq__different_table(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") + + assert table_1 != table_2 + + def test_hashable(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is a table" + + table_1b = self._make_one("project_foo.dataset_bar.table_baz") + table_1b.description = "Metadata is irrelevant for hashes" + + assert hash(table_1) == hash(table_1b) + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1543,6 +1584,148 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) + def test__eq__wrong_type(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table = self._make_one(resource) + + class FakeTableListItem: + project = "project_foo" + dataset_id = "dataset_bar" + table_id = "table_baz" + + not_a_table = FakeTableListItem() + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource) + table_2 = self._make_one(resource) + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_ref_resource = { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + + resource_1 = {"tableReference": table_ref_resource, "friendlyName": "Table One"} + table_1 = self._make_one(resource_1) + + resource_2 = {"tableReference": table_ref_resource, "friendlyName": "Table Two"} + table_2 = self._make_one(resource_2) + + assert table_1 == table_2 # Still equal, only table reference is important. 
+ + def test__eq__different_table(self): + resource_1 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource_1) + + resource_2 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_quux", + } + } + table_2 = self._make_one(resource_2) + + assert table_1 != table_2 + + def test_hashable(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_item = self._make_one(resource) + table_item_2 = self._make_one(resource) + + assert hash(table_item) == hash(table_item_2) + + +class TestTableClassesInterchangeability: + @staticmethod + def _make_table(*args, **kwargs): + from google.cloud.bigquery.table import Table + + return Table(*args, **kwargs) + + @staticmethod + def _make_table_ref(*args, **kwargs): + from google.cloud.bigquery.table import TableReference + + return TableReference(*args, **kwargs) + + @staticmethod + def _make_table_list_item(*args, **kwargs): + from google.cloud.bigquery.table import TableListItem + + return TableListItem(*args, **kwargs) + + def test_table_eq_table_ref(self): + + table = self._make_table("project_foo.dataset_bar.table_baz") + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + + assert table == table_ref + assert table_ref == table + + def test_table_eq_table_list_item(self): + table = self._make_table("project_foo.dataset_bar.table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table == table_list_item + assert table_list_item == table + + def test_table_ref_eq_table_list_item(self): + + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table_ref == table_list_item + assert table_list_item == table_ref + class TestSnapshotDefinition: @staticmethod From 93d15e2e5405c2cc6d158c4e5737361344193dbc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 10:12:23 -0500 Subject: [PATCH 28/89] feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- docs/conf.py | 1 + docs/reference.rst | 1 + google/cloud/bigquery/enums.py | 24 +++++++++---------- google/cloud/bigquery/query.py | 42 ++++++++++++++++++++++++---------- tests/unit/test_query.py | 13 +++++++++++ 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/reference.rst b/docs/reference.rst index 5ac596370..d8738e67b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -138,6 +138,7 @@ Query query.ArrayQueryParameter query.ScalarQueryParameter + query.ScalarQueryParameterType query.StructQueryParameter query.UDFResource diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 0da01d665..d67cebd4c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -259,23 +259,23 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") TIME = ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index d1e9a45a5..1f449f189 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,7 +16,9 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -24,6 +26,11 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +_SCALAR_VALUE_TYPE = Optional[ + Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] +] + + class 
UDFResource(object): """Describe a single user-defined function (UDF) resource. @@ -325,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. Args: - name (Optional[str]): + name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_: + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. """ - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: _SCALAR_VALUE_TYPE, + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod - def positional(cls, type_: str, value) -> "ScalarQueryParameter": + def positional( + cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE + ) -> "ScalarQueryParameter": """Factory for positional parameter. Args: - type_ (str): + type_: Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. Returns: diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 9483fe8dd..69a6772e5 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +import decimal import unittest import mock @@ -430,6 +431,18 @@ def test_positional(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_ctor_w_scalar_query_parameter_type(self): + from google.cloud.bigquery import enums + + param = self._make_one( + name="foo", + type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + value=decimal.Decimal("123.456"), + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "BIGNUMERIC") + self.assertEqual(param.value, decimal.Decimal("123.456")) + def test_from_api_repr_w_name(self): RESOURCE = { "name": "foo", From 519d99c20e7d1101f76981f3de036fdf3c7a4ecc Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 11 Aug 2021 14:24:28 -0400 Subject: [PATCH 29/89] feat: retry failed query jobs in `result()` (#837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`).
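For readers of this patch, a minimal usage sketch of the new `job_retry` parameter (the dataset and table names below are made up for illustration; the API calls match the diff that follows):

```python
from google.api_core.exceptions import NotFound
from google.api_core.retry import Retry, if_exception_type
from google.cloud import bigquery

client = bigquery.Client()

# Default behavior: job failures with reason rateLimitExceeded or
# backendError are retried via DEFAULT_JOB_RETRY when result() is called.
rows = client.query("SELECT 1").result()

# A custom predicate widens that, e.g. retrying while a referenced table
# is still being created concurrently (as the new system test does).
retry_notfound = Retry(predicate=if_exception_type(NotFound))
job = client.query(
    "SELECT COUNT(*) FROM my_dataset.my_table",  # hypothetical table
    job_retry=retry_notfound,
)
rows = job.result()  # job_retry may equivalently be passed to result()

# Passing job_retry=None disables job retry entirely. Combining a custom
# job_retry with an explicit job_id raises TypeError, since such jobs
# are not retryable.
```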
--- google/cloud/bigquery/client.py | 110 +++++++++---- google/cloud/bigquery/job/query.py | 84 ++++++++-- google/cloud/bigquery/retry.py | 20 +++ tests/system/test_job_retry.py | 72 +++++++++ tests/unit/test_job_retry.py | 247 +++++++++++++++++++++++++++++ tests/unit/test_retry.py | 24 +++ 6 files changed, 518 insertions(+), 39 deletions(-) create mode 100644 tests/system/test_job_retry.py create mode 100644 tests/unit/test_job_retry.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 742ecac2e..8142c59cd 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -86,7 +86,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -3163,6 +3163,7 @@ def query( project: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3192,21 +3193,52 @@ def query( Project ID of the project of where to run the job. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` is + provided, then the job returned by the query will not + be retryable, and an exception will be raised if a + non-``None`` (and non-default) value for ``job_retry`` + is also provided. + + Note that errors aren't detected until ``result()`` is + called on the job returned. The ``job_retry`` + specified here becomes the default ``job_retry`` for + ``result()``, where it can also be specified. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class, or if both ``job_id`` and non-``None`` non-default + ``job_retry`` are provided. """ job_id_given = job_id is not None - job_id = _make_job_id(job_id, job_id_prefix) + if ( + job_id_given + and job_retry is not None + and job_retry is not DEFAULT_JOB_RETRY + ): + raise TypeError( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." 
+ ) + + job_id_save = job_id if project is None: project = self.project @@ -3214,8 +3246,6 @@ def query( if location is None: location = self.location - - job_config = copy.deepcopy(job_config) - if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -3225,6 +3255,8 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence + # + # Note that _fill_from_default doesn't mutate the receiver job_config = job_config._fill_from_default( self._default_query_job_config ) @@ -3233,34 +3265,54 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = copy.deepcopy(self._default_query_job_config) + job_config = self._default_query_job_config - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) + # Note that we haven't modified the original job_config (or + # _default_query_job_config) up to this point. + job_config_save = job_config - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. - if job_id_given: - raise create_exc + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = _make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job else: return query_job - else: - return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for its result, at which
+ if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future def insert_rows( self, diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..3ab47b0f9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -36,7 +36,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning @@ -1260,6 +1260,7 @@ def result( retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, start_index: int = None, + job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. @@ -1270,9 +1271,13 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. If the job state is - ``DONE``, retrying is aborted early even if the results are not - available, as this will not change anymore. + How to retry the call that retrieves rows. This only + applies to making RPC calls. It isn't used to retry + failed jobs. This has a reasonable default that + should only be overridden with care. If the job state + is ``DONE``, retrying is aborted early even if the + results are not available, as this will not change + anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1280,6 +1285,16 @@ def result( applies to each individual request. start_index (Optional[int]): The zero-based index of the starting row to read. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` was + provided to the query that created this job, then the + job returned by the query will not be retryable, and + an exception will be raised if non-``None`` + non-default ``job_retry`` is also provided. Returns: google.cloud.bigquery.table.RowIterator: @@ -1295,17 +1310,66 @@ def result( Raises: google.cloud.exceptions.GoogleAPICallError: - If the job failed. + If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. + TypeError: + If Non-``None`` and non-default ``job_retry`` is + provided and the job is not retryable. """ try: - super(QueryJob, self).result(retry=retry, timeout=timeout) + retry_do_query = getattr(self, "_retry_do_query", None) + if retry_do_query is not None: + if job_retry is DEFAULT_JOB_RETRY: + job_retry = self._job_retry + else: + if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: + raise TypeError( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." 
+ ) + + first = True + + def do_get_result(): + nonlocal first + + if first: + first = False + else: + # Note that we won't get here if retry_do_query is + # None, because we won't use a retry. + + # The original job failed. Create a new one. + job = retry_do_query() + + # If it's already failed, we might as well stop: + if job.done() and job.exception() is not None: + raise job.exception() + + # Become the new job: + self.__dict__.clear() + self.__dict__.update(job.__dict__) + + # This shouldn't be necessary, because once we have a good + # job, it should stay good, and we shouldn't have to retry. + # But let's be paranoid. :) + self._retry_do_query = retry_do_query + self._job_retry = job_retry + + super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) + + if retry_do_query is not None and job_retry is not None: + do_get_result = job_retry(do_get_result) + + do_get_result() - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index bab28aacb..e9286055c 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -32,6 +32,8 @@ auth_exceptions.TransportError, ) +_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds + def _should_retry(exc): """Predicate for determining when to retry. @@ -56,3 +58,21 @@ on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +job_retry_reasons = "rateLimitExceeded", "backendError" + + +def _job_should_retry(exc): + if not hasattr(exc, "errors") or len(exc.errors) == 0: + return False + + reason = exc.errors[0]["reason"] + return reason in job_retry_reasons + + +DEFAULT_JOB_RETRY = retry.Retry( + predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE +) +""" +The default job retry object. +""" diff --git a/tests/system/test_job_retry.py b/tests/system/test_job_retry.py new file mode 100644 index 000000000..520545493 --- /dev/null +++ b/tests/system/test_job_retry.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import contextlib +import threading +import time + +import google.api_core.exceptions +import google.cloud.bigquery +import pytest + + +def thread(func): + thread = threading.Thread(target=func, daemon=True) + thread.start() + return thread + + +@pytest.mark.parametrize("job_retry_on_query", [True, False]) +def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query): + """ + Test job_retry + + See: https://github.com/googleapis/python-bigquery/issues/539 + """ + from google.api_core import exceptions + from google.api_core.retry import if_exception_type, Retry + + table_name = f"{dataset_id}.t539" + + # Without a custom retry, we fail: + with pytest.raises(google.api_core.exceptions.NotFound): + bigquery_client.query(f"select count(*) from {table_name}").result() + + retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound)) + + job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {} + job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry) + job_id = job.job_id + + # We can already know that the job failed, but we're not supposed + # to find out until we call result, which is where the retry happens + assert job.done() + assert job.exception() is not None + + @thread + def create_table(): + time.sleep(1) # Give the first retry attempt time to fail. + with contextlib.closing(google.cloud.bigquery.Client()) as client: + client.query(f"create table {table_name} (id int64)").result() + + job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound) + [[count]] = list(job.result(**job_retry)) + assert count == 0 + + # The job was retried, and thus got a new job id + assert job.job_id != job_id + + # Make sure we don't leave a thread behind: + create_table.join() + bigquery_client.query(f"drop table {table_name}").result() diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py new file mode 100644 index 000000000..b2095d2f2 --- /dev/null +++ b/tests/unit/test_job_retry.py @@ -0,0 +1,247 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import re + +import mock +import pytest + +import google.api_core.exceptions +import google.api_core.retry + +from .helpers import make_connection + + +# With job_retry_on_query, we're testing 4 scenarios: +# - No `job_retry` passed, retry on default rateLimitExceeded. +# - Pass NotFound retry to `query`. +# - Pass NotFound retry to `result`. +# - Pass BadRequest retry to query, with the value passed to `result` overriding. +@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) +@mock.patch("time.sleep") +def test_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures.
+ """ + + retry_notfound = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound + ) + ) + retry_badrequest = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest + ) + ) + + if job_retry_on_query is None: + reason = "rateLimitExceeded" + else: + reason = "notFound" + + err = dict(reason=reason) + responses = [ + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=retry_notfound) + elif job_retry_on_query == "Both": + # This will be overridden in `result` + job_retry = dict(job_retry=retry_badrequest) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = ( + dict(job_retry=retry_notfound) + if job_retry_on_query in ("Result", "Both") + else {} + ) + result = job.result(**job_retry) + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # The job adjusts it's job id based on the id of the last attempt. + assert job.job_id != orig_job_id + assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + + # We had to sleep three times + assert len(sleep.mock_calls) == 3 + + # Sleeps are random, however they're more than 0 + assert min(c[1][0] for c in sleep.mock_calls) > 0 + + # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial + # The default multiplier is 2 + assert max(c[1][0] for c in sleep.mock_calls) <= 8 + + # We can ask for the result again: + responses = [ + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + orig_job_id = job.job_id + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # We wouldn't (and didn't) fail, because we're dealing with a successful job. + # So the job id hasn't changed. + assert job.job_id == orig_job_id + + +# With job_retry_on_query, we're testing 4 scenarios: +# - Pass None retry to `query`. +# - Pass None retry to `result`. +@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) +@mock.patch("time.sleep") +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + err = dict(reason="rateLimitExceeded") + responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3 + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + response["jobReference"] = data["jobReference"] + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=None) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} + with pytest.raises(google.api_core.exceptions.Forbidden): + job.result(**job_retry) + + assert job.job_id == orig_job_id + assert len(sleep.mock_calls) == 0 + + +@mock.patch("google.api_core.retry.datetime_helpers") +@mock.patch("time.sleep") +def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): + """ + If at first you don't succeed, maybe you will later. :) + """ + conn = client._connection = make_connection() + + datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + + err = dict(reason="rateLimitExceeded") + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) + response["jobReference"] = data["jobReference"] + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. + # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id + + +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." 
+ ), + ): + client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) + + +def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): + client._connection = make_connection({}) + job = client.query("select 42", job_id=42) + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ), + ): + job.result(job_retry=google.api_core.retry.Retry()) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 6fb7f93fd..c7c25e036 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -86,3 +86,27 @@ def test_w_unstructured_bad_gateway(self): exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) + + +def test_DEFAULT_JOB_RETRY_predicate(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="rateLimitExceeded")]) + ) + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) + + +def test_DEFAULT_JOB_RETRY_deadline(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + + assert DEFAULT_JOB_RETRY._deadline == 600 From ad9c8026f0e667f13dd754279f9dc40d06f4fa78 Mon Sep 17 00:00:00 2001 From: Grimmer Date: Thu, 12 Aug 2021 03:23:48 +0800 Subject: [PATCH 30/89] fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver --- google/cloud/bigquery/client.py | 2 +- tests/unit/test_client.py | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8142c59cd..cbac82548 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2762,7 +2762,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 535685511..671dd8da1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_unicode_emoji_data_case(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + + emoji = "\U0001F3E6" + json_row = {"emoji": emoji} + json_rows = [json_row] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + 
project=client.project, + job_config=mock.ANY, + timeout=None, + ) + + sent_data_file = load_table_from_file.mock_calls[0][1][1] + + # make sure json_row's unicode characters are only encoded one time + expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}' + assert sent_data_file.getvalue() == expected_bytes + # Low-level tests @classmethod From cf6f0e923d385817c9aff447255ecfa4b9b4c72d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 19:46:11 +0000 Subject: [PATCH 31/89] chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
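As a quick illustration of one of the released features, `ScalarQueryParameterType` support in the `ScalarQueryParameter` constructor, here is a sketch based on the new unit test (the parameter name and value are arbitrary):

```python
import decimal

from google.cloud.bigquery import ScalarQueryParameter, enums

# type_ may now be a ScalarQueryParameterType object instead of a string;
# the constructor normalizes it back to the plain type name.
param = ScalarQueryParameter(
    "amount",  # arbitrary parameter name
    enums.SqlParameterScalarTypes.BIGNUMERIC,
    decimal.Decimal("123.456"),
)
assert param.type_ == "BIGNUMERIC"
```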
--- CHANGELOG.md | 15 +++++++++++++++ docs/conf.py | 1 - google/cloud/bigquery/version.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 856f1ecd1..83b409015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) + + +### Features + +* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) +* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) +* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) +* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) + + +### Bug Fixes + +* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) + ### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index df992a051..84f6b4643 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.23.3" +__version__ = "2.24.0" From c44d45bc0481aeef2e39ba3392666125bdd2715d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Aug 2021 12:15:45 +0200 Subject: [PATCH 32/89] chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d3e599101..dfee339d4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1545ed96e..264899dff 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From e3704c3494b90112cb30b091bcacb443bf148383 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Aug 2021 02:00:19 -0500 Subject: [PATCH 33/89] test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests --- tests/data/scalars.jsonl | 4 +- tests/data/scalars_extreme.jsonl | 10 +-- tests/data/scalars_schema.json | 54 +++++++++------ tests/system/test_arrow.py | 36 ++++++++-- tests/system/test_client.py | 48 ------------- tests/system/test_list_rows.py | 112 +++++++++++++++++++++++++++++++ 6 files changed, 181 insertions(+), 83 deletions(-) create mode 100644 tests/system/test_list_rows.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl index 4419a6e9a..e06139e5c 100644 --- a/tests/data/scalars.jsonl +++ b/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl index ceccd8dbc..d0a33fdba 100644 --- 
a/tests/data/scalars_extreme.jsonl +++ b/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": 
"1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json index 00bd150fd..676d37d56 100644 --- a/tests/data/scalars_schema.json +++ b/tests/data/scalars_schema.json @@ -1,33 +1,33 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", + "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +36,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": "int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f97488e39..12f7af9cb 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,8 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + + pyarrow = pytest.importorskip( "pyarrow", minversion="3.0.0" ) # Needs decimal256 for BIGNUMERIC columns. @@ -31,17 +37,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. 
+ schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f540611a6..06ef40126 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. - rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. 
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py new file mode 100644 index 000000000..70388059e --- /dev/null +++ b/tests/system/test_list_rows.py @@ -0,0 +1,112 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. + rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" + assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" 
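+ # time_col is "11:41:43.07616" in the fixture; right-padding the fraction + # to six digits gives 76160 microseconds.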
+ assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") + assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None From cd21df1716e9ab163c779a716d94a850a6b2d253 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:17:31 -0400 Subject: [PATCH 34/89] chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + samples/geography/noxfile.py | 6 +++--- samples/snippets/noxfile.py | 6 +++--- scripts/readme-gen/templates/install_deps.tmpl.rst | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 649877dc4..a9fcd07cc 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc + digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. 
-ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8..275d64989 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. code-block:: bash From 2cb3563ee863edef7eaf5d04d739bcfe7bc6438e Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 13 Aug 2021 12:54:09 -0600 Subject: [PATCH 35/89] fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast --- docs/snippets.py | 5 +- google/cloud/bigquery/_pandas_helpers.py | 17 ++---- google/cloud/bigquery/table.py | 3 +- samples/client_query_w_timestamp_params.py | 3 +- setup.py | 4 +- testing/constraints-3.6.txt | 2 +- tests/system/test_client.py | 14 ++--- tests/system/test_pandas.py | 42 ++++++------- tests/unit/job/test_base.py | 4 +- tests/unit/test__pandas_helpers.py | 69 ++++++++-------------- tests/unit/test_client.py | 21 ++++--- tests/unit/test_table.py | 17 ++---- 12 files changed, 78 insertions(+), 123 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..c62001fc0 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete): # [START bigquery_update_table_expiration] import datetime - import pytz # from google.cloud import bigquery # client = bigquery.Client() @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete): assert table.expires is None # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) table.expires = 
expiration table = client.update_table(table, ["expires"]) # API request diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..f49980645 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,8 +20,6 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER @@ -110,6 +108,7 @@ def pyarrow_timestamp(): # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, "BYTES": pyarrow.binary, @@ -146,23 +145,15 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d23885ebf..62f888001 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import datetime import functools import operator -import pytz import typing from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings @@ -1969,7 +1968,7 @@ def to_dataframe( # Pandas, we set the timestamp_as_object parameter to True, if necessary. types_to_check = { pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), + pyarrow.timestamp("us", tz=datetime.timezone.utc), } for column in record_batch: diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index ca8eec0b5..41a27770e 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -18,7 +18,6 @@ def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime - import pytz from google.cloud import bigquery # Construct a BigQuery client object. @@ -30,7 +29,7 @@ def client_query_w_timestamp_params(): bigquery.ScalarQueryParameter( "ts_value", "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc), ) ] ) diff --git a/setup.py b/setup.py index e9deaf117..a1b3b61a0 100644 --- a/setup.py +++ b/setup.py @@ -54,9 +54,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 6.0dev", + "pyarrow >= 3.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 06ef40126..4250111b4 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,7 +30,6 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers try: @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, ] - if _BIGNUMERIC_SUPPORT: - examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 821b375e1..371dcea71 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -24,10 +24,8 @@ import google.api_core.retry import pkg_resources import pytest -import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . 
import helpers @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ( "dt_col", @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -340,6 +335,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc ), ], ), diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 0ac1d05b5..c3f7854e3 100644 --- a/tests/unit/job/test_base.py +++ 
b/tests/unit/job/test_base.py @@ -295,11 +295,11 @@ def test_user_email(self): @staticmethod def _datetime_and_millis(): import datetime - import pytz from google.cloud._helpers import _millis now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + microsecond=123000, + tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) return now, _millis(now) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..b9cb56572 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -37,12 +37,10 @@ # used in test parameterization. pyarrow = mock.Mock() import pytest -import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage @@ -60,11 +58,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -153,9 +146,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +225,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -312,6 +302,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -321,9 +312,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +323,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +332,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = 
pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -363,6 +350,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +360,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +371,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +380,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -441,7 +425,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,17 +433,18 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( "TIMESTAMP", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ], ), ( @@ -938,6 +923,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +932,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,11 
+941,15 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), ], "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], @@ -971,11 +959,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -1210,11 +1193,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,13 +1216,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 671dd8da1..ca0dca975 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -30,7 +30,6 @@ import packaging import requests import pytest -import pytz import pkg_resources try: @@ -5018,16 +5017,24 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], [1.25, 2.5], ), { "score": 13, "times": [ - datetime.datetime(2018, 12, 2, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], "distances": [-1.25, -2.5], }, @@ 
-6974,7 +6981,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ] ) @@ -7306,7 +7313,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a5badc66c..50d573345 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -20,9 +20,7 @@ import warnings import mock -import pkg_resources import pytest -import pytz import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -44,11 +42,8 @@ try: import pyarrow import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -58,9 +53,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -914,7 +906,9 @@ def test_mview_last_refresh_time(self): } self.assertEqual( table.mview_last_refresh_time, - datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + datetime.datetime( + 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc + ), ) def test_mview_enable_refresh(self): @@ -2878,10 +2872,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc - + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) From 936660bdf48eb65844b39bc567146968895225d7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 19 Aug 2021 10:01:53 -0400 Subject: [PATCH 36/89] chore: release 2.24.1 (#879) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83b409015..5a3e74fd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) + + +### Bug Fixes + +* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e)) + ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 84f6b4643..96f84438a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.24.0" +__version__ = "2.24.1" From 5c5b4b852e8818f885014bca3769c4b7c13183cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 Aug 2021 17:29:00 +0200 Subject: [PATCH 37/89] chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..ac804c81c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 264899dff..484e10516 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 16f65e6ae15979217ceea6c6d398c9057a363a13 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 10:29:04 -0400 Subject: [PATCH 38/89] feat: Support using GeoPandas for GEOGRAPHY columns (#848) --- docs/conf.py | 2 + docs/usage/pandas.rst | 15 ++ google/cloud/bigquery/_pandas_helpers.py | 71 ++++++- google/cloud/bigquery/job/query.py | 119 ++++++++++- google/cloud/bigquery/table.py | 196 +++++++++++++++++- owlbot.py | 4 + samples/geography/requirements.txt | 44 ++++ samples/geography/to_geodataframe.py | 32 +++ samples/geography/to_geodataframe_test.py | 25 +++ setup.py | 1 + testing/constraints-3.6.txt | 4 +- tests/system/test_client.py | 3 - tests/system/test_pandas.py | 143 +++++++++++++ tests/unit/job/test_query_pandas.py | 130 ++++++++++-- tests/unit/test__pandas_helpers.py | 100 +++++++++ tests/unit/test_table.py | 242 ++++++++++++++++++++++ 16 files changed, 1102 insertions(+), 29 deletions(-) create mode 100644 samples/geography/to_geodataframe.py create mode 100644 samples/geography/to_geodataframe_test.py diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..59a2d8fb3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..92eee67cf 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] + + +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas <https://geopandas.org/>`_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +..
literalinclude:: ../samples/geography/to_geodataframe.py :language: python :dedent: 4 :start-after: [START bigquery_query_results_geodataframe] :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index f49980645..ab58b1729 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -24,6 +24,36 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy + +try: + # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoids extra work done by `shapely.wkb.dumps` that we don't need. + # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() try: import pyarrow @@ -69,6 +99,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -193,14 +224,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -225,7 +258,24 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapely geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls; convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -279,6 +329,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema.
@@ -319,6 +375,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -450,11 +513,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 3ab47b0f9..0cb4798be 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1487,6 +1488,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1538,13 +1540,27 @@ .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_geodataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method.
+ def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names to pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm <https://tqdm.github.io/>`_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there is more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be omitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + ..
versionadded:: 2.24.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_geodataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 62f888001..609c0b57e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,6 +29,20 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + try: import pyarrow except ImportError: # pragma: NO COVER @@ -52,6 +66,7 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas import pyarrow from google.cloud import bigquery_storage @@ -60,6 +75,14 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." +) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." ) @@ -1878,6 +1901,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1933,6 +1957,13 @@ .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1941,13 +1972,18 @@ Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1988,8 +2024,136 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names to pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm <https://tqdm.github.io/>`_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there is more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be omitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + ..
+    # If changing the signature of this method, make sure to apply the same
+    # changes to job.QueryJob.to_geodataframe()
+    def to_geodataframe(
+        self,
+        bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+        dtypes: Dict[str, Any] = None,
+        progress_bar_type: str = None,
+        create_bqstorage_client: bool = True,
+        date_as_object: bool = True,
+        geography_column: Optional[str] = None,
+    ) -> "geopandas.GeoDataFrame":
+        """Create a GeoPandas GeoDataFrame by loading all pages of a query.
+
+        Args:
+            bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+                A BigQuery Storage API client. If supplied, use the faster
+                BigQuery Storage API to fetch rows from BigQuery.
+
+                This method requires the ``pyarrow`` and
+                ``google-cloud-bigquery-storage`` libraries.
+
+                This method only exposes a subset of the capabilities of the
+                BigQuery Storage API. For full access to all features
+                (projections, filters, snapshots) use the Storage API directly.
+
+            dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+                A dictionary mapping column names to pandas ``dtype``s. The
+                provided ``dtype`` is used when constructing the series for
+                the column specified. Otherwise, the default pandas behavior
+                is used.
+            progress_bar_type (Optional[str]):
+                If set, use the `tqdm <https://github.com/tqdm/tqdm>`_ library to
+                display a progress bar while the data downloads. Install the
+                ``tqdm`` package to use this feature.
+
+                Possible values of ``progress_bar_type`` include:
+
+                ``None``
+                    No progress bar.
+                ``'tqdm'``
+                    Use the :func:`tqdm.tqdm` function to print a progress bar
+                    to :data:`sys.stderr`.
+                ``'tqdm_notebook'``
+                    Use the :func:`tqdm.tqdm_notebook` function to display a
+                    progress bar as a Jupyter notebook widget.
+                ``'tqdm_gui'``
+                    Use the :func:`tqdm.tqdm_gui` function to display a
+                    progress bar as a graphical dialog box.
+
+            create_bqstorage_client (Optional[bool]):
+                If ``True`` (default), create a BigQuery Storage API client
+                using the default API settings. The BigQuery Storage API
+                is a faster way to fetch rows from BigQuery. See the
+                ``bqstorage_client`` parameter for more information.
+
+                This argument does nothing if ``bqstorage_client`` is supplied.
+
+            date_as_object (Optional[bool]):
+                If ``True`` (default), cast dates to objects. If ``False``, convert
+                to datetime64[ns] dtype.
+
+            geography_column (Optional[str]):
+                If there is more than one GEOGRAPHY column, identifies which
+                one to use to construct a geopandas GeoDataFrame. This option
+                can be omitted if there's only one GEOGRAPHY column.
+
+        Returns:
+            geopandas.GeoDataFrame:
+                A :class:`geopandas.GeoDataFrame` populated with row
+                data and column headers from the query results. The
+                column headers are derived from the destination
+                table's schema.
+
+        Raises:
+            ValueError:
+                If the :mod:`geopandas` library cannot be imported, or the
+                :mod:`google.cloud.bigquery_storage_v1` module is
+                required but cannot be imported.
+
+        .. versionadded:: 2.24.0
+        """
+        if geopandas is None:
+            raise ValueError(_NO_GEOPANDAS_ERROR)
+
+        geography_columns = set(
+            field.name
+            for field in self.schema
+            if field.field_type.upper() == "GEOGRAPHY"
+        )
+        if not geography_columns:
+            raise TypeError(
+                "There must be at least one GEOGRAPHY column"
+                " to create a GeoDataFrame"
+            )
+
+        if geography_column:
+            if geography_column not in geography_columns:
+                raise ValueError(
+                    f"The given geography column, {geography_column}, doesn't name"
+                    " a GEOGRAPHY column in the result."
+                )
+        elif len(geography_columns) == 1:
+            [geography_column] = geography_columns
+        else:
+            raise ValueError(
+                "There is more than one GEOGRAPHY column in the result. "
+                "The geography_column argument must be used to specify which "
+                "one to use to create a GeoDataFrame"
+            )
+
+        df = self.to_dataframe(
+            bqstorage_client,
+            dtypes,
+            progress_bar_type,
+            create_bqstorage_client,
+            date_as_object,
+            geography_as_object=True,
+        )
+
+        return geopandas.GeoDataFrame(
+            df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
+        )
+
 
 class _EmptyRowIterator(RowIterator):
     """An empty row iterator.
@@ -2042,6 +2206,7 @@ def to_dataframe(
         progress_bar_type=None,
         create_bqstorage_client=True,
         date_as_object=True,
+        geography_as_object=False,
     ) -> "pandas.DataFrame":
         """Create an empty dataframe.
 
@@ -2059,6 +2224,31 @@ def to_dataframe(
             raise ValueError(_NO_PANDAS_ERROR)
         return pandas.DataFrame()
 
+    def to_geodataframe(
+        self,
+        bqstorage_client=None,
+        dtypes=None,
+        progress_bar_type=None,
+        create_bqstorage_client=True,
+        date_as_object=True,
+        geography_column: Optional[str] = None,
+    ) -> "geopandas.GeoDataFrame":
+        """Create an empty GeoDataFrame.
+
+        Args:
+            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
+            dtypes (Any): Ignored. Added for compatibility with RowIterator.
+            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
+            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+            date_as_object (bool): Ignored. Added for compatibility with RowIterator.
+            geography_column (str): Ignored. Added for compatibility with RowIterator.
+
+        Returns:
+            geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`.
+ """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/owlbot.py b/owlbot.py index 09845480a..ea9904cdb 100644 --- a/owlbot.py +++ b/owlbot.py @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "geopandas": "https://geopandas.org/", + } ) # BigQuery has a custom multiprocessing note diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ac804c81c..7a76b4033 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,48 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 +geopandas==0.9.0 +google-api-core==1.31.2 +google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 +google-cloud-core==1.7.2 +google-crc32c==1.1.2 +google-resumable-media==1.3.3 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5 +packaging==21.0 +pandas==1.1.5 +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/setup.py b/setup.py index a1b3b61a0..e7515493d 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 3.0.0, < 6.0dev", ], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index ce012f0d7..be1a992fa 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -13,10 +14,11 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 4250111b4..9da45ee6e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2360,9 +2360,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 371dcea71..836f93210 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -798,3 +798,146 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == 
wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", 
"NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..b5af90c0b 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -23,6 +23,14 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -425,38 +433,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -868,3 +879,94 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires `shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "float", + ] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", 
"NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "NoneType", + ] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. + """ + import numpy + + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = "g" + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index b9cb56572..a9b0ae21f 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -36,6 +36,11 @@ # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. 
pyarrow = mock.Mock() +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core @@ -584,6 +589,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( @@ -1158,6 +1217,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): @@ -1554,3 +1635,22 
@@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 50d573345..1ce930ee4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -39,6 +40,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: import pyarrow import pyarrow.types @@ -1842,6 +1848,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1879,6 +1906,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3170,6 +3207,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." 
+ ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3927,6 +3976,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." + ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." 
+            ),
+        ):
+            row_iterator.to_geodataframe(
+                create_bqstorage_client=False, geography_column="xxx"
+            )
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_no_geog(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "STRING")), ()
+        )
+        with self.assertRaisesRegex(
+            TypeError,
+            re.escape(
+                "There must be at least one GEOGRAPHY column"
+                " to create a GeoDataFrame"
+            ),
+        ):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_w_geography_column(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")),
+            (
+                ("foo", "Point(0 0)", "Point(1 1)"),
+                ("bar", None, "Point(2 2)"),
+                ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"),
+            ),
+        )
+        df = row_iterator.to_geodataframe(
+            create_bqstorage_client=False, geography_column="geog"
+        )
+        self.assertIsInstance(df, geopandas.GeoDataFrame)
+        self.assertEqual(len(df), 3)  # verify the number of rows
+        self.assertEqual(list(df), ["name", "geog", "geog2"])  # verify the column names
+        self.assertEqual(df.name.dtype.name, "object")
+        self.assertEqual(df.geog.dtype.name, "geometry")
+        self.assertEqual(df.geog2.dtype.name, "object")
+        self.assertIsInstance(df.geog, geopandas.GeoSeries)
+        self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"])
+        self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"])
+        self.assertEqual(
+            [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"]
+        )
+
+        # Geog2 isn't a GeoSeries, but it contains geometries:
+        self.assertIsInstance(df.geog2, pandas.Series)
+        self.assertEqual(
+            [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"]
+        )
+        # and can easily be converted to a GeoSeries
+        self.assertEqual(
+            list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"]
+        )
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe")
+    def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
+        """
+        RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe.
+
+        This test just demonstrates that. We don't need to test all the
+        variations, which are tested for to_dataframe.
+ """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From aa4876e226aa54a43d3e20d401675403a41d71f8 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 09:33:04 -0600 Subject: [PATCH 39/89] test: Add test of datetime and time pandas load (#895) --- tests/system/test_pandas.py | 64 ++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 836f93210..93ce23481 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -279,8 +279,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. 
# See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -288,7 +286,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -313,14 +311,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -800,6 +798,50 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +def test_upload_time_and_datetime_56(bigquery_client, dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( From f319d2596e7146ef355053a2a178d2e6a921e651 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 24 Aug 2021 15:36:00 -0600 Subject: [PATCH 40/89] chore: release 2.25.0 (#898) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Jim Fulton --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3e74fd0..7a5727ee7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) + + +### Features + +* Support using GeoPandas for GEOGRAPHY columns 
([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) + ### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 96f84438a..f882cac3a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.1" +__version__ = "2.25.0" From fbbf72cd8d9629594b32ae981f7b6f4815fc3647 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 15:44:47 +0200 Subject: [PATCH 41/89] chore(deps): update dependency numpy to v1.21.2 (#899) * chore(deps): update dependency numpy to v1.21.2 * Update samples/geography/requirements.txt Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a76b4033..82a45e3e8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,8 @@ importlib-metadata==4.6.4 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 -numpy==1.19.5 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5 proto-plus==1.19.0 From 72a52f0253125a45e3162c5a32c0dbfe9e127466 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 16:28:49 +0200 Subject: [PATCH 42/89] chore(deps): update dependency google-cloud-core to v2 (#904) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 82a45e3e8..853306d71 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ google-api-core==1.31.2 google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 -google-cloud-core==1.7.2 +google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==1.3.3 googleapis-common-protos==1.53.0 From 1cb3e55253e824e3a1da5201f6ec09065fb6b627 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 25 Aug 2021 16:52:09 +0200 Subject: [PATCH 43/89] fix: use REST API in cell magic when requested (#892) Fixes #876. The `--use_rest_api` option did not work as expected and this commit fixes it. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/magics/magics.py | 12 +++++++++--- tests/unit/test_magics.py | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..d368bbeaa 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..88c92a070 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -660,7 +660,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -703,7 +705,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -757,7 +761,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -929,7 +938,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1246,7 +1255,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + 
bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) From b508809c0f887575274309a463e763c56ddd017d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 10:12:25 -0500 Subject: [PATCH 44/89] fix: populate default `timeout` and retry after client-side timeout (#896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses internal issue 195337762 where sometimes query job creation can take longer than expected and retrying the API call can be faster than waiting for the first query job request to fail. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #889 Towards https://github.com/googleapis/python-bigquery/issues/779 🦕 --- google/cloud/bigquery/client.py | 123 +++++++-------- google/cloud/bigquery/retry.py | 8 + noxfile.py | 4 - tests/unit/test_client.py | 242 +++++++++++++++++------------- tests/unit/test_create_dataset.py | 19 +-- tests/unit/test_delete_dataset.py | 7 +- tests/unit/test_list_datasets.py | 11 +- tests/unit/test_list_jobs.py | 19 +-- tests/unit/test_list_models.py | 12 +- tests/unit/test_list_projects.py | 11 +- tests/unit/test_list_routines.py | 12 +- tests/unit/test_list_tables.py | 16 +- tests/unit/test_magics.py | 5 +- tests/unit/test_retry.py | 12 ++ 14 files changed, 282 insertions(+), 219 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cbac82548..023346ffa 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -76,17 +76,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -245,7 +252,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -292,7 +299,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. 
@@ -358,7 +365,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -549,7 +556,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -624,7 +631,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. @@ -679,7 +686,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -751,7 +758,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -795,7 +802,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -825,7 +832,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -858,7 +865,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -883,7 +890,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -926,7 +933,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -970,7 +977,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -1012,7 +1019,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1082,7 +1089,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. 
@@ -1146,7 +1153,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1220,7 +1227,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1286,7 +1293,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1363,7 +1370,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1440,7 +1447,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1515,7 +1522,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1574,7 +1581,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1624,12 +1631,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1637,26 +1644,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1697,7 +1698,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. 
@@ -1751,7 +1752,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1804,7 +1805,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1893,7 +1894,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1990,7 +1991,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2064,7 +2065,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2141,7 +2142,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2256,7 +2257,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2340,7 +2341,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2443,7 +2444,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2678,7 +2679,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2961,7 +2962,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3064,7 +3065,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. 
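Note that `timeout` bounds a single HTTP attempt, while `retry` may issue several attempts; the total wait is capped by the retry deadline, as the `retry.py` docstring below suggests with `DEFAULT_RETRY.with_deadline(30)`. A sketch combining the two knobs; the 10- and 30-second values are arbitrary:

```python
from google.cloud import bigquery

client = bigquery.Client()

# Each HTTP attempt waits at most 10 seconds; retrying stops entirely
# after 30 seconds, at which point the last error is raised.
job = client.query(
    "SELECT 1",
    retry=bigquery.DEFAULT_RETRY.with_deadline(30),
    timeout=10,
)
```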
@@ -3162,7 +3163,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3444,7 +3445,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3579,7 +3580,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3629,7 +3630,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3741,7 +3742,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index e9286055c..830582322 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. +""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..9077924e9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. 
- if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca0dca975..e9204f1de 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -56,6 +56,7 @@ import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT try: from google.cloud import bigquery_storage @@ -367,7 +368,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -428,7 +429,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -771,7 +774,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -807,7 +810,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -843,11 +846,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -923,7 +928,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -964,7 +969,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1000,7 +1005,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -1081,7 +1086,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) 
self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1136,7 +1141,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1175,7 +1180,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1209,7 +1214,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1241,7 +1246,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1276,7 +1281,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1319,9 +1324,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1394,7 +1399,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1503,7 +1508,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1846,7 +1851,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -2136,7 +2141,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2171,7 +2176,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2269,7 +2274,7 @@ def test_update_table_w_query(self): "schema": schema_resource, }, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2398,7 +2403,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2412,7 +2417,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2437,7 +2442,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", 
query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2492,7 +2497,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2513,7 +2520,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2567,7 +2576,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2589,7 +2598,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2653,7 +2662,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2675,7 +2686,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): from google.cloud.bigquery import _helpers @@ -2697,7 +2710,7 @@ def _create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2846,7 +2859,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2886,7 +2899,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2904,7 +2917,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2973,7 +2986,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", 
path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2992,7 +3005,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -3028,7 +3041,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3154,7 +3167,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_client_location(self): @@ -3198,7 +3211,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3486,7 +3499,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3548,7 +3561,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3598,7 +3611,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3691,7 +3704,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3797,7 +3810,7 @@ def test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3841,7 +3854,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3884,7 +3897,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3929,7 +3942,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. 
self.assertIsInstance(job, ExtractJob) @@ -4099,7 +4112,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4152,7 +4165,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4208,7 +4221,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4252,7 +4268,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4304,7 +4323,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4389,7 +4411,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4430,7 +4455,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4475,7 +4503,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4546,7 +4574,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4602,7 +4630,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4794,7 +4822,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4862,7 +4890,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4907,7 +4935,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4984,7 +5012,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -5085,7 +5113,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5151,7 +5179,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_explicit_none_insert_ids(self): @@ -5185,7 +5213,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5269,7 +5297,7 @@ def test_insert_rows_w_numeric(self): project, 
ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5461,7 +5489,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5514,7 +5545,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5594,7 +5628,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5624,7 +5658,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5651,7 +5685,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5704,7 +5738,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5743,7 +5777,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5938,7 +5972,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5948,7 +5982,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6099,7 +6133,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6169,7 +6203,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6224,7 +6258,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6234,7 +6268,7 @@ def test_list_rows_with_missing_schema(self): 
method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6407,7 +6441,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6440,7 +6474,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6474,7 +6508,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6536,7 +6570,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6565,7 +6599,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6590,7 +6624,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6627,7 +6661,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6750,7 +6784,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6808,7 +6842,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6862,7 +6896,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6952,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7012,7 +7046,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7073,7 +7107,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7120,7 +7154,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7162,7 +7196,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, 
job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7210,7 +7244,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7272,7 +7306,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7347,7 +7381,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7442,7 +7476,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) assert warned # there should be at least one warning @@ -7592,7 +7626,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7658,7 +7692,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7696,7 +7730,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7749,7 +7783,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7809,7 +7843,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index d07aaed4f..67b21225d 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC 
"labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index 3a65e031c..b48beb147 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -14,6 +14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 7793a7ba6..6f0b55c5e 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index f348be724..1fb40d446 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 4ede9a7dd..b14852338 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index a88540dd5..190612b44 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 069966542..80e62d6bd 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 45d15bed3..8360f6605 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 88c92a070..36cbf4993 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -185,7 +186,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -249,7 +250,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index c7c25e036..e0a992f78 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def 
test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 4fc7c693283e94b44d388f8c7991a1ad78fcde45 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:34:24 +0200 Subject: [PATCH 45/89] chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/compatibility-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/confidence-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2250-httpswwwgithubcomgoogleapispython-bigquerycomparev2241v2250-2021-08-24) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) ##### Features - Support using GeoPandas for GEOGRAPHY columns ([#​848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ##### Bug Fixes - remove pytz dependency and require pyarrow>=3.0.0 ([#​875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
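The headline feature in v2.25.0 is GEOGRAPHY support via GeoPandas. A rough sketch of what that enables; the `to_geodataframe()` method name is inferred from the linked feature commit rather than stated in this changelog, and geopandas must be installed:

```python
from google.cloud import bigquery

client = bigquery.Client()
sql = "SELECT ST_GEOGPOINT(-122.084, 37.422) AS location"

# GEOGRAPHY columns are returned as geometries in a
# geopandas.GeoDataFrame instead of as WKT strings.
gdf = client.query(sql).to_geodataframe()
print(gdf.dtypes)
```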
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 853306d71..d810e1241 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==1.31.2 google-auth==1.35.0 -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 484e10516..07760b666 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 121c2c2005225fae8a89ed231026e7ac64625532 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:56:26 +0200 Subject: [PATCH 46/89] chore(deps): update dependency pandas to v1.3.2 (#900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/compatibility-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/confidence-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
pandas-dev/pandas

### [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.1...v1.3.2)

Patch release in the 1.3.x series with regression fixes and bug fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html)).

### [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.0...v1.3.1)

First patch release in the 1.3.x series with regression fixes and bug fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html)).

### [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.5...v1.3.0)

Feature release with new features, bug fixes, and performance improvements ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html)).

### [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.4...v1.2.5)

Patch release in the 1.2.x series with regression fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html)).

### [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.3...v1.2.4)

Patch release in the 1.2.x series with regression fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html)).

### [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.2...v1.2.3)

Patch release in the 1.2.x series with regression fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html)).

### [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.1...v1.2.2)

Patch release in the 1.2.x series with regression fixes and bug fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html)).

### [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.0...v1.2.1)

First patch release in the 1.2.x series with regression fixes and bug fixes ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html)).

### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0)

[Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0)

Feature release with new features, bug fixes, and performance improvements ([full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html)).

Every release is available on the defaults and conda-forge channels (`conda install pandas`; `conda install -c conda-forge pandas` for the feature releases) or via PyPI (`python3 -m pip install --upgrade pandas`). Upgrading is recommended for all users, and any issues can be reported on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
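The version pins applied in the requirements diff below rely on PEP 508 environment markers, so installs on Python 3.6 keep `pandas==1.1.5` while Python 3.7+ picks up `pandas==1.3.2`. A minimal sketch of how such markers evaluate, using the `packaging` library; the marker strings mirror the ones in the diff, and the environments are hypothetical:

```python
from packaging.markers import Marker

old_pin = Marker("python_version < '3.7'")   # guards pandas==1.1.5
new_pin = Marker("python_version >= '3.7'")  # guards pandas==1.3.2

# evaluate() merges the given keys into the current interpreter's defaults,
# comparing python_version as a version, not as a plain string.
assert old_pin.evaluate(environment={"python_version": "3.6"})
assert not old_pin.evaluate(environment={"python_version": "3.9"})
assert new_pin.evaluate(environment={"python_version": "3.9"})
```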
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 3 ++- samples/snippets/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d810e1241..b5fe247cb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -27,7 +27,8 @@ mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" numpy==1.21.2; python_version > "3.6" packaging==21.0 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 protobuf==3.17.3 pyarrow==5.0.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 07760b666..d75c747fb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,6 +7,6 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 From a3a85dac90211599b2260da0d514d19647085575 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 11:38:24 -0500 Subject: [PATCH 47/89] chore: group all renovate PRs together (#911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This excludes `renovate.json` from templated updates. If this works well, we can update the core templates (perhaps with a configuration option to `py_library`). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- owlbot.py | 28 +++++++++++++++++----------- renovate.json | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/owlbot.py b/owlbot.py index ea9904cdb..8664b658a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. 
f"google/cloud/bigquery_{library.name}/services/**", @@ -98,9 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "pandas": "http://pandas.pydata.org/pandas-docs/dev", "geopandas": "https://geopandas.org/", - } + }, ) # BigQuery has a custom multiprocessing note @@ -113,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- @@ -125,14 +129,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -140,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -160,7 +165,8 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. - pyi-error""") + pyi-error""" + ), ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/renovate.json b/renovate.json index c04895563..713c60bb4 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 109a5365d7c1e388a49809e653a51c1d77ddb0a2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 25 Aug 2021 17:34:14 +0000 Subject: [PATCH 48/89] chore: release 2.25.1 (#912) :robot: I have created a release \*beep\* \*boop\* --- ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5727ee7..8a21df6fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) + + +### Bug Fixes + +* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) +* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) + ## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index f882cac3a..21cbec9fe 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.0" +__version__ = "2.25.1" From e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Aug 2021 12:03:02 +0200 Subject: [PATCH 49/89] docs: update docstring for bigquery_create_routine sample (#883) (#917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed language issues. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> --- samples/create_routine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/create_routine.py b/samples/create_routine.py index 012c7927a..1cb4a80b4 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. 
# routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine( From 679e8edc27f24a312243f5587e1c71798902fce8 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 26 Aug 2021 12:39:23 +0200 Subject: [PATCH 50/89] chore: migrate default branch to main (#910) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: migrate default branch to main * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Add owlbot replacements to persist changes * Manually apply new replacements from owlbot.py * Move temp replacement rules after s.move() Co-authored-by: Owl Bot --- .github/sync-repo-settings.yaml | 8 +++--- .kokoro/build.sh | 2 +- .kokoro/test-samples-impl.sh | 2 +- CONTRIBUTING.rst | 12 ++++----- docs/conf.py | 10 +++---- owlbot.py | 48 +++++++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 17 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 8634a3043..6572e5982 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -1,9 +1,9 @@ -# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings -# Rules for master branch protection +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `master` -- pattern: master +# Defaults to `main` +- pattern: main requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 302cc1e1a..4d6a1d0f6 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox python3 -m nox --version # If this is a continuous build, send the test log to the FlakyBot. -# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then cleanup() { chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 311a8d54b..8a324c9c7 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2faf5aed3..5b87973dd 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -50,9 +50,9 @@ You'll have to create a development environment using a Git checkout: # Configure remotes such that you can pull changes from the googleapis/python-bigquery # repository into your local repository. 
$ git remote add upstream git@github.com:googleapis/python-bigquery.git - # fetch and merge changes from upstream into master + # fetch and merge changes from upstream into main $ git fetch upstream - $ git merge upstream/master + $ git merge upstream/main Now your local repo is set up such that you will push changes to your GitHub repo, from which you can submit a pull request. @@ -110,12 +110,12 @@ Coding Style variables:: export GOOGLE_CLOUD_TESTING_REMOTE="upstream" - export GOOGLE_CLOUD_TESTING_BRANCH="master" + export GOOGLE_CLOUD_TESTING_BRANCH="main" By doing this, you are specifying the location of the most up-to-date version of ``python-bigquery``. The the suggested remote name ``upstream`` should point to the official ``googleapis`` checkout and the - the branch should be the main branch on that remote (``master``). + the branch should be the main branch on that remote (``main``). - This repository contains configuration for the `pre-commit `__ tool, which automates checking @@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the ``README``. Due to the reStructuredText (``rst``) parser used by PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst`` instead of -``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``) +``https://github.com/googleapis/python-bigquery/blob/main/CONTRIBUTING.rst``) may cause problems creating links or rendering the description. .. _description on PyPI: https://pypi.org/project/google-cloud-bigquery @@ -234,7 +234,7 @@ We support: Supported versions can be found in our ``noxfile.py`` `config`_. -.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py +.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py We also explicitly decided to support Python 3 beginning with version 3.6. diff --git a/docs/conf.py b/docs/conf.py index 59a2d8fb3..07e5d8c30 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,8 +76,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = "google-cloud-bigquery" @@ -281,7 +281,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery.tex", "google-cloud-bigquery Documentation", author, @@ -316,7 +316,7 @@ # (source start file, name, description, authors, manual section). 
man_pages = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", [author], @@ -335,7 +335,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", author, diff --git a/owlbot.py b/owlbot.py index 8664b658a..09aa8ca6f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -169,4 +169,52 @@ ), ) + +# Remove the replacements below once +# https://github.com/googleapis/synthtool/pull/1188 is merged + +# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files +s.replace( + ".kokoro/*.sh", + "repo-automation-bots/tree/master", + "repo-automation-bots/tree/main", +) + +# Customize CONTRIBUTING.rst to replace master with main +s.replace( + "CONTRIBUTING.rst", + "fetch and merge changes from upstream into master", + "fetch and merge changes from upstream into main", +) + +s.replace( + "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", +) + +s.replace( + "CONTRIBUTING.rst", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", +) + +s.replace( + "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", +) + +s.replace( + "docs/conf.py", "master_doc", "root_doc", +) + +s.replace( + "docs/conf.py", "# The master toctree document.", "# The root toctree document.", +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) From 4f229cb973dff6a71b322806a85e950a57aa6582 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 27 Aug 2021 16:20:50 -0600 Subject: [PATCH 51/89] chore: invalid docstrings broke docfx (#924) --- google/cloud/bigquery/job/copy_.py | 2 +- google/cloud/bigquery/job/extract.py | 2 +- google/cloud/bigquery/job/load.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index c6ee98944..f0dd3d668 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -240,7 +240,7 @@ def to_api_repr(self): def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index 3373bcdef..52aa036c9 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -244,7 +244,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index aee055c1c..b12c3e621 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -800,7 +800,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. 
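The three docstring edits above are a one-character fix with real rendering consequences: reStructuredText only treats `.. note::` (double colon) as a directive, so the single-colon form was rendered as literal text and broke docfx. A minimal sketch of the corrected form, with an illustrative function name:

```python
def describe():
    """Summary line.

    .. note::

        Only the double-colon form is parsed as an admonition block;
        ``.. note:`` would render as plain text.
    """
```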
From 84489223e5c57837e54978caec930115c87bd986 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 30 Aug 2021 21:37:44 +0200 Subject: [PATCH 52/89] chore(deps): update all dependencies (#914) * chore(deps): update all dependencies * Python version modifiers for pyproj Co-authored-by: Tim Swast --- samples/geography/requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b5fe247cb..aaf1bbfd1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -10,13 +10,13 @@ dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 -google-api-core==1.31.2 -google-auth==1.35.0 +google-api-core==2.0.0 +google-auth==2.0.1 google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 -google-resumable-media==1.3.3 +google-resumable-media==2.0.0 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 @@ -36,7 +36,8 @@ pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.20 pyparsing==2.4.7 -pyproj==3.0.1 +pyproj==3.0.1; python_version < "3.7" +pyproj==3.1.0; python_version > "3.6" python-dateutil==2.8.2 pytz==2021.1 PyYAML==5.4.1 From 656d2fa6f870573a21235c83463752a2d084caba Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 31 Aug 2021 10:13:38 +0200 Subject: [PATCH 53/89] fix: error inserting DataFrame with REPEATED field (#925) Co-authored-by: Tim Swast --- google/cloud/bigquery/_pandas_helpers.py | 8 ++- tests/unit/test__pandas_helpers.py | 63 ++++++++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index ab58b1729..29139ae09 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -844,7 +844,13 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. - if pandas.isna(value): + is_nan = pandas.isna(value) + + # isna() can also return an array-like of bools, but the latter's boolean + # value is ambiguous, hence an extra check. An array-like value is *not* + # considered a NaN, however. 
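# Aside, not part of the patch: a minimal illustration of the ambiguity the
# comment above describes. pandas.isna() returns a scalar bool for scalars but
# a boolean array for list-likes such as REPEATED-field values, and an array
# of more than one element has no single truth value:
#
#     import pandas
#     pandas.isna(2.0)             # False, a plain bool
#     pandas.isna([pandas.NA, 2])  # array([ True, False])
#     if pandas.isna([pandas.NA, 2]):  # ValueError: truth value is ambiguous
#         pass
#
# Hence the isinstance(is_nan, bool) guard on the next added line.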
+ if isinstance(is_nan, bool) and is_nan: continue output[column] = value + yield output diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index a9b0ae21f..f0975ef65 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -821,6 +821,41 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +def test_dataframe_to_json_generator_repeated_field(module_under_test): + pytest.importorskip( + "pandas", + minversion=str(PANDAS_MINIUM_VERSION), + reason=( + f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " + "which introduces pandas.NA" + ), + ) + + df_data = [ + collections.OrderedDict( + [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] + ), + collections.OrderedDict( + [ + ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]), + ("not_repeated_col", "second"), + ] + ), + ] + dataframe = pandas.DataFrame(df_data) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + + expected = [ + {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}, + { + "repeated_col": ["a", "b", mock.sentinel.foo, "d"], + "not_repeated_col": "second", + }, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( @@ -882,7 +917,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): def test_dataframe_to_bq_schema_dict_sequence(module_under_test): df_data = collections.OrderedDict( [ - ("str_column", [u"hello", u"world"]), + ("str_column", ["hello", "world"]), ("int_column", [42, 8]), ("bool_column", [True, False]), ] @@ -1070,7 +1105,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema) @@ -1139,8 +1174,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1167,8 +1202,8 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1197,8 +1232,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ - {"struct_field": {"one": 2}, "status": u"FOO"}, - {"struct_field": {"two": u"222"}, "status": u"BAR"}, + {"struct_field": {"one": 2}, "status": "FOO"}, + {"struct_field": {"two": "222"}, "status": "BAR"}, ] ) @@ -1252,7 +1287,7 @@ def 
test_augment_schema_type_detection_succeeds(module_under_test): "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55), "date_field": datetime.date(2005, 5, 31), "bytes_field": b"some bytes", - "string_field": u"some characters", + "string_field": "some characters", "numeric_field": decimal.Decimal("123.456"), "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } @@ -1312,13 +1347,13 @@ def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ { - "status": u"FOO", + "status": "FOO", "struct_field": {"one": 1}, - "struct_field_2": {"foo": u"123"}, + "struct_field_2": {"foo": "123"}, }, { - "status": u"BAR", - "struct_field": {"two": u"111"}, + "status": "BAR", + "struct_field": {"two": "111"}, "struct_field_2": {"bar": 27}, }, ] @@ -1351,7 +1386,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) write_table_patch = mock.patch.object( From bd417d31116a6f00a1e62f68bb36d142579ec3e7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 31 Aug 2021 16:46:24 +0200 Subject: [PATCH 54/89] chore(deps): update all dependencies (#926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.25.0` -> `==2.25.1` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/compatibility-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/confidence-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-cloud-testutils](https://togithub.com/googleapis/python-test-utils) | `==1.0.0` -> `==1.1.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/compatibility-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/confidence-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | 
[![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/compatibility-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/confidence-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | | [importlib-metadata](https://togithub.com/python/importlib_metadata) | `==4.6.4` -> `==4.8.1` | [![age](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/compatibility-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/confidence-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | | [pytest](https://docs.pytest.org/en/latest/) ([source](https://togithub.com/pytest-dev/pytest), [changelog](https://docs.pytest.org/en/stable/changelog.html)) | `==6.2.4` -> `==6.2.5` | [![age](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/compatibility-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/confidence-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://togithub.com/python/typing) | `==3.10.0.0` -> `==3.10.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/compatibility-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/confidence-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery

### [`v2.25.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#2251-httpswwwgithubcomgoogleapispython-bigquerycomparev2250v2251-2021-08-25)

[Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1)

googleapis/python-test-utils

### [`v1.1.0`](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0)

[Compare Source](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0)

googleapis/python-crc32c

### [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#113-httpswwwgithubcomgoogleapispython-crc32ccomparev112v113-2021-08-30)

[Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)

python/importlib_metadata

### [`v4.8.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v481)

[Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.8.0...v4.8.1)

- [#348](https://togithub.com/python/importlib_metadata/issues/348): Restored support for `EntryPoint` access by item, deprecating support in the process. Users are advised to use direct member access instead of item-based access: `ep[0]` -> `ep.name`, `ep[1]` -> `ep.value`, `ep[2]` -> `ep.group`, `ep[:]` -> `ep.name, ep.value, ep.group`.

### [`v4.8.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v480)

[Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.1...v4.8.0)

- [#337](https://togithub.com/python/importlib_metadata/issues/337): Rewrote `EntryPoint` as a simple class, still immutable and still with the attributes, but without any expectation for `namedtuple` functionality such as `_asdict`.

### [`v4.7.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v471)

[Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.0...v4.7.1)

- [#344](https://togithub.com/python/importlib_metadata/issues/344): Fixed regression in `packages_distributions` when neither top-level.txt nor a files manifest is present.

### [`v4.7.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v470)

[Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.6.4...v4.7.0)

- [#330](https://togithub.com/python/importlib_metadata/issues/330): In `packages_distributions`, now infer top-level names from `.files()` when a `top-level.txt` (Setuptools-specific metadata) is not present.

pytest-dev/pytest

### [`v6.2.5`](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5)

[Compare Source](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5)

python/typing

### [`v3.10.0.2`](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2)

[Compare Source](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2)

### [`v3.10.0.1`](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1)

[Compare Source](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1)
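Patch 55 below (fix: underscores weren't allowed in struct field names when passing parameters to the DB API, #930) widens two regexes so that typed DB API placeholders accept `_` inside STRUCT subtype names. A hedged usage sketch, assuming default credentials; the query and values are illustrative:

```python
import datetime

from google.cloud import bigquery
from google.cloud.bigquery import dbapi

conn = dbapi.connect(bigquery.Client())
cursor = conn.cursor()

# Before the fix, the "b_date" subtype name failed to parse because the
# placeholder-type grammar had no "_" in its character class.
cursor.execute(
    "SELECT %(child:STRUCT<name STRING, b_date DATE>)s",
    {"child": {"name": "ch1", "b_date": datetime.date(2021, 1, 1)}},
)
print(cursor.fetchone())
```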
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 8 ++++---- samples/snippets/requirements-test.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index b0cf76724..5d836a5c5 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.4 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index aaf1bbfd1..c325ee5e4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,15 +12,15 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.0 google-auth==2.0.1 -google-cloud-bigquery==2.25.0 +google-cloud-bigquery==2.25.1 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 -google-crc32c==1.1.2 +google-crc32c==1.1.3 google-resumable-media==2.0.0 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 -importlib-metadata==4.6.4 +importlib-metadata==4.8.1 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 @@ -45,7 +45,7 @@ requests==2.26.0 rsa==4.7.2 Shapely==1.7.1 six==1.16.0 -typing-extensions==3.10.0.0 +typing-extensions==3.10.0.2 typing-inspect==0.7.1 urllib3==1.26.6 zipp==3.5.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b8dee50d0..caa48813a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.0.0 -pytest==6.2.4 +google-cloud-testutils==1.1.0 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d75c747fb..3b30ceebf 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.25.0 +google-cloud-bigquery==2.25.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From fcb0bc68c972c2c98bb8542f54e9228308177ecb Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 16:56:40 -0600 Subject: [PATCH 55/89] fix: underscores weren't allowed in struct field names when passing parameters to the DB API (#930) --- google/cloud/bigquery/dbapi/_helpers.py | 2 +- google/cloud/bigquery/dbapi/cursor.py | 2 +- tests/unit/test_dbapi__helpers.py | 8 ++++---- tests/unit/test_dbapi_cursor.py | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 9c134b47c..72e711bcf 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -173,7 +173,7 @@ def _parse_type( \s* (ARRAY|STRUCT|RECORD) # Type \s* - <([A-Z0-9<> ,()]+)> # Subtype(s) + <([A-Z0-9_<> ,()]+)> # 
Subtype(s) \s*$ """, re.IGNORECASE | re.VERBOSE, diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 587598d5f..b1239ff57 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -494,7 +494,7 @@ def _extract_types( ([^:)]*) # name (?:: # ':' introduces type ( # start of type group - [a-zA-Z0-9<>, ]+ # First part, no parens + [a-zA-Z0-9_<>, ]+ # First part, no parens (?: # start sets of parens + non-paren text \([0-9 ,]+\) # comma-separated groups of digits in parens diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index b33203354..5965a4817 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -612,8 +612,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameters,parameter_types,expect", [ ( - [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], - ["ARRAY", "struct"], + [[], dict(name="ch1", b_date=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], [ { "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, @@ -623,13 +623,13 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameterType": { "structTypes": [ {"name": "name", "type": {"type": "STRING"}}, - {"name": "bdate", "type": {"type": "DATE"}}, + {"name": "b_date", "type": {"type": "DATE"}}, ], "type": "STRUCT", }, "parameterValue": { "structValues": { - "bdate": {"value": "2021-01-01"}, + "b_date": {"value": "2021-01-01"}, "name": {"value": "ch1"}, } }, diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 026810aaf..cb55da889 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,10 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), ( "values(%%%%%(foo:struct)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), From 936686b680f810558afee9834b201c25c6d0437f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 1 Sep 2021 06:56:59 -0600 Subject: [PATCH 56/89] chore: release 2.25.2 (#916) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a21df6fe..b4c8e5fb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) + + +### Bug Fixes + +* error inserting DataFrame with REPEATED field ([#925](https://www.github.com/googleapis/python-bigquery/issues/925)) ([656d2fa](https://www.github.com/googleapis/python-bigquery/commit/656d2fa6f870573a21235c83463752a2d084caba)) +* underscores weren't allowed in struct field names when passing parameters to the DB API ([#930](https://www.github.com/googleapis/python-bigquery/issues/930)) ([fcb0bc6](https://www.github.com/googleapis/python-bigquery/commit/fcb0bc68c972c2c98bb8542f54e9228308177ecb)) + + +### Documentation + +* update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) 
([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05)) + ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 21cbec9fe..e8672849f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.1" +__version__ = "2.25.2" From 10fee52c95f3080e8c788f8d3e1c8284d3f380ab Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 1 Sep 2021 16:23:46 +0200 Subject: [PATCH 57/89] chore(deps): update all dependencies (#928) --- samples/geography/requirements.txt | 8 ++++---- samples/snippets/requirements.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c325ee5e4..cfccab8b0 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -10,13 +10,13 @@ dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 -google-api-core==2.0.0 -google-auth==2.0.1 -google-cloud-bigquery==2.25.1 +google-api-core==2.0.1 +google-auth==2.0.2 +google-cloud-bigquery==2.25.2 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.3 -google-resumable-media==2.0.0 +google-resumable-media==2.0.1 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3b30ceebf..57f55d186 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ -google-cloud-bigquery==2.25.1 +google-cloud-bigquery==2.25.2 google-cloud-bigquery-storage==2.6.3 -google-auth-oauthlib==0.4.5 +google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From b28907693bbe889becc1b9c8963f0a7e1ee6c35a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 1 Sep 2021 14:58:09 -0500 Subject: [PATCH 58/89] fix: guard imports against unsupported pyarrow versions (#934) * fix: guard imports against unsupported pyarrow versions * add unit tests * fix pytype * second try at fixing pytype --- google/cloud/bigquery/_helpers.py | 72 +++++++++++++++++++++++- google/cloud/bigquery/_pandas_helpers.py | 19 ++----- google/cloud/bigquery/exceptions.py | 4 ++ noxfile.py | 9 ++- testing/constraints-3.6.txt | 2 +- tests/unit/job/test_query_pandas.py | 8 +-- tests/unit/test__helpers.py | 68 ++++++++++++++++++++++ tests/unit/test__pandas_helpers.py | 29 ++++++---- tests/unit/test_table.py | 12 ++-- 9 files changed, 184 insertions(+), 39 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 0a1f71444..9df0f3d0a 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,7 +19,7 @@ import decimal import math import re -from typing import Union +from typing import Any, Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -29,7 +29,10 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import ( + LegacyBigQueryStorageError, + 
LegacyPyarrowError, +) _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" @@ -42,6 +45,7 @@ re.VERBOSE, ) +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -95,12 +99,74 @@ def verify_version(self): if self.installed_version < _MIN_BQ_STORAGE_VERSION: msg = ( "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {self.installed_version})." + f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." ) raise LegacyBigQueryStorageError(msg) +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is + installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise LegacyPyarrowError( + f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade " + f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." + ) + raise LegacyPyarrowError(msg) + return None + + return pyarrow + + BQ_STORAGE_VERSIONS = BQStorageVersions() +PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 29139ae09..0a22043a3 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -55,12 +55,6 @@ def _to_wkb(v): _to_wkb = _to_wkb() -try: - import pyarrow - import pyarrow.parquet -except ImportError: # pragma: NO COVER - pyarrow = None - try: from google.cloud.bigquery_storage import ArrowSerializationOptions except ImportError: @@ -73,12 +67,10 @@ def _to_wkb(v): from google.cloud.bigquery import schema -_LOGGER = logging.getLogger(__name__) +pyarrow = _helpers.PYARROW_VERSIONS.try_import() -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) + +_LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. 
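# Aside, not part of the patch: how the guarded import defined above is meant
# to be used at call sites. try_import() returns the pyarrow module when an
# installed version satisfies _MIN_PYARROW_VERSION (3.0.0) and None otherwise,
# so modules can branch instead of failing at import time:
#
#     from google.cloud.bigquery import _helpers
#
#     pyarrow = _helpers.PYARROW_VERSIONS.try_import()
#     if pyarrow is None:
#         pass  # fall back to code paths that do not require pyarrow
#
# With raise_if_error=True the helper raises LegacyPyarrowError instead of
# returning None, which suits required-dependency paths such as
# dataframe_to_parquet below.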
@@ -548,8 +540,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") + pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + + import pyarrow.parquet bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index 6e5c27eb1..fb1188eee 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -19,3 +19,7 @@ class BigQueryError(Exception): class LegacyBigQueryStorageError(BigQueryError): """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + + +class LegacyPyarrowError(BigQueryError): + """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/noxfile.py b/noxfile.py index 9077924e9..d53b33121 100644 --- a/noxfile.py +++ b/noxfile.py @@ -94,9 +94,16 @@ def unit(session): default(session) -@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" + + # Install optional dependencies that are out-of-date. + # https://github.com/googleapis/python-bigquery/issues/933 + # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.install("pyarrow==1.0.0") + default(session, install_extras=False) diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index be1a992fa..23d2724f7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -19,6 +19,6 @@ proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 -shapely==1.6.0 +Shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index b5af90c0b..580b41c78 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -31,10 +31,6 @@ import geopandas except (ImportError, AttributeError): # pragma: NO COVER geopandas = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None try: from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER @@ -44,11 +40,15 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +from google.cloud.bigquery import _helpers from .helpers import _make_client from .helpers import _make_connection from .helpers import _make_job_resource +pyarrow = _helpers.PYARROW_VERSIONS.try_import() + + @pytest.fixture def table_read_options_kwarg(): # Create a BigQuery Storage table read options object with pyarrow compression diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index f8d00e67d..945b95d1b 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,9 +24,20 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions 
since it may not match reality. + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + def _object_under_test(self): from google.cloud.bigquery import _helpers @@ -89,6 +100,63 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") +class TestPyarrowVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions since it may not match reality. + _helpers.PYARROW_VERSIONS._installed_version = None + + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.PyarrowVersions() + + def _call_try_import(self, **kwargs): + from google.cloud.bigquery import _helpers + + _helpers.PYARROW_VERSIONS._installed_version = None + return _helpers.PYARROW_VERSIONS.try_import(**kwargs) + + def test_try_import_raises_no_error_w_recent_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = self._call_try_import(raise_if_error=True) + self.assertIsNotNone(pyarrow) + except LegacyPyarrowError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_try_import_returns_none_w_legacy_pyarrow(self): + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = self._call_try_import() + self.assertIsNone(pyarrow) + + def test_try_import_raises_error_w_legacy_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="2.0.0"): + with self.assertRaises(LegacyPyarrowError): + self._call_try_import(raise_if_error=True) + + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index f0975ef65..80b226a3a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -29,13 +29,6 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. - pyarrow = mock.Mock() try: import geopandas except ImportError: # pragma: NO COVER @@ -44,9 +37,19 @@ import pytest from google import api_core +from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.types +else: # pragma: NO COVER + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. 
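# Aside, not part of the patch: these version guards are unit-tested by
# patching the module-level version string that PyarrowVersions reads lazily.
# A minimal sketch, assuming the standalone `mock` package used by this suite:
#
#     import mock
#     from google.cloud.bigquery import _helpers
#
#     versions = _helpers.PyarrowVersions()  # fresh instance, empty cache
#     with mock.patch("pyarrow.__version__", new="2.0.0"):
#         assert versions.try_import() is None  # 2.0.0 < minimum 3.0.0
#
# The tearDown() hooks above reset the cached _installed_version so a patched
# version from one test cannot leak into the next.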
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index f0975ef65..80b226a3a 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -29,13 +29,6 @@
 import pandas.testing
 except ImportError:  # pragma: NO COVER
     pandas = None
-try:
-    import pyarrow
-    import pyarrow.types
-except ImportError:  # pragma: NO COVER
-    # Mock out pyarrow when missing, because methods from pyarrow.types are
-    # used in test parameterization.
-    pyarrow = mock.Mock()
 try:
     import geopandas
 except ImportError:  # pragma: NO COVER
@@ -44,9 +37,19 @@
 import pytest

 from google import api_core
+from google.cloud.bigquery import exceptions
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery import schema

+
+pyarrow = _helpers.PYARROW_VERSIONS.try_import()
+if pyarrow:
+    import pyarrow.types
+else:  # pragma: NO COVER
+    # Mock out pyarrow when missing, because methods from pyarrow.types are
+    # used in test parameterization.
+    pyarrow = mock.Mock()
+
 try:
     from google.cloud import bigquery_storage

@@ -1120,15 +1123,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch):
-    monkeypatch.setattr(module_under_test, "pyarrow", None)
-    with pytest.raises(ValueError) as exc_context:
+    mock_pyarrow_import = mock.Mock()
+    mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError(
+        "pyarrow not installed"
+    )
+    monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import)
+
+    with pytest.raises(exceptions.LegacyPyarrowError):
         module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None)
-    assert "pyarrow is required" in str(exc_context.value)


 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
-def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
+def test_dataframe_to_parquet_w_extra_fields(module_under_test):
     with pytest.raises(ValueError) as exc_context:
         module_under_test.dataframe_to_parquet(
             pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 1ce930ee4..c64620a48 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -45,18 +45,18 @@
 except (ImportError, AttributeError):  # pragma: NO COVER
     geopandas = None

-try:
-    import pyarrow
-    import pyarrow.types
-except ImportError:  # pragma: NO COVER
-    pyarrow = None
-
 try:
     from tqdm import tqdm
 except (ImportError, AttributeError):  # pragma: NO COVER
     tqdm = None

 from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery import _helpers
+
+
+pyarrow = _helpers.PYARROW_VERSIONS.try_import()
+if pyarrow:
+    import pyarrow.types


 def _mock_client():

From ba02f248ba9c449c34859579a4011f4bfd2f4a93 Mon Sep 17 00:00:00 2001
From: Jim Fulton
Date: Wed, 1 Sep 2021 14:22:16 -0600
Subject: [PATCH 59/89] feat: set the X-Server-Timeout header when timeout is
 set (#927)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)

Fixes #919 🦕
---
 google/cloud/bigquery/client.py | 27 ++++++++++++++++++++++++++-
 tests/unit/conftest.py          | 19 +++++++++++++++++++
 tests/unit/test_client.py       | 27 +++++++++++++++++--------
 3 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 023346ffa..47ff83c5d 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -131,6 +131,8 @@
 # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414
 _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")])

+TIMEOUT_HEADER = "X-Server-Timeout"
+

 class Project(object):
     """Wrapper for resource describing a BigQuery project.
@@ -742,16 +744,26 @@ def create_table( return self.get_table(table.reference, retry=retry) def _call_api( - self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs + self, + retry, + span_name=None, + span_attributes=None, + job_ref=None, + headers: Optional[Dict[str, str]] = None, + **kwargs, ): + kwargs = _add_server_timeout_header(headers, kwargs) call = functools.partial(self._connection.api_request, **kwargs) + if retry: call = retry(call) + if span_name is not None: with create_span( name=span_name, attributes=span_attributes, client=self, job_ref=job_ref ): return call() + return call() def get_dataset( @@ -4045,3 +4057,16 @@ def _get_upload_headers(user_agent): "User-Agent": user_agent, "content-type": "application/json", } + + +def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs): + timeout = kwargs.get("timeout") + if timeout is not None: + if headers is None: + headers = {} + headers[TIMEOUT_HEADER] = str(timeout) + + if headers: + kwargs["headers"] = headers + + return kwargs diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 7a67ea6b5..feba65aa5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import mock import pytest from .helpers import make_client @@ -35,3 +36,21 @@ def DS_ID(): @pytest.fixture def LOCATION(): yield "us-central" + + +def noop_add_server_timeout_header(headers, kwargs): + if headers: + kwargs["headers"] = headers + return kwargs + + +@pytest.fixture(autouse=True) +def disable_add_server_timeout_header(request): + if "enable_add_server_timeout_header" in request.keywords: + yield + else: + with mock.patch( + "google.cloud.bigquery.client._add_server_timeout_header", + noop_add_server_timeout_header, + ): + yield diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e9204f1de..d2a75413f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1806,7 +1806,6 @@ def test_update_dataset(self): "access": ACCESS, }, path="/" + PATH, - headers=None, timeout=7.5, ) self.assertEqual(ds2.description, ds.description) @@ -1850,7 +1849,6 @@ def test_update_dataset_w_custom_property(self): method="PATCH", data={"newAlphaProperty": "unreleased property"}, path=path, - headers=None, timeout=DEFAULT_TIMEOUT, ) @@ -1909,7 +1907,7 @@ def test_update_model(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_model.model_id, model.model_id) self.assertEqual(updated_model.description, model.description) @@ -1982,7 +1980,6 @@ def test_update_routine(self): method="PUT", data=sent, path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, timeout=7.5, ) self.assertEqual(actual_routine.arguments, routine.arguments) @@ -2090,7 +2087,7 @@ def test_update_table(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2140,7 +2137,6 @@ def test_update_table_w_custom_property(self): method="PATCH", path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, - 
headers=None,
            timeout=DEFAULT_TIMEOUT,
        )
        self.assertEqual(
@@ -2175,7 +2171,6 @@ def test_update_table_only_use_legacy_sql(self):
             method="PATCH",
             path="/%s" % path,
             data={"view": {"useLegacySql": True}},
-            headers=None,
             timeout=DEFAULT_TIMEOUT,
         )
         self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
@@ -2273,7 +2268,6 @@ def test_update_table_w_query(self):
                 "expirationTime": str(_millis(exp_time)),
                 "schema": schema_resource,
             },
-            headers=None,
             timeout=DEFAULT_TIMEOUT,
         )
@@ -8173,3 +8167,20 @@ def transmit_next_chunk(transport):
     chunk_size = RU.call_args_list[0][0][1]
     assert chunk_size == 100 * (1 << 20)
+
+
+@pytest.mark.enable_add_server_timeout_header
+@pytest.mark.parametrize("headers", [None, {}])
+def test__call_api_add_server_timeout_w_timeout(client, headers):
+    client._connection = make_connection({})
+    client._call_api(None, method="GET", path="/", headers=headers, timeout=42)
+    client._connection.api_request.assert_called_with(
+        method="GET", path="/", timeout=42, headers={"X-Server-Timeout": "42"}
+    )
+
+
+@pytest.mark.enable_add_server_timeout_header
+def test__call_api_no_add_server_timeout_wo_timeout(client):
+    client._connection = make_connection({})
+    client._call_api(None, method="GET", path="/")
+    client._connection.api_request.assert_called_with(method="GET", path="/")
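The behavior under test is small enough to show in isolation. Below is a minimal, self-contained mirror of the `_add_server_timeout_header` helper this patch introduces (an illustrative sketch, not the installed library):

```python
# Mirrors the helper added in this patch: a client-side timeout is also
# propagated to the server via an X-Server-Timeout header, so the backend
# can abandon work the caller has already given up waiting for.
from typing import Dict, Optional

TIMEOUT_HEADER = "X-Server-Timeout"


def add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs: dict) -> dict:
    timeout = kwargs.get("timeout")
    if timeout is not None:
        if headers is None:
            headers = {}
        headers[TIMEOUT_HEADER] = str(timeout)

    if headers:
        kwargs["headers"] = headers

    return kwargs


print(add_server_timeout_header(None, {"method": "GET", "timeout": 42}))
# {'method': 'GET', 'timeout': 42, 'headers': {'X-Server-Timeout': '42'}}
print(add_server_timeout_header(None, {"method": "GET"}))
# {'method': 'GET'}
```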
From db57c1c361a80302628965304aa8f9079ff22afc Mon Sep 17 00:00:00 2001
From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com>
Date: Wed, 1 Sep 2021 20:44:16 +0000
Subject: [PATCH 60/89] chore: release 2.26.0 (#937)

:robot: I have created a release \*beep\* \*boop\*
---
## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01)


### Features

* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93))


### Bug Fixes

* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a))
---
This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please).
---
 CHANGELOG.md                     | 12 ++++++++++++
 google/cloud/bigquery/version.py |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b4c8e5fb7..5a3cb6bee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,18 @@
 [1]: https://pypi.org/project/google-cloud-bigquery/#history

+## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01)
+
+
+### Features
+
+* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93))
+
+
+### Bug Fixes
+
+* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a))
+
 ### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31)

diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index e8672849f..1f7d79ab9 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "2.25.2"
+__version__ = "2.26.0"

From 65cb29d13748b976390dda61793407ef7e2ed386 Mon Sep 17 00:00:00 2001
From: WhiteSource Renovate
Date: Thu, 2 Sep 2021 02:29:35 +0200
Subject: [PATCH 61/89] chore(deps): update dependency google-cloud-bigquery to
 v2.26.0 (#938)

---
 samples/geography/requirements.txt | 2 +-
 samples/snippets/requirements.txt  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index cfccab8b0..aa4b05325 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -12,7 +12,7 @@
 geojson==2.5.0
 geopandas==0.9.0
 google-api-core==2.0.1
 google-auth==2.0.2
-google-cloud-bigquery==2.25.2
+google-cloud-bigquery==2.26.0
 google-cloud-bigquery-storage==2.6.3
 google-cloud-core==2.0.0
 google-crc32c==1.1.3
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 57f55d186..cf123794e 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,4 +1,4 @@
-google-cloud-bigquery==2.25.2
+google-cloud-bigquery==2.26.0
 google-cloud-bigquery-storage==2.6.3
 google-auth-oauthlib==0.4.6
 grpcio==1.39.0

From 1c2bd019874cb6d880f3c99a7498e284a25265d3 Mon Sep 17 00:00:00 2001
From: Jim Fulton
Date: Thu, 2 Sep 2021 11:10:38 -0600
Subject: [PATCH 62/89] chore: update system tests and samples to use an
 @google.com email address (#942)

* chore: update system tests and samples to use an @google.com email address

* Add group prefix

* fixed access entry some more
---
 samples/update_dataset_access.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py
index 6e844cc90..a5c2670e7 100644
--- a/samples/update_dataset_access.py
+++ b/samples/update_dataset_access.py
@@ -28,8 +28,8 @@ def update_dataset_access(dataset_id):

     entry = bigquery.AccessEntry(
         role="READER",
-        entity_type="userByEmail",
-        entity_id="sample.bigquery.dev@gmail.com",
+        entity_type="groupByEmail",
+        entity_id="cloud-developer-relations@google.com",
     )

     entries = list(dataset.access_entries)
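For context, the updated sample slots into the usual read-modify-append-update flow for dataset ACLs. A hedged usage sketch (the project and dataset IDs are placeholders; the rest follows the public google-cloud-bigquery API):

```python
# Usage sketch for the sample change above; "your-project.your_dataset"
# is a placeholder ID.
from google.cloud import bigquery

client = bigquery.Client()
dataset = client.get_dataset("your-project.your_dataset")  # placeholder

entry = bigquery.AccessEntry(
    role="READER",
    entity_type="groupByEmail",
    entity_id="cloud-developer-relations@google.com",
)

entries = list(dataset.access_entries)
entries.append(entry)
dataset.access_entries = entries

# Only the "access_entries" field is sent in the PATCH request.
dataset = client.update_dataset(dataset, ["access_entries"])
```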
From c29c7236a5fc5d134ca0f145bf3b38a2d60883a3 Mon Sep 17 00:00:00 2001
From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Date: Fri, 3 Sep 2021 06:14:46 -0400
Subject: [PATCH 63/89] chore(python): rename default branch to main (#935)

Source-Link: https://github.com/googleapis/synthtool/commit/5c0fa62eea9c33ebe61e582424b659eb264e1ba4
Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d

Co-authored-by: Owl Bot
Co-authored-by: Tim Swast
Co-authored-by: Peter Lamut
Co-authored-by: Anthonios Partheniou
---
 .github/.OwlBot.lock.yaml | 2 +-
 CONTRIBUTING.rst          | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index a9fcd07cc..c07f148f0 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,3 +1,3 @@
 docker:
   image: gcr.io/repo-automation-bots/owlbot-python:latest
-  digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803
+  digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 5b87973dd..8aecf9dd2 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -113,9 +113,9 @@ Coding Style
    export GOOGLE_CLOUD_TESTING_BRANCH="main"

    By doing this, you are specifying the location of the most up-to-date
-   version of ``python-bigquery``. The the suggested remote name ``upstream``
-   should point to the official ``googleapis`` checkout and the
-   the branch should be the main branch on that remote (``main``).
+   version of ``python-bigquery``. The
+   remote name ``upstream`` should point to the official ``googleapis``
+   checkout and the branch should be the default branch on that remote (``main``).

 - This repository contains configuration for the `pre-commit
   `__ tool, which automates checking

From 503d360e81c5c8e3f1a5828bcb3fa13001dc8c14 Mon Sep 17 00:00:00 2001
From: Jim Fulton
Date: Fri, 3 Sep 2021 10:19:42 -0600
Subject: [PATCH 64/89] chore: Reduce duplicated code between tests/unit and
 tests/unit/job (#940)

* chore: Reduce duplicated code between tests/unit and tests/unit/job

* reuse parent make_client
---
 tests/unit/job/helpers.py           | 22 ++----------
 tests/unit/job/test_base.py         | 13 +++----
 tests/unit/job/test_copy.py         | 21 +++++------
 tests/unit/job/test_extract.py      | 21 +++++------
 tests/unit/job/test_load.py         | 41 ++++++++++-----------
 tests/unit/job/test_query.py        | 56 +++++++++++++++--------------
 tests/unit/job/test_query_pandas.py | 36 ++++++++++---------
 7 files changed, 101 insertions(+), 109 deletions(-)

diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py
index c792214e7..3642c7229 100644
--- a/tests/unit/job/helpers.py
+++ b/tests/unit/job/helpers.py
@@ -14,36 +14,20 @@

 import unittest

-import mock
 from google.api_core import exceptions

-
-def _make_credentials():
-    import google.auth.credentials
-
-    return mock.Mock(spec=google.auth.credentials.Credentials)
+from ..helpers import make_connection, make_client as __make_client


 def _make_client(project="test-project", connection=None):
-    from google.cloud.bigquery.client import Client
-
+    client = __make_client(project)
     if connection is None:
-        connection = _make_connection()
+        connection = make_connection()

-    client = Client(project=project, credentials=_make_credentials(), _http=object())
     client._connection = connection
     return client


-def _make_connection(*responses):
-    import google.cloud.bigquery._http
-    from google.cloud.exceptions import NotFound
-
-    mock_conn =
mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - def _make_retriable_exception(): return exceptions.TooManyRequests( "retriable exception", errors=[{"reason": "rateLimitExceeded"}] diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index c3f7854e3..aa8e9c045 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -21,8 +21,9 @@ import mock import pytest +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_retriable_exception from .helpers import _make_job_resource @@ -740,7 +741,7 @@ def test_cancel_defaults(self): response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) + connection = job._client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -769,7 +770,7 @@ def test_cancel_explicit(self): response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) + connection = client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -930,7 +931,7 @@ def test_result_default_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( _make_retriable_exception(), begun_job_resource, _make_retriable_exception(), @@ -968,7 +969,7 @@ def test_result_w_retry_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, exceptions.NotFound("not normally retriable"), @@ -1008,7 +1009,7 @@ def test_result_w_retry_wo_state(self): ) def test_result_explicit_w_state(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) # Use _set_properties() instead of directly modifying _properties so diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index 992efcf6b..d94e5bc88 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestCopyJobConfig(_Base): @@ -333,7 +334,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -396,9 +397,9 @@ def test_begin_w_alternate_client(self): "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = 
self._table_ref(self.DESTINATION_TABLE) @@ -427,7 +428,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -446,9 +447,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -468,7 +469,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -488,9 +489,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 4c9411d0d..8bada51af 100644 --- a/tests/unit/job/test_extract.py +++ b/tests/unit/job/test_extract.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestExtractJobConfig(_Base): @@ -265,7 +266,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -318,9 +319,9 @@ def test_begin_w_alternate_client(self): "printHeader": False, } RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -353,7 +354,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client @@ -371,9 +372,9 @@ def test_exists_miss_w_bound_client(self): def 
test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 @@ -395,7 +396,7 @@ def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -416,9 +417,9 @@ def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 70e7860a7..cf2096b8b 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -16,9 +16,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestLoadJob(_Base): @@ -238,7 +239,7 @@ def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) + connection = make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection @@ -421,7 +422,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties["status"] = {"state": "RUNNING"} @@ -436,7 +437,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) path = "/projects/{}/jobs".format(self.PROJECT) @@ -478,7 +479,7 @@ def test_begin_w_autodetect(self): del resource["etag"] del resource["selfLink"] del resource["user_email"] - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True @@ -559,9 +560,9 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) 
client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") @@ -611,7 +612,7 @@ def test_begin_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -634,7 +635,7 @@ def test_begin_w_job_reference(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -654,9 +655,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -679,7 +680,7 @@ def test_exists_miss_w_job_reference(self): from google.cloud.bigquery import job job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -701,7 +702,7 @@ def test_exists_miss_w_job_reference(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -719,9 +720,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -744,7 +745,7 @@ def test_reload_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -769,7 +770,7 @@ def test_cancel_w_bound_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) + conn = 
make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -788,9 +789,9 @@ def test_cancel_w_alternate_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) + conn2 = make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -813,7 +814,7 @@ def test_cancel_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) + conn = make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index d41370520..4c598d797 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -26,9 +26,11 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query + +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestQueryJob(_Base): @@ -943,7 +945,7 @@ def test_result(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection( + conn = make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, connection=conn) @@ -1005,7 +1007,7 @@ def test_result_with_done_job_calls_get_query_results(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, results_page_resource) + conn = make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1052,7 +1054,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, query_page_resource) + connection = make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1096,7 +1098,7 @@ def test_result_w_retry(self): "tableId": "dest_table", } - connection = _make_connection( + connection = make_connection( exceptions.NotFound("not normally retriable"), query_resource, exceptions.NotFound("not normally retriable"), @@ -1144,7 +1146,7 @@ def test_result_w_empty_schema(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, } - connection = _make_connection(query_resource, query_resource) + connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1165,7 +1167,7 @@ def test_result_invokes_begins(self): query_resource["jobComplete"] = True 
done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, incomplete_resource, query_resource, @@ -1196,7 +1198,7 @@ def test_result_w_timeout(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -1245,7 +1247,7 @@ def test_result_w_page_size(self): ], } query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( + conn = make_connection( query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) @@ -1303,7 +1305,7 @@ def test_result_with_start_index(self): {"f": [{"v": "jkl"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = make_connection(query_resource, tabledata_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1428,7 +1430,7 @@ def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1462,7 +1464,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() @@ -1530,9 +1532,9 @@ def test_begin_w_alternate_client(self): } RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -1588,7 +1590,7 @@ def test_begin_w_udf(self): {"resourceUri": RESOURCE_URI}, {"inlineCode": INLINE_UDF_CODE}, ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), @@ -1647,7 +1649,7 @@ def test_begin_w_named_query_parameter(self): "parameterValue": {"value": "123"}, } ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1695,7 +1697,7 @@ def test_begin_w_positional_query_parameter(self): config["queryParameters"] = [ {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1774,7 +1776,7 @@ def test_begin_w_table_defs(self): csv_table: 
CSV_CONFIG_RESOURCE, } want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.table_definitions = {bt_table: bt_config, csv_table: csv_config} @@ -1818,7 +1820,7 @@ def test_dry_run_query(self): del RESOURCE["selfLink"] del RESOURCE["user_email"] RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True @@ -1846,7 +1848,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1862,9 +1864,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1887,7 +1889,7 @@ def test_reload_w_bound_client(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1919,9 +1921,9 @@ def test_reload_w_alternate_client(self): "datasetId": DS_ID, "tableId": DEST_TABLE, } - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1945,7 +1947,7 @@ def test_reload_w_timeout(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1975,7 +1977,7 @@ def test_iter(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 580b41c78..1b44f65d3 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -41,8 +41,10 @@ tqdm = None from google.cloud.bigquery import _helpers + +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_job_resource @@ -125,7 +127,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, 
table_read_options_kwarg): }, "totalRows": "4", } - connection = _make_connection(get_query_results_resource, job_resource) + connection = make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -207,7 +209,7 @@ def test_to_arrow(): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -252,7 +254,7 @@ def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) begun_resource = _make_job_resource(job_type="query") job = target_class.from_api_repr(begun_resource, client) @@ -299,7 +301,7 @@ def test_to_arrow_w_tqdm_w_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -356,7 +358,7 @@ def test_to_arrow_w_tqdm_w_pending_status(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -408,7 +410,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -450,7 +452,7 @@ def _make_job(schema=(), rows=()): tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -486,7 +488,7 @@ def test_to_dataframe_ddl_query(): "jobReference": resource["jobReference"], "schema": {"fields": []}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) @@ -514,7 +516,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -562,7 +564,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "totalRows": "4", "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) 
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -632,7 +634,7 @@ def test_to_dataframe_column_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -673,7 +675,7 @@ def test_to_dataframe_column_date_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -702,7 +704,7 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource, query_resource, ) client = _make_client(connection=connection) @@ -735,7 +737,7 @@ def test_to_dataframe_w_tqdm_pending(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -791,7 +793,7 @@ def test_to_dataframe_w_tqdm(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -846,7 +848,7 @@ def test_to_dataframe_w_tqdm_max_results(): ] rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) From e3b26d8deb872ac1ee2df802e635793eb60703d4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 3 Sep 2021 19:51:10 +0200 Subject: [PATCH 65/89] test: fix routine DDL sample test exits too early (#932) Co-authored-by: Tres Seaver Co-authored-by: Tim Swast --- samples/tests/test_routine_samples.py | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index 59ec1fae9..c1b0bb5a7 100644 --- a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -32,7 +32,6 @@ def test_create_routine_ddl(capsys, random_routine_id, client): out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out - return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ From c9068e4191dbe3632fe399a0b777e8bc54a183a6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 3 Sep 2021 20:12:26 +0200 Subject: [PATCH 66/89] chore(deps): update all dependencies (#939) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.3` -> `==2.7.0` | 
[![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/compatibility-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/confidence-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) | | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.3` -> `==1.1.4` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/compatibility-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/confidence-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) | | [google-resumable-media](https://togithub.com/googleapis/google-resumable-media-python) | `==2.0.1` -> `==2.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/compatibility-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/confidence-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.7.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​270-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev263v270-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.3...v2.7.0) ##### Features - **v1beta2:** Align ReadRows timeout with other versions of the API ([#​293](https://www.togithub.com/googleapis/python-bigquery-storage/issues/293)) ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### Documentation - **v1beta2:** Align session length with public documentation ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### [2.6.3](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3) (2021-08-06) ##### Bug Fixes - resume read stream on `Unknown` transport-layer exception ([#​263](https://www.togithub.com/googleapis/python-bigquery-storage/issues/263)) ([127caa0](https://www.github.com/googleapis/python-bigquery-storage/commit/127caa06144b9cec04b23914b561be6a264bcb36)) ##### [2.6.2](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.1...v2.6.2) (2021-07-28) ##### Bug Fixes - enable self signed jwt for grpc ([#​249](https://www.togithub.com/googleapis/python-bigquery-storage/issues/249)) ([a7e8d91](https://www.github.com/googleapis/python-bigquery-storage/commit/a7e8d913fc3de67a3f38ecbd35af2f9d1a33aa8d)) ##### Documentation - remove duplicate code samples ([#​246](https://www.togithub.com/googleapis/python-bigquery-storage/issues/246)) ([303f273](https://www.github.com/googleapis/python-bigquery-storage/commit/303f2732ced38e491df92e965dd37bac24a61d2f)) - add Samples section to CONTRIBUTING.rst ([#​241](https://www.togithub.com/googleapis/python-bigquery-storage/issues/241)) ([5d02358](https://www.github.com/googleapis/python-bigquery-storage/commit/5d02358fbd397cafcc1169d829859fe2dd568645)) ##### [2.6.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.0...v2.6.1) (2021-07-20) ##### Bug Fixes - **deps:** pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions ([#​240](https://www.togithub.com/googleapis/python-bigquery-storage/issues/240)) ([8f848e1](https://www.github.com/googleapis/python-bigquery-storage/commit/8f848e18379085160492cdd2d12dc8de50a46c8e)) ##### Documentation - pandas DataFrame samples are more standalone ([#​224](https://www.togithub.com/googleapis/python-bigquery-storage/issues/224)) ([4026997](https://www.github.com/googleapis/python-bigquery-storage/commit/4026997d7a286b63ed2b969c0bd49de59635326d))
googleapis/python-crc32c ### [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​114-httpswwwgithubcomgoogleapispython-crc32ccomparev114v114-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4)
googleapis/google-resumable-media-python ### [`v2.0.2`](https://togithub.com/googleapis/google-resumable-media-python/blob/master/CHANGELOG.md#​202-httpswwwgithubcomgoogleapisgoogle-resumable-media-pythoncomparev201v202-2021-09-02) [Compare Source](https://togithub.com/googleapis/google-resumable-media-python/compare/v2.0.1...v2.0.2)
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index aa4b05325..8fb578018 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,10 +13,10 @@ geopandas==0.9.0 google-api-core==2.0.1 google-auth==2.0.2 google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.6.3 +google-cloud-bigquery-storage==2.7.0 google-cloud-core==2.0.0 -google-crc32c==1.1.3 -google-resumable-media==2.0.1 +google-crc32c==1.1.2 +google-resumable-media==2.0.2 googleapis-common-protos==1.53.0 grpcio==1.39.0 idna==3.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index cf123794e..e096af157 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.6.3 +google-cloud-bigquery-storage==2.7.0 google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From 1a6ab12273d1063425c2b54d1cc3ee4fa88063a9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 7 Sep 2021 16:38:33 +0200 Subject: [PATCH 67/89] tests: add more dependencies to pre-release tests (#948) --- noxfile.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index d53b33121..4b12e8138 100644 --- a/noxfile.py +++ b/noxfile.py @@ -214,7 +214,15 @@ def prerelease_deps(session): session.install( "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" ) - session.install("--pre", "grpcio", "pandas") + session.install( + "--pre", + "google-api-core", + "google-cloud-bigquery-storage", + "google-cloud-core", + "google-resumable-media", + "grpcio", + "pandas", + ) session.install( "freezegun", "google-cloud-datacatalog", From 864383bc01636b3774f7da194587b8b7edd0383d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 09:16:10 -0600 Subject: [PATCH 68/89] fix: Arrow extension-type metadata was not set when calling the REST API or when there are no rows (#946) --- google/cloud/bigquery/_pandas_helpers.py | 14 +++++- google/cloud/bigquery/table.py | 8 +++- tests/system/conftest.py | 17 +++++++ tests/system/test_arrow.py | 59 ++++++++++++++++++++++++ tests/unit/test__pandas_helpers.py | 23 +++++++++ 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 0a22043a3..869c0215d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -173,6 +173,13 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", } + 
BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER @@ -227,7 +234,12 @@ def bq_to_arrow_field(bq_field, array_type=None): if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" - return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( + bq_field.field_type.upper() if bq_field.field_type else "" + ) + return pyarrow.field( + bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) return None diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 609c0b57e..c4a45dc83 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1810,10 +1810,14 @@ def to_arrow( if owns_bqstorage_client: bqstorage_client._transport.grpc_channel.close() - if record_batches: + if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) else: - # No records, use schema based on BigQuery schema. + # No records (not record_batches), use schema based on BigQuery schema + # **or** + # we used the REST API (bqstorage_client is None), + # which doesn't add arrow extension metadata, so we let + # `bq_to_arrow_schema` do it. arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index cc2c2a4dc..7eec76a32 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. 
import pathlib +import re import pytest import test_utils.prefixer @@ -61,6 +62,17 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture() +def dataset_client(bigquery_client, dataset_id): + import google.cloud.bigquery.job + + return bigquery.Client( + default_query_job_config=google.cloud.bigquery.job.QueryJobConfig( + default_dataset=f"{bigquery_client.project}.{dataset_id}", + ) + ) + + @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" @@ -98,3 +110,8 @@ def scalars_extreme_table( job.result() yield full_table_id bigquery_client.delete_table(full_table_id) + + +@pytest.fixture +def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): + return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 12f7af9cb..96f9dea25 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -110,3 +110,62 @@ def test_list_rows_nullable_scalars_dtypes( timestamp_type = schema.field("timestamp_col").type assert timestamp_type.unit == "us" assert timestamp_type.tz is not None + + +@pytest.mark.parametrize("do_insert", [True, False]) +def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( + dataset_client, test_table_name, do_insert +): + types = dict( + astring=("STRING", "'x'"), + astring9=("STRING(9)", "'x'"), + abytes=("BYTES", "b'x'"), + abytes9=("BYTES(9)", "b'x'"), + anumeric=("NUMERIC", "42"), + anumeric9=("NUMERIC(9)", "42"), + anumeric92=("NUMERIC(9,2)", "42"), + abignumeric=("BIGNUMERIC", "42e30"), + abignumeric49=("BIGNUMERIC(37)", "42e30"), + abignumeric492=("BIGNUMERIC(37,2)", "42e30"), + abool=("BOOL", "true"), + adate=("DATE", "'2021-09-06'"), + adatetime=("DATETIME", "'2021-09-06T09:57:26'"), + ageography=("GEOGRAPHY", "ST_GEOGFROMTEXT('point(0 0)')"), + # Can't get arrow data for interval :( + # ainterval=('INTERVAL', "make_interval(1, 2, 3, 4, 5, 6)"), + aint64=("INT64", "42"), + afloat64=("FLOAT64", "42.0"), + astruct=("STRUCT", "struct(42)"), + atime=("TIME", "'1:2:3'"), + atimestamp=("TIMESTAMP", "'2021-09-06T09:57:26'"), + ) + columns = ", ".join(f"{k} {t[0]}" for k, t in types.items()) + dataset_client.query(f"create table {test_table_name} ({columns})").result() + if do_insert: + names = list(types) + values = ", ".join(types[name][1] for name in names) + names = ", ".join(names) + dataset_client.query( + f"insert into {test_table_name} ({names}) values ({values})" + ).result() + at = dataset_client.query(f"select * from {test_table_name}").result().to_arrow() + storage_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + at = ( + dataset_client.query(f"select * from {test_table_name}") + .result() + .to_arrow(create_bqstorage_client=False) + ) + rest_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + + assert rest_api_metadata == storage_api_metadata + assert rest_api_metadata["adatetime"] == { + b"ARROW:extension:name": b"google:sqlType:datetime" + } + assert rest_api_metadata["ageography"] == { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + } diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 80b226a3a..ef8c80c81 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1696,3 +1696,26 @@ def 
test_bq_to_arrow_field_type_override(module_under_test): ).type == pyarrow.binary() ) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.parametrize( + "field_type, metadata", + [ + ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}), + ( + "geography", + { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + ), + ], +) +def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", field_type) + ).metadata + == metadata + ) From 76c06168451b5eb37ca87dd4004deda8b9088c4b Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 7 Sep 2021 10:12:26 -0600 Subject: [PATCH 69/89] chore: reference main branch of google-cloud-python (#953) Adjust google-cloud-python links to reference main branch. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8454cf9c0..d0ad059a2 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ processing power of Google's infrastructure. - `Product Documentation`_ .. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg - :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#general-availability + :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg From 9157537d47670e8093bbb52462d6aa8bcb218508 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 11 Sep 2021 00:04:13 +0200 Subject: [PATCH 70/89] chore(deps): update all dependencies (#943) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.5` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/compatibility-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.5/confidence-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | | [grpcio](https://grpc.io) | `==1.39.0` -> `==1.40.0` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/compatibility-slim/1.39.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.40.0/confidence-slim/1.39.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-crc32c

### [`v1.1.5`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​115-httpswwwgithubcomgoogleapispython-crc32ccomparev114v115-2021-09-07)

[Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.4...v1.1.5)

### [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​114-httpswwwgithubcomgoogleapispython-crc32ccomparev114v114-2021-09-02)

[Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4)

### [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​113-httpswwwgithubcomgoogleapispython-crc32ccomparev112v113-2021-08-30)

[Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8fb578018..a4b4ed692 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==2.0.2 googleapis-common-protos==1.53.0 -grpcio==1.39.0 +grpcio==1.40.0 idna==3.2 importlib-metadata==4.8.1 libcst==0.3.20 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e096af157..f575e41b1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.26.0 google-cloud-bigquery-storage==2.7.0 google-auth-oauthlib==0.4.6 -grpcio==1.39.0 +grpcio==1.40.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From acca1cb7baaa3b00508246c994ade40314d421c3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 13 Sep 2021 16:28:53 +0200 Subject: [PATCH 71/89] feat: include key metadata in Job representation (#964) --- google/cloud/bigquery/job/base.py | 8 ++++++++ tests/unit/job/test_base.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index e5fc592a6..72db5a63c 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -722,6 +722,14 @@ def cancelled(self): and self.error_result.get("reason") == _STOPPED_REASON ) + def __repr__(self): + result = ( + f"{self.__class__.__name__}<" + f"project={self.project}, location={self.location}, id={self.job_id}" + ">" + ) + return result + class _JobConfig(object): """Abstract base class for job configuration objects. 
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index aa8e9c045..e320c72cb 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1043,6 +1043,12 @@ def test_cancelled_w_error_result_w_stopped(self): self.assertTrue(job.cancelled()) + def test_repr(self): + client = _make_client(project="project-foo") + job = self._make_one("job-99", client) + job._properties.setdefault("jobReference", {})["location"] = "ABC" + assert repr(job) == "_AsyncJob<project=project-foo, location=ABC, id=job-99>" + class Test_JobConfig(unittest.TestCase): JOB_TYPE = "testing" From 4f723598b3d6958b51a5f74db1f8003e91bf8ef1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Sep 2021 09:32:46 -0500 Subject: [PATCH 72/89] test: ensure prerelease versions of pandas and arrow are tested nightly (#961) * test: ensure prerelease versions of pandas and arrow are tested nightly * use regex to find package names rather than filter out comment lines --- .kokoro/continuous/prerelease-deps-3.8.cfg | 7 ++++ noxfile.py | 44 ++++++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 .kokoro/continuous/prerelease-deps-3.8.cfg diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/noxfile.py b/noxfile.py index 4b12e8138..d41573407 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,6 +16,7 @@ import pathlib import os +import re import shutil import nox @@ -212,16 +213,30 @@ def prerelease_deps(session): # PyArrow prerelease packages are published to an alternative PyPI host. # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( - "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", ) session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", "--pre", + "--upgrade", + "pandas", + ) + + session.install( + "--pre", + "--upgrade", "google-api-core", "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", "grpcio", - "pandas", ) session.install( "freezegun", "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", "IPython", "ipykernel", "psutil", "pytest", "pytest-cov", ) - session.install("-e", ".[all]") + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions.
session.run("python", "-c", "import grpc; print(grpc.__version__)") From ab6e76f9489262fd9c1876a1c4f93d7e139aa999 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Sep 2021 09:58:28 -0500 Subject: [PATCH 73/89] docs: simplify destination table sample with f-strings (#966) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 199420466 🦕 --- samples/snippets/natality_tutorial.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index a8d90501a..ed08b279a 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -38,12 +38,12 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [START bigquery_query_natality_tutorial] dataset = bigquery.Dataset(dataset_id_full) @@ -51,15 +51,13 @@ def run_natality_tutorial(override_values={}): # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) - # In the new BigQuery dataset, create a reference to a new table for - # storing the query results. - table_ref = dataset.table("regression_input") - # Configure the query job. job_config = bigquery.QueryJobConfig() - # Set the destination table to the table reference created above. - job_config.destination = table_ref + # Set the destination table to where you want to store query results. + # As of google-cloud-bigquery 1.11.0, a fully qualified table ID can be + # used in place of a TableReference. + job_config.destination = f"{dataset_id_full}.regression_input" # Set up a query in Standard SQL, which is the default for the BigQuery # Python client library. 
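The simplification in this sample relies on a general convenience: since google-cloud-bigquery 1.11.0, a fully qualified table ID string can stand in for a `TableReference`. A minimal sketch of the same pattern outside the tutorial, with placeholder project and dataset names:

```python
from google.cloud import bigquery

client = bigquery.Client()

# A plain "project.dataset.table" string works wherever a TableReference
# is accepted, so no intermediate dataset/table objects are required.
job_config = bigquery.QueryJobConfig(
    destination="my-project.natality_regression.regression_input"
)

query_job = client.query(
    "SELECT weight_pounds, mother_age"
    " FROM `bigquery-public-data.samples.natality` LIMIT 100",
    job_config=job_config,
)
query_job.result()  # Wait for the query to finish and the table to be written.
```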
From 6e785c74183c92c4622c0b4c280c1848ca8d5cbb Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 17 Sep 2021 09:50:57 +0200 Subject: [PATCH 74/89] cleanup: get rid of unit test warnings caused by our code (#973) * Explicitly register custom pytest marker * Avoid/silence user warnings in geopandas tests --- google/cloud/bigquery/table.py | 5 +++- tests/unit/conftest.py | 5 ++++ tests/unit/test_table.py | 43 +++++++++++++++++++++++++--------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c4a45dc83..3378b3378 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2251,7 +2251,10 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + + # Since an empty GeoDataFrame has no geometry column, we do not CRS on it, + # because that's deprecated. + return geopandas.GeoDataFrame() def to_dataframe_iterable( self, diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index feba65aa5..c2ae78eaa 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -54,3 +54,8 @@ def disable_add_server_timeout_header(request): noop_add_server_timeout_header, ): yield + + +def pytest_configure(config): + # Explicitly register custom test markers to avoid warnings. + config.addinivalue_line("markers", "enable_add_server_timeout_header") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c64620a48..c94bf3a7f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1866,8 +1866,7 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(df.crs.srs, "EPSG:4326") - self.assertEqual(df.crs.name, "WGS 84") + self.assertIsNone(df.crs) class TestRowIterator(unittest.TestCase): @@ -4027,8 +4026,14 @@ def test_to_geodataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "geometry") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") self.assertEqual(df.crs.name, "WGS 84") self.assertEqual(df.geog.crs.srs, "EPSG:4326") @@ -4099,8 +4104,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual(df.geog.dtype.name, "geometry") self.assertEqual(df.geog2.dtype.name, "object") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. 
+ warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] ) @@ -4110,10 +4121,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual( [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] ) + # and can easily be converted to a GeoSeries - self.assertEqual( - list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), + ["0.0", "0.0", "0.0"], + ) @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") @@ -4165,8 +4180,14 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.g.dtype.name, "geometry") self.assertIsInstance(df.g, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0"]) - self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) From afdc1edf36914a512ef5c738efcb1e11055d903f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 17 Sep 2021 10:39:33 +0200 Subject: [PATCH 75/89] refactor: extract common logic to TableBase class (#956) * refactor: extract common logic to TableBase class * Adress pytype's missing attribute false warning * Mark TableBase class as private * Simplify TableReference.to_api_repr() logic * Avoid get/set subproperty helper gotcha * Test _TableBase class directly --- google/cloud/bigquery/_helpers.py | 16 +- google/cloud/bigquery/table.py | 236 ++++++++------------- tests/unit/test__helpers.py | 13 +- tests/unit/test_table.py | 342 ++++++++++++++++-------------- 4 files changed, 293 insertions(+), 314 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 9df0f3d0a..28a76206e 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -672,8 +672,9 @@ def _get_sub_prop(container, keys, default=None): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to get the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to get the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. default (Optional[object]): @@ -700,6 +701,9 @@ def _get_sub_prop(container, keys, default=None): Returns: object: The value if present or the default. 
""" + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys: if key not in sub_val: @@ -715,8 +719,9 @@ def _set_sub_prop(container, keys, value): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to set the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to set the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. value (object): Value to set within the container. @@ -743,6 +748,9 @@ def _set_sub_prop(container, keys, value): >>> container {'key': {'subkey': 'new'}} """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys[:-1]: if key not in sub_val: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3378b3378..608218fdc 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ import geopandas import pyarrow from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference _NO_PANDAS_ERROR = ( @@ -126,45 +127,93 @@ def _view_use_legacy_sql_getter(table): return True -class TableReference(object): - """TableReferences are pointers to tables. +class _TableBase: + """Base class for Table-related classes with common functionality.""" - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference - - Args: - dataset_ref (google.cloud.bigquery.dataset.DatasetReference): - A pointer to the dataset - table_id (str): The ID of the table - """ + _PROPERTY_TO_API_FIELD = { + "dataset_id": ["tableReference", "datasetId"], + "project": ["tableReference", "projectId"], + "table_id": ["tableReference", "tableId"], + } - def __init__(self, dataset_ref, table_id): - self._project = dataset_ref.project - self._dataset_id = dataset_ref.dataset_id - self._table_id = table_id + def __init__(self): + self._properties = {} @property - def project(self): - """str: Project bound to the table""" - return self._project + def project(self) -> str: + """Project bound to the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._dataset_id + def dataset_id(self) -> str: + """ID of dataset containing the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property - def table_id(self): - """str: The table ID.""" - return self._table_id + def table_id(self) -> str: + """The table ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self._project, - self._dataset_id, - self._table_id, + def path(self) -> str: + """URL path for the table's APIs.""" + return ( + f"/projects/{self.project}/datasets/{self.dataset_id}" + f"/tables/{self.table_id}" + ) + + def __eq__(self, other): + if isinstance(other, _TableBase): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def 
__hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + + +class TableReference(_TableBase): + """TableReferences are pointers to tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference + + Args: + dataset_ref: A pointer to the dataset + table_id: The ID of the table + """ + + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "table_id": "tableId", + } + + def __init__(self, dataset_ref: "DatasetReference", table_id: str): + self._properties = {} + + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, ) @classmethod @@ -233,11 +282,7 @@ def to_api_repr(self) -> dict: Returns: Dict[str, object]: Table reference represented as an API resource """ - return { - "projectId": self._project, - "datasetId": self._dataset_id, - "tableId": self._table_id, - } + return copy.deepcopy(self._properties) def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. @@ -257,54 +302,25 @@ def to_bqstorage(self) -> str: str: A reference to this table in the BigQuery Storage API. """ - table_id, _, _ = self._table_id.partition("@") + table_id, _, _ = self.table_id.partition("@") table_id, _, _ = table_id.partition("$") - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, + table_ref = ( + f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}" ) - return table_ref - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - Tuple[str]: The contents of this :class:`DatasetReference`. - """ - return (self._project, self._dataset_id, self._table_id) - - def __eq__(self, other): - if isinstance(other, (Table, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - elif isinstance(other, TableReference): - return self._key() == other._key() - else: - return NotImplemented - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference(self._project, self._dataset_id) - return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"TableReference({dataset_ref!r}, '{self.table_id}')" -class Table(object): +class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. 
See @@ -325,9 +341,9 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", - "dataset_id": ["tableReference", "datasetId"], "description": "description", "encryption_configuration": "encryptionConfiguration", "etag": "etag", @@ -346,14 +362,12 @@ class Table(object): "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", - "project": ["tableReference", "projectId"], "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", @@ -368,38 +382,8 @@ def __init__(self, table_ref, schema=None): if schema is not None: self.schema = schema - @property - def project(self): - """str: Project bound to the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["project"] - ) - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] - ) - - @property - def table_id(self): - """str: ID of the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"] - ) - reference = property(_reference_getter) - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self.project, - self.dataset_id, - self.table_id, - ) - @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -1040,29 +1024,11 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) - def __eq__(self, other): - if isinstance(other, Table): - return ( - self._properties["tableReference"] - == other._properties["tableReference"] - ) - elif isinstance(other, (TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def __repr__(self): return "Table({})".format(repr(self.reference)) -class TableListItem(object): +class TableListItem(_TableBase): """A read-only table resource from a list operation. 
For performance reasons, the BigQuery API only includes some of the table @@ -1126,21 +1092,6 @@ def expires(self): 1000.0 * float(expiration_time) ) - @property - def project(self): - """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] - - @property - def table_id(self): - """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] - reference = property(_reference_getter) @property @@ -1276,19 +1227,6 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def __eq__(self, other): - if isinstance(other, (Table, TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 945b95d1b..94e63fd63 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1214,9 +1214,13 @@ def test_w_empty_container_default_default(self): def test_w_missing_key_explicit_default(self): self.assertEqual(self._call_fut({"key2": 2}, ["key1"], default=1), 1) - def test_w_matching_single_key(self): + def test_w_matching_single_key_in_sequence(self): self.assertEqual(self._call_fut({"key1": 1}, ["key1"]), 1) + def test_w_matching_single_string_key(self): + data = {"k": {"e": {"y": "foo"}}, "key": "bar"} + self.assertEqual(self._call_fut(data, "key"), "bar") + def test_w_matching_first_key_missing_second_key(self): self.assertIsNone(self._call_fut({"key1": {"key3": 3}}, ["key1", "key2"])) @@ -1230,11 +1234,16 @@ def _call_fut(self, container, keys, value): return _set_sub_prop(container, keys, value) - def test_w_empty_container_single_key(self): + def test_w_empty_container_single_key_in_sequence(self): container = {} self._call_fut(container, ["key1"], "value") self.assertEqual(container, {"key1": "value"}) + def test_w_empty_container_single_string_key(self): + container = {} + self._call_fut(container, "key", "value") + self.assertEqual(container, {"key": "value"}) + def test_w_empty_container_nested_keys(self): container = {} self._call_fut(container, ["key1", "key2", "key3"], "value") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c94bf3a7f..3c68e3c5e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -102,6 +102,189 @@ def test_ctor_with_key(self): self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) +class TestTableBase: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import _TableBase + + return _TableBase + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + instance = self._make_one() + assert instance._properties == {} + + def test_project(self): + instance = self._make_one() + instance._properties = {"tableReference": {"projectId": "p_1"}} + assert instance.project == "p_1" + + def test_dataset_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"datasetId": "ds_1"}} + assert instance.dataset_id == "ds_1" + + def test_table_id(self): + instance = self._make_one() + instance._properties 
= {"tableReference": {"tableId": "tbl_1"}} + assert instance.table_id == "tbl_1" + + def test_path(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance.path == "/projects/p_1/datasets/ds_1/tables/tbl_1" + + def test___eq___wrong_type(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + class TableWannabe: + pass + + wannabe_other = TableWannabe() + wannabe_other._properties = instance._properties + wannabe_other.project = "p_1" + wannabe_other.dataset_id = "ds_1" + wannabe_other.table_id = "tbl_1" + + assert instance != wannabe_other # Can't fake it. + assert instance == mock.ANY # ...but delegation to other object works. + + def test___eq___project_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "projectId": "p_2", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + assert instance != other + + def test___eq___dataset_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_2", + "tableId": "tbl_1", + } + } + assert instance != other + + def test___eq___table_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_2", + } + } + assert instance != other + + def test___eq___equality(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance == other + + def test___hash__set_equality(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1, instance_2} + set_two = {instance_1, instance_2} + assert set_one == set_two + + def test___hash__sets_not_equal(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1} + set_two = {instance_2} + assert set_one != set_two + + class TestTableReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -196,55 +379,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") - def test___eq___wrong_type(self): - dataset_ref = 
DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset_ref, "table_1") - other = object() - self.assertNotEqual(table, other) - self.assertEqual(table, mock.ANY) - - def test___eq___project_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_2", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___dataset_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_1", "dataset_2") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___table_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_2") - self.assertNotEqual(table, other) - - def test___eq___equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_1") - self.assertEqual(table, other) - - def test___hash__set_equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1, table2} - set_two = {table1, table2} - self.assertEqual(set_one, set_two) - - def test___hash__not_equals(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1} - set_two = {table2} - self.assertNotEqual(set_one, set_two) - def test___repr__(self): dataset = DatasetReference("project1", "dataset1") table1 = self._make_one(dataset, "table1") @@ -558,44 +692,6 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") - def test__eq__wrong_type(self): - table = self._make_one("project_foo.dataset_bar.table_baz") - - class TableWannabe: - pass - - not_a_table = TableWannabe() - not_a_table._properties = table._properties - - assert table != not_a_table # Can't fake it. - - def test__eq__same_table_basic(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - assert table_1 == table_2 - - def test__eq__same_table_multiple_properties(self): - from google.cloud.bigquery import SchemaField - - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_1.require_partition_filter = True - table_1.labels = {"first": "one", "second": "two"} - - table_1.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - table_2.require_partition_filter = True - table_2.labels = {"first": "one", "second": "two"} - table_2.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is table baz" @@ -605,12 +701,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. 
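These tests pin down the equality contract that the refactoring centralizes in `_TableBase`: identity is the `(project, dataset_id, table_id)` triple and nothing else. A short sketch of the observable behavior (the table ID is a placeholder):

```python
from google.cloud.bigquery import Table

table_1 = Table("project_foo.dataset_bar.table_baz")
table_1.description = "One description"

table_2 = Table("project_foo.dataset_bar.table_baz")
table_2.description = "A completely different description"

# Only the table reference matters for equality and hashing, so these
# compare equal and collapse to a single entry in a set.
assert table_1 == table_2
assert len({table_1, table_2}) == 1
```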
- def test__eq__different_table(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") - - assert table_1 != table_2 - def test_hashable(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is a table" @@ -1584,38 +1674,6 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) - def test__eq__wrong_type(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table = self._make_one(resource) - - class FakeTableListItem: - project = "project_foo" - dataset_id = "dataset_bar" - table_id = "table_baz" - - not_a_table = FakeTableListItem() - - assert table != not_a_table # Can't fake it. - - def test__eq__same_table(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource) - table_2 = self._make_one(resource) - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_ref_resource = { "projectId": "project_foo", @@ -1631,40 +1689,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - resource_1 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource_1) - - resource_2 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_quux", - } - } - table_2 = self._make_one(resource_2) - - assert table_1 != table_2 - - def test_hashable(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_item = self._make_one(resource) - table_item_2 = self._make_one(resource) - - assert hash(table_item) == hash(table_item_2) - class TestTableClassesInterchangeability: @staticmethod From 96e6beef3c63b663b7e5879b1458f4dd1a47a5b5 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Mon, 20 Sep 2021 08:25:42 -0700 Subject: [PATCH 76/89] feat: Add py.typed for PEP 561 compliance (#976) Type annotations were added in commit f8d4aaa335a0eef915e73596fc9b43b11d11be9f. For these annotations to be useful by library users, the package should install a py.typed file. This tells mypy and other tools to consume and use these types. For more details, see: https://mypy.readthedocs.io/en/stable/installed_packages.html#creating-pep-561-compatible-packages --- google/cloud/bigquery/py.typed | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 google/cloud/bigquery/py.typed diff --git a/google/cloud/bigquery/py.typed b/google/cloud/bigquery/py.typed new file mode 100644 index 000000000..e73777993 --- /dev/null +++ b/google/cloud/bigquery/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery package uses inline types. From 1cef0d4664bf448168b26487a71795144b7f4d6b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Sep 2021 02:18:28 -0500 Subject: [PATCH 77/89] fix: remove default timeout (#974) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal folks, see: go/microgenerator-retries > "Methods will **not** hedge by default." (emphasis mine) Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #970 🦕 --- google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 830582322..8a86973cd 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -60,7 +60,7 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ -DEFAULT_TIMEOUT = 5.0 * 60.0 +DEFAULT_TIMEOUT = None """The default API timeout. This is the time to wait per request. To adjust the total wait time, set a From fcd633617aed0d73e5ca77edff3ddd230cc3f74d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Sep 2021 11:43:52 -0500 Subject: [PATCH 78/89] test: look for transaction info on child job, not parent job (#978) * test: look for transaction info on child job, not parent job * clarify transaction_info docstring * use sphinx method directive --- google/cloud/bigquery/job/base.py | 5 +++++ tests/system/test_client.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 72db5a63c..698181092 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -357,6 +357,11 @@ def reservation_usage(self): def transaction_info(self) -> Optional[TransactionInfo]: """Information of the multi-statement transaction if this job is part of one. + Since a scripting query job can execute multiple transactions, this + property is only expected on child jobs. Use the + :meth:`google.cloud.bigquery.client.Client.list_jobs` method with the + ``parent_job`` parameter to iterate over child jobs. + .. versionadded:: 2.24.0 """ info = self._properties.get("statistics", {}).get("transactionInfo") diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9da45ee6e..6c8da4d23 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1586,9 +1586,15 @@ def test_transaction_info(self): query_job = Config.CLIENT.query(sql) query_job.result() - # Transaction ID set by the server should be accessible - assert query_job.transaction_info is not None - assert query_job.transaction_info.transaction_id != "" + child_jobs = Config.CLIENT.list_jobs(parent_job=query_job) + begin_transaction_job = next(iter(child_jobs)) + + # Transaction ID set by the server should be accessible on the child + # job responsible for `BEGIN TRANSACTION`. It is not expected to be + # present on the parent job itself. 
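The guidance in the docstring translates directly into client code. A sketch of the intended usage pattern, mirroring the system test below (the script SQL is illustrative):

```python
from google.cloud import bigquery

client = bigquery.Client()

# A multi-statement script; BigQuery runs each statement as a child job
# of the script's parent job.
parent_job = client.query("BEGIN TRANSACTION; SELECT 17; COMMIT TRANSACTION;")
parent_job.result()

# Transaction IDs are reported on the child jobs, not on the parent.
for child_job in client.list_jobs(parent_job=parent_job):
    if child_job.transaction_info is not None:
        print(child_job.job_id, child_job.transaction_info.transaction_id)
```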
+ # https://github.com/googleapis/python-bigquery/issues/975 + assert begin_transaction_job.transaction_info is not None + assert begin_transaction_job.transaction_info.transaction_id != "" def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: From ee1e25c68fff8f35b595a832a582bdd7dd8dc73b Mon Sep 17 00:00:00 2001 From: Jeffrey Rennie Date: Tue, 21 Sep 2021 12:36:15 -0700 Subject: [PATCH 79/89] chore: relocate owl bot post processor (#979) chore: relocate owl bot post processor --- .github/.OwlBot.lock.yaml | 4 ++-- .github/.OwlBot.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index c07f148f0..2567653c0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest + digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml index 2b6451c19..e54051157 100644 --- a/.github/.OwlBot.yaml +++ b/.github/.OwlBot.yaml @@ -13,7 +13,7 @@ # limitations under the License. docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest deep-remove-regex: - /owl-bot-staging From f83c00acead70fc0ce9959eefb133a672d816277 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 24 Sep 2021 02:06:11 -0500 Subject: [PATCH 80/89] fix: disambiguate missing policy tags from explicitly unset policy tags (#983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #981 Fixes #982 Towards https://github.com/googleapis/python-bigquery-pandas/issues/387 🦕 --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/schema.py | 100 ++++++++--------- tests/system/test_client.py | 5 +- tests/unit/job/test_load_config.py | 4 - tests/unit/test_client.py | 64 ++++------- tests/unit/test_external_config.py | 9 +- tests/unit/test_schema.py | 173 +++++++++++------------------ 8 files changed, 139 insertions(+), 219 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index d8738e67b..f4f11abc9 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -128,6 +128,7 @@ Schema :toctree: generated schema.SchemaField + schema.PolicyTagList Query diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index a7a0da3dd..931e0f3e6 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -88,6 +88,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row @@ -140,6 +141,7 @@ "RoutineReference", # Shared helpers "SchemaField", + "PolicyTagList", "UDFResource", "ExternalConfig", "BigtableOptions", diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 157db7ce6..5bad52273 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,12 +15,12 @@ """Schemas for BigQuery tables / queries.""" import collections -from typing import Optional +import enum +from typing import Iterable, Union from google.cloud.bigquery_v2 import types -_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -49,47 +49,62 @@ """String names of the legacy SQL types to integer codes of Standard SQL types.""" +class _DefaultSentinel(enum.Enum): + """Object used as 'sentinel' indicating default value should be used. + + Uses enum so that pytype/mypy knows that this is the only possible value. + https://stackoverflow.com/a/60605919/101923 + + Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. + https://docs.python.org/3/library/typing.html#typing.Literal + """ + + DEFAULT_VALUE = object() + + +_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE + + class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): The name of the field. + name: The name of the field. - field_type (str): The type of the field. See + field_type: + The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (Optional[str]): The mode of the field. See + mode: + Defaults to ``'NULLABLE'``. The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): Description for the field. + description: Description for the field. - fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): - Subfields (requires ``field_type`` of 'RECORD'). 
+ fields: Subfields (requires ``field_type`` of 'RECORD'). - policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + policy_tags: The policy tag list for the field. - precision (Optional[int]): + precision: Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. - scale (Optional[int]): + scale: Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - max_length (Optional[int]): - Maximim length of fields with STRING or BYTES type. - + max_length: Maximum length of fields with STRING or BYTES type. """ def __init__( self, - name, - field_type, - mode="NULLABLE", - description=_DEFAULT_VALUE, - fields=(), - policy_tags=None, - precision=_DEFAULT_VALUE, - scale=_DEFAULT_VALUE, - max_length=_DEFAULT_VALUE, + name: str, + field_type: str, + mode: str = "NULLABLE", + description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, + fields: Iterable["SchemaField"] = (), + policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, + precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): self._properties = { "name": name, @@ -105,28 +120,12 @@ def __init__( self._properties["scale"] = scale if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length + if policy_tags is not _DEFAULT_VALUE: + self._properties["policyTags"] = ( + policy_tags.to_api_repr() if policy_tags is not None else None + ) self._fields = tuple(fields) - self._policy_tags = self._determine_policy_tags(field_type, policy_tags) - - @staticmethod - def _determine_policy_tags( - field_type: str, given_policy_tags: Optional["PolicyTagList"] - ) -> Optional["PolicyTagList"]: - """Return the given policy tags, or their suitable representation if `None`. - - Args: - field_type: The type of the schema field. - given_policy_tags: The policy tags to maybe ajdust. - """ - if given_policy_tags is not None: - return given_policy_tags - - if field_type is not None and field_type.upper() in _STRUCT_TYPES: - return None - - return PolicyTagList() - @staticmethod def __get_int(api_repr, name): v = api_repr.get(name, _DEFAULT_VALUE) @@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) - policy_tags = cls._determine_policy_tags( - field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) - ) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: + policy_tags = PolicyTagList.from_api_repr(policy_tags) return cls( field_type=field_type, @@ -230,7 +229,8 @@ def policy_tags(self): """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list definition for this field. """ - return self._policy_tags + resource = self._properties.get("policyTags") + return PolicyTagList.from_api_repr(resource) if resource is not None else None def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -244,10 +244,6 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - else: - # Explicitly include policy tag definition (we must not do it for RECORD - # fields, because those are not leaf fields). 
- answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. return answer @@ -272,7 +268,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6c8da4d23..f6f95c184 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self): mode=old_field.mode, description=None, fields=old_field.fields, - policy_tags=None, + policy_tags=PolicyTagList(), ) table.schema = new_schema updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertFalse(updated_table.schema[1].description) # Empty string or None. - self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + # policyTags key expected to be missing from response. + self.assertIsNone(updated_table.schema[1].policy_tags) def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index cbe087dac..5a0c5a83f 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -484,13 +484,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d2a75413f..eb70470b5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1024,18 +1024,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -1069,18 +1059,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2003,6 +1983,7 @@ def test_update_routine(self): def test_update_table(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2029,7 +2010,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": 
"New field description", - "policyTags": {"names": []}, }, ] }, @@ -2040,7 +2020,15 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + # Explicly setting policyTags to no names should be included in the sent resource. + # https://github.com/googleapis/python-bigquery/issues/981 + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description=None, + policy_tags=PolicyTagList(names=()), + ), SchemaField( "age", "INTEGER", mode="REQUIRED", description="New field description" ), @@ -2078,7 +2066,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2197,21 +2184,14 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", - "policyTags": {"names": []}, - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE", - "policyTags": {"names": []}, }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6795,7 +6775,13 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) - assert field["policyTags"]["names"] == [] + # Avoid accidentally updating policy tags when not explicitly included. + # https://github.com/googleapis/python-bigquery/issues/981 + # Also, avoid 403 if someone has permission to write to table but + # not update policy tags by omitting policy tags we might have + # received from a get table request. + # https://github.com/googleapis/python-bigquery/pull/557 + assert "policyTags" not in field # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field @@ -8069,21 +8055,18 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -8116,21 +8099,18 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 1f49dba5d..3dc9dd179 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,14 +78,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d0b5ca54c..2180e1f6e 100644 --- 
a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -42,15 +42,40 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, PolicyTagList()) + self.assertIsNone(field.policy_tags) def test_constructor_explicit(self): - field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) + self.assertEqual( + field.policy_tags, + PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) + + def test_constructor_explicit_none(self): + field = self._make_one("test", "STRING", description=None, policy_tags=None) + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -66,20 +91,6 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) - def test_constructor_with_policy_tags(self): - from google.cloud.bigquery.schema import PolicyTagList - - policy = PolicyTagList(names=("foo", "bar")) - field = self._make_one( - "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy - ) - self.assertEqual(field.name, "test") - self.assertEqual(field.field_type, "STRING") - self.assertEqual(field.mode, "REQUIRED") - self.assertEqual(field.description, "Testing") - self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, policy) - def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -88,17 +99,28 @@ def test_to_api_repr(self): policy.to_api_repr(), {"names": ["foo", "bar"]}, ) - field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) + field = self._make_one( + "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + ) self.assertEqual( field.to_api_repr(), { "mode": "NULLABLE", "name": "foo", "type": "INTEGER", + "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, }, ) + def test_to_api_repr_omits_unset_properties(self): + # Prevent accidentally modifying fields that aren't explicitly set. 
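+        # Unset values are stored as the _DEFAULT_VALUE sentinel and never reach + # _properties, so to_api_repr(), which serializes _properties, omits them.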
+ # https://github.com/googleapis/python-bigquery/issues/981 + field = self._make_one("foo", "INTEGER") + resource = field.to_api_repr() + self.assertNotIn("description", resource) + self.assertNotIn("policyTags", resource) + def test_to_api_repr_with_subfield(self): for record_type in ("RECORD", "STRUCT"): subfield = self._make_one("bar", "INTEGER", "NULLABLE") @@ -106,14 +128,7 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "policyTags": {"names": []}, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -163,9 +178,15 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, "foo") self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") - self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) + # Keys not present in API representation shouldn't be included in + # _properties. + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) + self.assertNotIn("description", field._properties) + self.assertNotIn("policyTags", field._properties) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -567,22 +588,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -608,7 +617,6 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, - "policyTags": {"names": []}, }, ) self.assertEqual( @@ -618,7 +626,6 @@ def test_w_description(self): "type": "INTEGER", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, ) @@ -634,13 +641,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -649,18 +650,8 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) @@ -872,83 +863,43 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="NUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), ), ( dict(name="n", 
field_type="NUMERIC", precision=9, scale=2), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict( - name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="STRING", mode="NULLABLE"), ), ( dict(name="n", field_type="STRING", max_length=9), - dict( - name="n", - type="STRING", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="BYTES", mode="NULLABLE"), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict( - name="n", - type="BYTES", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), ), ], ) From 71cc728a96d617a109cdd063262a74ed46e8978a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 10:43:17 +0200 Subject: [PATCH 81/89] chore: release 2.27.0 (#965) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 20 ++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3cb6bee..80a195b1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) + + +### Features + +* Add py.typed for PEP 561 compliance ([#976](https://www.github.com/googleapis/python-bigquery/issues/976)) ([96e6bee](https://www.github.com/googleapis/python-bigquery/commit/96e6beef3c63b663b7e5879b1458f4dd1a47a5b5)) +* include key metadata in Job representation ([#964](https://www.github.com/googleapis/python-bigquery/issues/964)) ([acca1cb](https://www.github.com/googleapis/python-bigquery/commit/acca1cb7baaa3b00508246c994ade40314d421c3)) + + +### Bug Fixes + +* Arrow extension-type metadata was not set when calling the REST API or when there are no rows ([#946](https://www.github.com/googleapis/python-bigquery/issues/946)) ([864383b](https://www.github.com/googleapis/python-bigquery/commit/864383bc01636b3774f7da194587b8b7edd0383d)) +* disambiguate missing policy tags from explicitly unset policy tags ([#983](https://www.github.com/googleapis/python-bigquery/issues/983)) ([f83c00a](https://www.github.com/googleapis/python-bigquery/commit/f83c00acead70fc0ce9959eefb133a672d816277)) +* remove default timeout 
([#974](https://www.github.com/googleapis/python-bigquery/issues/974)) ([1cef0d4](https://www.github.com/googleapis/python-bigquery/commit/1cef0d4664bf448168b26487a71795144b7f4d6b)) ### Documentation * simplify destination table sample with f-strings ([#966](https://www.github.com/googleapis/python-bigquery/issues/966)) ([ab6e76f](https://www.github.com/googleapis/python-bigquery/commit/ab6e76f9489262fd9c1876a1c4f93d7e139aa999)) ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 1f7d79ab9..4f038c462 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.26.0" +__version__ = "2.27.0" From 39030f26ce081cfacd456b84694c68e3f04ed48d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Sep 2021 14:01:04 -0500 Subject: [PATCH 82/89] fix: remove py.typed since package fails mypy check (#988) This reverts commit 96e6beef3c63b663b7e5879b1458f4dd1a47a5b5. --- google/cloud/bigquery/py.typed | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 google/cloud/bigquery/py.typed diff --git a/google/cloud/bigquery/py.typed b/google/cloud/bigquery/py.typed deleted file mode 100644 index e73777993..000000000 --- a/google/cloud/bigquery/py.typed +++ /dev/null @@ -1,2 +0,0 @@ -# Marker file for PEP 561. -# The google-cloud-bigquery package uses inline types. From 3e32c1a3ecb8392a3b9c2eac8abef29d4075266b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 19:22:42 +0000 Subject: [PATCH 83/89] chore: release 2.27.1 (#989) ### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) ### Bug Fixes * remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a195b1e..d531ec477 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) + + +### Bug Fixes + +* remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) + ## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 4f038c462..3e5c77ede 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License.
-__version__ = "2.27.0" +__version__ = "2.27.1" From ea50e8031fc035b3772a338bc00982de263cefad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Sep 2021 04:44:59 -0500 Subject: [PATCH 84/89] docs: link to stable pandas docs (#990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: link to stable pandas docs * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * trailing slash * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- docs/conf.py | 2 +- owlbot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 07e5d8c30..329951636 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,7 +366,7 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), } diff --git a/owlbot.py b/owlbot.py index 09aa8ca6f..c39433d3c 100644 --- a/owlbot.py +++ b/owlbot.py @@ -98,7 +98,7 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "pandas": "http://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", }, ) From 2ca58dc1cb206864a4d35c23d6e32f1a7452c7b4 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 28 Sep 2021 06:10:27 -0400 Subject: [PATCH 85/89] chore: exclude 'CODEOWNERS' from templated files (#986) See: https://github.com/googleapis/synthtool/pull/1201 --- owlbot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/owlbot.py b/owlbot.py index c39433d3c..ace99e687 100644 --- a/owlbot.py +++ b/owlbot.py @@ -110,6 +110,7 @@ "noxfile.py", "docs/multiprocessing.rst", ".coveragerc", + ".github/CODEOWNERS", # Include custom SNIPPETS_TESTS job for performance. 
# https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", From a789d00c00b93e06458a4e1cc1b240a9503f9ec9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 28 Sep 2021 17:57:00 +0200 Subject: [PATCH 86/89] chore(deps): update all dependencies (#963) --- samples/geography/requirements.txt | 20 ++++++++++---------- samples/snippets/requirements.txt | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index a4b4ed692..f808c5ec2 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.2.0 cachetools==4.2.2 certifi==2021.5.30 cffi==1.14.6 -charset-normalizer==2.0.4 +charset-normalizer==2.0.6 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 @@ -11,17 +11,17 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 -google-auth==2.0.2 -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-auth==2.2.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-cloud-core==2.0.0 -google-crc32c==1.1.2 -google-resumable-media==2.0.2 +google-crc32c==1.2.0 +google-resumable-media==2.0.3 googleapis-common-protos==1.53.0 -grpcio==1.40.0 +grpcio==1.41.0 idna==3.2 importlib-metadata==4.8.1 -libcst==0.3.20 +libcst==0.3.21 munch==2.5.0 mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" @@ -30,7 +30,7 @@ packaging==21.0 pandas==1.1.5; python_version < '3.7' pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 -protobuf==3.17.3 +protobuf==3.18.0 pyarrow==5.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -47,5 +47,5 @@ Shapely==1.7.1 six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 -urllib3==1.26.6 +urllib3==1.26.7 zipp==3.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f575e41b1..f9b9d023c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 -grpcio==1.40.0 +grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 19e98127fa2b23800499d594e4c9bfab34c517c1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Sep 2021 16:35:03 +0200 Subject: [PATCH 87/89] Fix coverage (unnecessary helper in test) --- tests/unit/test__helpers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index e439a7bbe..2377be79c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -32,12 +32,6 @@ def _object_under_test(self): return _helpers.BQStorageVersions() - def _call_fut(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. 
- _helpers.BQ_STORAGE_VERSIONS._installed_version = None - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() From fc6f7fcf9873311c921ded4edda68d2c3d08aaed Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Sep 2021 17:29:32 +0200 Subject: [PATCH 88/89] Remove unnecessary fields from TableReference --- google/cloud/bigquery/table.py | 82 ++-------------------------------- tests/unit/test_table.py | 51 +-------------------- 2 files changed, 5 insertions(+), 128 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index bfcf1d731..e0b2dd95f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,7 +21,7 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, Optional, Sequence, Tuple +from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings try: @@ -184,34 +184,15 @@ class TableReference(_TableBase): A pointer to the dataset table_id: The ID of the table - project_id_alternative: - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. - dataset_id_alternative: - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. - table_id_alternative: - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. """ _PROPERTY_TO_API_FIELD = { "dataset_id": "datasetId", "project": "projectId", "table_id": "tableId", - "project_id_alternative": "projectIdAlternative", - "dataset_id_alternative": "datasetIdAlternative", - "table_id_alternative": "tableIdAlternative", } - def __init__( - self, - dataset_ref: "DatasetReference", - table_id: str, - project_id_alternative: Sequence[str] = None, - dataset_id_alternative: Sequence[str] = None, - table_id_alternative: Sequence[str] = None, - ): + def __init__(self, dataset_ref: "DatasetReference", table_id: str): self._properties = {} _helpers._set_sub_prop( @@ -228,27 +209,6 @@ def __init__( self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, ) - if project_id_alternative is not None: - _helpers._set_sub_prop( - self._properties, - self._PROPERTY_TO_API_FIELD["project_id_alternative"], - project_id_alternative, - ) - - if dataset_id_alternative is not None: - _helpers._set_sub_prop( - self._properties, - self._PROPERTY_TO_API_FIELD["dataset_id_alternative"], - dataset_id_alternative, - ) - - if table_id_alternative is not None: - _helpers._set_sub_prop( - self._properties, - self._PROPERTY_TO_API_FIELD["table_id_alternative"], - table_id_alternative, - ) - @classmethod def from_string( cls, table_id: str, default_project: str = None @@ -308,43 +268,7 @@ def from_api_repr(cls, resource: dict) -> "TableReference": dataset_id = resource["datasetId"] table_id = resource["tableId"] - return cls( - DatasetReference(project, dataset_id), - table_id, - project_id_alternative=resource.get("projectIdAlternative"), - dataset_id_alternative=resource.get("datasetIdAlternative"), - table_id_alternative=resource.get("tableIdAlternative"), - ) - - @property - def project_id_alternative(self) -> Sequence[str]: - """The alternative project IDs.""" - value = _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["project_id_alternative"] - ) - if value is None: - value = [] - return value - - @property - def dataset_id_alternative(self) -> 
Sequence[str]: - """The alternative dataset IDs.""" - value = _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["dataset_id_alternative"] - ) - if value is None: - value = [] - return value - - @property - def table_id_alternative(self) -> Sequence[str]: - """The alternative table IDs.""" - value = _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id_alternative"] - ) - if value is None: - value = [] - return value + return cls(DatasetReference(project, dataset_id), table_id) def to_api_repr(self) -> dict: """Construct the API resource representation of this table reference. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 2f4e879af..a34b0d56b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -292,32 +292,16 @@ def test_ctor_defaults(self): table_ref = self._make_one(dataset_ref, "table_1") self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) self.assertEqual(table_ref.table_id, "table_1") - self.assertEqual(table_ref.project_id_alternative, []) - self.assertEqual(table_ref.dataset_id_alternative, []) - self.assertEqual(table_ref.table_id_alternative, []) def test_to_api_repr(self): dataset_ref = DatasetReference("project_1", "dataset_1") - table_ref = self._make_one( - dataset_ref, - "table_1", - project_id_alternative=["p1", "p2"], - dataset_id_alternative=["d1", "d2"], - table_id_alternative=["t1", "t2"], - ) + table_ref = self._make_one(dataset_ref, "table_1") resource = table_ref.to_api_repr() self.assertEqual( resource, - { - "projectId": "project_1", - "datasetId": "dataset_1", - "tableId": "table_1", - "projectIdAlternative": ["p1", "p2"], - "datasetIdAlternative": ["d1", "d2"], - "tableIdAlternative": ["t1", "t2"], - }, + {"projectId": "project_1", "datasetId": "dataset_1", "tableId": "table_1"}, ) def test_from_api_repr(self): @@ -330,39 +314,8 @@ def test_from_api_repr(self): {"projectId": "project_1", "datasetId": "dataset_1", "tableId": "table_1"} ) - self.assertEqual(expected, got) # TODO: overide EQ? - - def test_from_api_repr_full(self): - from google.cloud.bigquery.table import TableReference - - dataset_ref = DatasetReference("project_1", "dataset_1") - expected = self._make_one( - dataset_ref, - "table_1", - project_id_alternative=["p1", "p2"], - dataset_id_alternative=["d1", "d2"], - table_id_alternative=["t1", "t2"], - ) - - got = TableReference.from_api_repr( - { - "projectId": "project_1", - "datasetId": "dataset_1", - "tableId": "table_1", - "projectIdAlternative": ["p1", "p2"], - "datasetIdAlternative": ["d1", "d2"], - "tableIdAlternative": ["t1", "t2"], - } - ) - self.assertEqual(expected, got) - # additional attributes are isgnored when comapring for equality, thus perform - # those checks separately. 
- self.assertEqual(got.project_id_alternative, ["p1", "p2"]) - self.assertEqual(got.dataset_id_alternative, ["d1", "d2"]) - self.assertEqual(got.table_id_alternative, ["t1", "t2"]) - def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") From bdacef21a1ab151d428f837e7e782b6b72de5700 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 Sep 2021 12:42:58 -0500 Subject: [PATCH 89/89] Apply suggestions from code review --- google/cloud/bigquery/client.py | 2 -- google/cloud/bigquery/table.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 754a704d3..a738dd0f3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -116,8 +116,6 @@ TIMEOUT_HEADER = "X-Server-Timeout" -TIMEOUT_HEADER = "X-Server-Timeout" - class Project(object): """Wrapper for resource describing a BigQuery project. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e0b2dd95f..75901afb4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1690,8 +1690,6 @@ def to_arrow( """ self._maybe_warn_max_results(bqstorage_client) - self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None
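Taken together, the schema changes in patch 80 leave `SchemaField.to_api_repr()` able to distinguish policy tags that were never set from policy tags that are explicitly cleared. A minimal sketch of that behavior (an editor's illustration against the public API exported in patch 80, not part of any commit; it assumes `PolicyTagList(names=()).to_api_repr()` returns `{"names": []}`, which the unit tests above only demonstrate for non-empty name lists):

    from google.cloud.bigquery import PolicyTagList, SchemaField

    # Left unset, "policyTags" is omitted from the request payload, so an
    # update_table(table, ["schema"]) call cannot accidentally clear tags
    # the column already has, and cannot trigger a 403 for callers who may
    # write data but not modify policy tags.
    unset = SchemaField("full_name", "STRING", mode="REQUIRED")
    assert "policyTags" not in unset.to_api_repr()

    # Explicitly set to an empty list, "policyTags" is sent, which
    # deliberately clears any tags currently on the column.
    cleared = SchemaField(
        "full_name",
        "STRING",
        mode="REQUIRED",
        policy_tags=PolicyTagList(names=()),
    )
    assert cleared.to_api_repr()["policyTags"] == {"names": []}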