chore: merge recent changes from master #823

Merged · 5 commits · Jul 28, 2021
2 changes: 1 addition & 1 deletion .github/sync-repo-settings.yaml
@@ -3,7 +3,7 @@
branchProtectionRules:
# Identifies the protection rule pattern. Name of the branch to be protected.
# Defaults to `master`
-  - pattern: master
+  - pattern: '{master,v3}'
requiredStatusCheckContexts:
- 'Kokoro'
- 'Kokoro snippets-3.8'
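For context: GitHub branch-protection patterns are fnmatch-style, so the brace form lets this single rule cover both long-lived branches. A minimal sketch of the matching semantics in Python, under stated assumptions: Python's fnmatch has no brace expansion, so '{master,v3}' is pre-expanded by hand here.

    from fnmatch import fnmatch

    # Hand-expanded form of the '{master,v3}' brace pattern.
    patterns = ("master", "v3")

    for branch in ("master", "v3", "feature/widgets"):
        # A branch is protected if any expanded pattern matches its name.
        protected = any(fnmatch(branch, p) for p in patterns)
        print(f"{branch}: {'protected' if protected else 'unprotected'}")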
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,24 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history


## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27)


### Features

* Update proto definitions for bigquery/v2 to support new proto fields for BQML. ([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b))


### Bug Fixes

* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94))
* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11))


### Documentation

* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107))

### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22)


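To illustrate the retry fix from #802 above: a minimal sketch, assuming the private helper google.cloud.bigquery.retry._should_retry keeps its 2.23.0 shape (it backs the predicate of DEFAULT_RETRY).

    import requests.exceptions

    from google.cloud.bigquery import retry as bq_retry

    # ChunkedEncodingError carries no structured error payload, so the default
    # predicate falls back to an isinstance() check against the retryable
    # exception types, which now include it.
    exc = requests.exceptions.ChunkedEncodingError()
    print(bq_retry._should_retry(exc))  # expected: True as of 2.23.0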
30 changes: 25 additions & 5 deletions google/cloud/bigquery/table.py
@@ -1544,11 +1544,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
return False

if self.max_results is not None:
-            warnings.warn(
-                "Cannot use bqstorage_client if max_results is set, "
-                "reverting to fetching data with the REST endpoint.",
-                stacklevel=2,
-            )
return False

return True
@@ -1585,6 +1580,25 @@ def total_rows(self):
"""int: The total number of rows in the table."""
return self._total_rows

def _maybe_warn_max_results(
self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
):
"""Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set.

This helper method should be used directly in the relevant top-level public
methods, so that the warning is issued for the correct line in user code.

Args:
bqstorage_client:
                The BigQuery Storage client to be used for downloading result rows.
"""
if bqstorage_client is not None and self.max_results is not None:
warnings.warn(
"Cannot use bqstorage_client if max_results is set, "
"reverting to fetching data with the REST endpoint.",
stacklevel=3,
)

def _to_page_iterable(
self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
@@ -1674,6 +1688,8 @@ def to_arrow(

.. versionadded:: 1.17.0
"""
self._maybe_warn_max_results(bqstorage_client)

if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
@@ -1763,6 +1779,8 @@ def to_dataframe_iterable(
if dtypes is None:
dtypes = {}

self._maybe_warn_max_results(bqstorage_client)

column_names = [field.name for field in self._schema]
bqstorage_download = functools.partial(
_pandas_helpers.download_dataframe_bqstorage,
@@ -1866,6 +1884,8 @@ def to_dataframe(
if dtypes is None:
dtypes = {}

self._maybe_warn_max_results(bqstorage_client)

if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
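End to end, the relocated warning now fires at the user's call site (stacklevel=3 skips both _maybe_warn_max_results and the public method). A minimal sketch, assuming pandas and the optional google-cloud-bigquery-storage extra are installed; the table path is hypothetical:

    import warnings

    from google.cloud import bigquery
    from google.cloud import bigquery_storage

    client = bigquery.Client()
    bqstorage_client = bigquery_storage.BigQueryReadClient()

    # max_results forces the REST endpoint, so pairing it with a BQ Storage
    # client is contradictory; RowIterator warns once and falls back to REST.
    rows = client.list_rows("my-project.my_dataset.my_table", max_results=100)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        df = rows.to_dataframe(bqstorage_client=bqstorage_client)

    print([str(w.message) for w in caught])  # "Cannot use bqstorage_client ..."

Per #815, a plain rows.to_dataframe() with max_results set and no BQ Storage client no longer warns at all.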
2 changes: 1 addition & 1 deletion google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2.22.1"
__version__ = "2.23.0"
104 changes: 95 additions & 9 deletions google/cloud/bigquery_v2/types/model.py
@@ -96,6 +96,8 @@ class Model(proto.Message):
Output only. Label columns that were used to train this
model. The output of the model will have a `predicted_`
prefix to these columns.
best_trial_id (int):
The best trial_id across all training runs.
"""

class ModelType(proto.Enum):
@@ -113,6 +115,7 @@ class ModelType(proto.Enum):
ARIMA = 11
AUTOML_REGRESSOR = 12
AUTOML_CLASSIFIER = 13
ARIMA_PLUS = 19

class LossType(proto.Enum):
r"""Loss metric to evaluate model training performance."""
@@ -151,6 +154,7 @@ class DataFrequency(proto.Enum):
WEEKLY = 5
DAILY = 6
HOURLY = 7
PER_MINUTE = 8

class HolidayRegion(proto.Enum):
r"""Type of supported holiday regions for time series forecasting
@@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message):
median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue):
Median absolute error.
r_squared (google.protobuf.wrappers_pb2.DoubleValue):
-                R^2 score.
R^2 score. This corresponds to r2_score in ML.EVALUATE.
"""

mean_absolute_error = proto.Field(
@@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message):
Mean of squared distances between each sample
to its cluster centroid.
clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]):
-                [Beta] Information for all clusters.
Information for all clusters.
"""

class Cluster(proto.Message):
@@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message):
Is arima model fitted with drift or not. It
is always false when d is not 1.
time_series_id (str):
-                The id to indicate different time series.
The time_series_id value for this time series. It will be
one of the unique values from the time_series_id_column
specified during ARIMA model training. Only present when
time_series_id_column training option was used.
time_series_ids (Sequence[str]):
The tuple of time_series_ids identifying this time series.
It will be one of the unique tuples of values present in the
time_series_id_columns specified during ARIMA model
training. Only present when time_series_id_columns training
                option was used; the order of values here is the same as
                the order of time_series_id_columns.
seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue):
If true, holiday_effect is a part of time series
decomposition result.
has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
If true, spikes_and_dips is a part of time series
decomposition result.
has_step_changes (google.protobuf.wrappers_pb2.BoolValue):
If true, step_changes is a part of time series decomposition
result.
"""

non_seasonal_order = proto.Field(
@@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message):
)
has_drift = proto.Field(proto.BOOL, number=3,)
time_series_id = proto.Field(proto.STRING, number=4,)
time_series_ids = proto.RepeatedField(proto.STRING, number=9,)
seasonal_periods = proto.RepeatedField(
proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
has_holiday_effect = proto.Field(
proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue,
)
has_spikes_and_dips = proto.Field(
proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue,
)
has_step_changes = proto.Field(
proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue,
)

non_seasonal_order = proto.RepeatedField(
proto.MESSAGE, number=1, message="Model.ArimaOrder",
@@ -901,7 +934,7 @@ class TrainingRun(proto.Message):
"""

class TrainingOptions(proto.Message):
r"""
r"""Options used in model training.
Attributes:
max_iterations (int):
The maximum number of iterations in training.
@@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message):
num_clusters (int):
Number of clusters for clustering models.
model_uri (str):
-                [Beta] Google Cloud Storage URI from which the model was
-                imported. Only applicable for imported models.
Google Cloud Storage URI from which the model
was imported. Only applicable for imported
models.
optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy):
Optimization strategy for training linear
regression models.
@@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message):
If a valid value is specified, then holiday
effects modeling is enabled.
time_series_id_column (str):
-                The id column that will be used to indicate
-                different time series to forecast in parallel.
The time series id column that was used
during ARIMA model training.
time_series_id_columns (Sequence[str]):
The time series id columns that were used
during ARIMA model training.
horizon (int):
The number of periods ahead that need to be
forecasted.
@@ -1042,6 +1079,15 @@
output feature name is A.b.
auto_arima_max_order (int):
The max value of non-seasonal p and q.
decompose_time_series (google.protobuf.wrappers_pb2.BoolValue):
                If true, perform time series decomposition and
                save the results.
clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
If true, clean spikes and dips in the input
time series.
adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue):
If true, detect step changes and make data
adjustment in the input time series.
"""

max_iterations = proto.Field(proto.INT64, number=1,)
@@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message):
proto.ENUM, number=42, enum="Model.HolidayRegion",
)
time_series_id_column = proto.Field(proto.STRING, number=43,)
time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,)
horizon = proto.Field(proto.INT64, number=44,)
preserve_input_structs = proto.Field(proto.BOOL, number=45,)
auto_arima_max_order = proto.Field(proto.INT64, number=46,)
decompose_time_series = proto.Field(
proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue,
)
clean_spikes_and_dips = proto.Field(
proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue,
)
adjust_step_changes = proto.Field(
proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue,
)

class IterationResult(proto.Message):
r"""Information about a single iteration of the training run.
@@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message):
Whether Arima model fitted with drift or not.
It is always false when d is not 1.
time_series_id (str):
-                The id to indicate different time series.
The time_series_id value for this time series. It will be
one of the unique values from the time_series_id_column
specified during ARIMA model training. Only present when
time_series_id_column training option was used.
time_series_ids (Sequence[str]):
The tuple of time_series_ids identifying this time series.
It will be one of the unique tuples of values present in the
time_series_id_columns specified during ARIMA model
training. Only present when time_series_id_columns training
option was used and the order of values here are same as the
order of time_series_id_columns.
seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue):
If true, holiday_effect is a part of time series
decomposition result.
has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
If true, spikes_and_dips is a part of time series
decomposition result.
has_step_changes (google.protobuf.wrappers_pb2.BoolValue):
If true, step_changes is a part of time series decomposition
result.
"""

non_seasonal_order = proto.Field(
@@ -1237,11 +1312,21 @@
)
has_drift = proto.Field(proto.BOOL, number=4,)
time_series_id = proto.Field(proto.STRING, number=5,)
time_series_ids = proto.RepeatedField(proto.STRING, number=10,)
seasonal_periods = proto.RepeatedField(
proto.ENUM,
number=6,
enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
has_holiday_effect = proto.Field(
proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue,
)
has_spikes_and_dips = proto.Field(
proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue,
)
has_step_changes = proto.Field(
proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue,
)

arima_model_info = proto.RepeatedField(
proto.MESSAGE,
@@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message):
label_columns = proto.RepeatedField(
proto.MESSAGE, number=11, message=standard_sql.StandardSqlField,
)
best_trial_id = proto.Field(proto.INT64, number=19,)


class GetModelRequest(proto.Message):
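A minimal sketch of reading the new BQML fields back from a trained model, under stated assumptions: the model path is hypothetical, and the 2.x Model wrapper exposes the underlying TrainingRun protos via its training_runs property.

    from google.cloud import bigquery

    client = bigquery.Client()
    model = client.get_model("my-project.my_dataset.my_arima_model")  # hypothetical

    for run in model.training_runs:
        opts = run.training_options
        print("id columns:", list(opts.time_series_id_columns))  # new repeated field
        # The new flags are BoolValue wrappers; .value yields the plain bool.
        print("decompose:", opts.decompose_time_series.value)
        print("clean spikes/dips:", opts.clean_spikes_and_dips.value)
        print("adjust step changes:", opts.adjust_step_changes.value)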
12 changes: 12 additions & 0 deletions google/cloud/bigquery_v2/types/table_reference.py
@@ -36,11 +36,23 @@ class TableReference(proto.Message):
maximum length is 1,024 characters. Certain operations allow
suffixing of the table ID with a partition decorator, such
as ``sample_table$20190123``.
project_id_alternative (Sequence[str]):
The alternative field that will be used when ESF is not able
to translate the received data to the project_id field.
dataset_id_alternative (Sequence[str]):
The alternative field that will be used when ESF is not able
                to translate the received data to the dataset_id field.
table_id_alternative (Sequence[str]):
The alternative field that will be used when ESF is not able
                to translate the received data to the table_id field.
"""

project_id = proto.Field(proto.STRING, number=1,)
dataset_id = proto.Field(proto.STRING, number=2,)
table_id = proto.Field(proto.STRING, number=3,)
project_id_alternative = proto.RepeatedField(proto.STRING, number=4,)
dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,)
table_id_alternative = proto.RepeatedField(proto.STRING, number=6,)


__all__ = tuple(sorted(__protobuf__.manifest))
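For illustration, a minimal sketch constructing the message with one of the new repeated fields. The values are made up; in practice the *_alternative fields are populated on the service side (ESF), not by callers.

    from google.cloud.bigquery_v2.types import TableReference

    ref = TableReference(
        project_id="my-project",
        dataset_id="my_dataset",
        table_id="sample_table$20190123",  # partition decorator, per the docstring
        project_id_alternative=["my-project-alt"],  # proto-plus accepts plain lists
    )
    print(ref.project_id_alternative)  # ['my-project-alt']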