Skip to content

Commit

Permalink
feat(components/google-cloud): add preprocessing_bigquery_dataset to …
Browse files Browse the repository at this point in the history
…the inputs of preprocessing component (#6461)
  • Loading branch information
yzhaozh authored Aug 27, 2021
1 parent d2a1e63 commit c127996
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
"""Google Cloud Pipeline Experimental Forecasting Components."""

import os
from typing import Optional

from kfp.components import load_component_from_file

__all__ = [
Expand All @@ -22,7 +24,10 @@
]


def ForecastingPreprocessingOp(project_id: str, input_tables: str):
def ForecastingPreprocessingOp(
project_id: str,
input_tables: str,
preprocessing_bigquery_dataset: Optional[str] = None):
"""Preprocesses BigQuery tables for training or prediction.
Creates a BigQuery table for training or prediction based on the input tables.
Expand All @@ -34,6 +39,9 @@ def ForecastingPreprocessingOp(project_id: str, input_tables: str):
project_id (str): The GCP project id that runs the pipeline.
input_tables (str): Serialized Json array that specifies input BigQuery
tables and specs.
preprocessing_bigquery_dataset (Optional[str]): Optional BigQuery dataset
to save the preprocessing result BigQuery table. If not present, a new
dataset will be created by the component.
Returns:
None
Expand All @@ -43,7 +51,9 @@ def ForecastingPreprocessingOp(project_id: str, input_tables: str):
return load_component_from_file(
os.path.join(
os.path.dirname(__file__), 'preprocess/component.yaml'))(
project_id=project_id, input_tables=input_tables)
project_id=project_id,
input_tables=input_tables,
preprocessing_bigquery_dataset=preprocessing_bigquery_dataset)


def ForecastingValidationOp(input_tables: str, validation_theme: str):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ name: Forecasting Preprocessing
inputs:
- {name: project, type: String}
- {name: input_tables, type: String}
- {name: preprocessing_bigquery_dataset, type: String}
outputs:
- {name: preprocess_metadata, type: String}
implementation:
Expand All @@ -30,5 +31,7 @@ implementation:
- {inputValue: project}
- --input_table_specs
- {inputValue: input_tables}
- --bigquery_dataset_id
- {inputValue: preprocessing_bigquery_dataset}
- --preprocess_metadata_path
- {outputPath: preprocess_metadata}

0 comments on commit c127996

Please sign in to comment.