Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 BigQuery destination mlp #11238

Merged
merged 21 commits into from
Mar 23, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.6.12
LABEL io.airbyte.version=0.6.13
andriikorotkov marked this conversation as resolved.
Show resolved Hide resolved
LABEL io.airbyte.name=airbyte/destination-bigquery
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
"additionalProperties": true,
"properties": {
"big_query_client_buffer_size_mb": {
"title": "Google BigQuery client chunk size",
"description": "Google BigQuery client's chunk(buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MiB value is used if not set explicitly. It's recommended to decrease value for big data sets migration for less HEAP memory consumption and avoiding crashes. For more details refer to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html",
"title": "Google BigQuery Client Chunk Size (Optional)",
"description": "Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. Read more <a href=\"https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html\">here</a>.",
"type": "integer",
"minimum": 1,
"maximum": 15,
Expand All @@ -22,18 +22,18 @@
},
"project_id": {
"type": "string",
"description": "The GCP project ID for the project containing the target BigQuery dataset.",
"description": "The GCP project ID for the project containing the target BigQuery dataset. Read more <a href=\"https://cloud.google.com/iam/docs/creating-managing-service-accounts#creating\">here</a>.",
"title": "Project ID"
},
"dataset_id": {
"type": "string",
"description": "Default BigQuery Dataset ID tables are replicated to if the source does not specify a namespace.",
"description": "The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more <a href=\"https://cloud.google.com/bigquery/docs/datasets#create-dataset\">here</a>.",
"title": "Default Dataset ID"
},
"dataset_location": {
"type": "string",
"description": "The location of the dataset. Warning: Changes made after creation will not be applied.",
"title": "Dataset Location",
"description": "The location of the dataset. Warning: Changes made after creation will not be applied. The default \"US\" value is used if not set explicitly. Read more <a href=\"https://cloud.google.com/bigquery/docs/locations\">here</a>.",
"title": "Dataset Location (Optional)",
"default": "US",
"enum": [
"US",
Expand Down Expand Up @@ -71,26 +71,25 @@
},
"credentials_json": {
"type": "string",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.io/integrations/destinations/bigquery\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Credentials JSON",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#service-account-key\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Credentials JSON (Optional)",
"airbyte_secret": true
},
"transformation_priority": {
"type": "string",
"description": "Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit.",
"title": "Transformation Query Run Type",
"description": "Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Read more about interactive run type <a href=\"https://cloud.google.com/bigquery/docs/running-queries#queries\">here</a>. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit. Read more about batch queries <a href=\"https://cloud.google.com/bigquery/docs/running-queries#batch\">here</a>. The default \"interactive\" value is used if not set explicitly.",
"title": "Transformation Query Run Type (Optional)",
"default": "interactive",
"enum": ["interactive", "batch"]
},
"loading_method": {
"type": "object",
"title": "Loading Method",
"description": "Loading method used to send select the way data will be uploaded to BigQuery.",
"description": "Loading method used to send select the way data will be uploaded to BigQuery. <br><b>Standard Inserts</b> - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging. <br><b>GCS Staging</b> - Writes large batches of records to a file, uploads the file to GCS, then uses <b>COPY INTO table</b> to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#gcs-staging\">here</a>.",
"oneOf": [
{
"title": "Standard Inserts",
"additionalProperties": false,
"description": "Direct uploading using streams.",
"required": ["method"],
"properties": {
"method": {
Expand All @@ -102,7 +101,6 @@
{
"title": "GCS Staging",
"additionalProperties": false,
"description": "Writes large batches of records to a file, uploads the file to GCS, then uses <pre>COPY INTO table</pre> to upload the file. Recommended for large production workloads for better speed and scalability.",
"required": [
"method",
"gcs_bucket_name",
Expand All @@ -117,16 +115,17 @@
"gcs_bucket_name": {
"title": "GCS Bucket Name",
"type": "string",
"description": "The name of the GCS bucket.",
"description": "The name of the GCS bucket. Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\">here</a>.",
"examples": ["airbyte_sync"]
},
"gcs_bucket_path": {
"title": "GCS Bucket Path",
"description": "Directory under the GCS bucket where data will be written.",
"type": "string",
"examples": ["data_sync/test"]
},
"part_size_mb": {
"title": "Block Size (MB) for GCS multipart upload",
"title": "Block Size (MB) for GCS Multipart Upload (Optional)",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
Expand All @@ -136,8 +135,8 @@
},
"keep_files_in_gcs-bucket": {
"type": "string",
"description": "This upload method is supposed to temporary store records in GCS bucket. What do you want to do with data in GCS bucket when migration has finished?",
"title": "GCS tmp files afterward processing",
"description": "This upload method is supposed to temporary store records in GCS bucket. What do you want to do with data in GCS bucket when migration has finished? The default \"Delete all tmp files from GCS\" value is used if not set explicitly.",
"title": "GCS Tmp Files Afterward Processing (Optional)",
"default": "Delete all tmp files from GCS",
"enum": [
"Delete all tmp files from GCS",
Expand All @@ -146,6 +145,7 @@
},
"credential": {
"title": "Credential",
"description": "An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\">here</a>.",
"type": "object",
"oneOf": [
{
Expand Down
Loading