From d9cdc99c9602103f5dbf6a78c1d83127983e7c5c Mon Sep 17 00:00:00 2001
From: LiRen Tu
Date: Sun, 15 May 2022 01:28:36 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20Destination=20bigquery:=20mark?=
 =?UTF-8?q?=20service=20account=20as=20required=20for=20cloud=20(#12768)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update spec
* Update doc
* Bump version and update changelog
* Modify wording
* Add sample service account key json
* Add screenshots and common permission issues
* Refactor service account helper method
* Update log message
* Update version date in changelog
* auto-bump connector version
* auto-bump connector version

Co-authored-by: Octavia Squidington III
---
 .../seed/destination_definitions.yaml         |   4 +-
 .../resources/seed/destination_specs.yaml     |   8 +-
 .../Dockerfile                                |   2 +-
 .../src/main/resources/spec.json              |   2 +-
 .../destination-bigquery/Dockerfile           |   2 +-
 .../bigquery/BigQueryDestination.java         |  38 ++--
 .../destination/bigquery/BigQueryUtils.java   |  10 +-
 .../src/main/resources/spec.json              |   2 +-
 .../bigquery/BigQueryUtilsTest.java           |  38 +++-
 docs/integrations/destinations/bigquery.md    | 163 +++++++++++-------
 10 files changed, 172 insertions(+), 97 deletions(-)

diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
index 8e7e095df065..e7270004d4be 100644
--- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
@@ -27,7 +27,7 @@
 - name: BigQuery
   destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133
   dockerRepository: airbyte/destination-bigquery
-  dockerImageTag: 1.1.5
+  dockerImageTag: 1.1.6
   documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
   icon: bigquery.svg
   resourceRequirements:
@@ -40,7 +40,7 @@
 - name: BigQuery (denormalized typed struct)
   destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496
   dockerRepository: airbyte/destination-bigquery-denormalized
-  dockerImageTag: 0.3.5
+  dockerImageTag: 1.1.6
   documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
   icon: bigquery.svg
   resourceRequirements:
diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
index 334831a8a467..b1f9de3e8266 100644
--- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
+++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -285,7 +285,7 @@
       supported_destination_sync_modes:
       - "overwrite"
       - "append"
-- dockerImage: "airbyte/destination-bigquery:1.1.5"
+- dockerImage: "airbyte/destination-bigquery:1.1.6"
   spec:
     documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
     connectionSpecification:
@@ -368,7 +368,7 @@
           \ docs if you need help generating this key. Default credentials will\
           \ be used if this field is left empty."
-        title: "Service Account Key JSON (Optional)"
+        title: "Service Account Key JSON (Required for cloud, optional for open-source)"
         airbyte_secret: true
       transformation_priority:
         type: "string"
@@ -494,7 +494,7 @@
       - "overwrite"
      - "append"
      - "append_dedup"
-- dockerImage: "airbyte/destination-bigquery-denormalized:0.3.5"
+- dockerImage: "airbyte/destination-bigquery-denormalized:1.1.6"
   spec:
     documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
     connectionSpecification:
@@ -578,7 +578,7 @@
           \ docs if you need help generating this key. Default credentials will\
           \ be used if this field is left empty."
-        title: "Service Account Key JSON (Optional)"
+        title: "Service Account Key JSON (Required for cloud, optional for open-source)"
         airbyte_secret: true
       loading_method:
         type: "object"
diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile
index 6afb73da7f18..fb47afb01537 100644
--- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile
+++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile
@@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true
 
 COPY --from=build /airbyte /airbyte
 
-LABEL io.airbyte.version=0.3.5
+LABEL io.airbyte.version=1.1.6
 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized
diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json
index 7d855cf39bde..05879f7651c9 100644
--- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json
+++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json
@@ -72,7 +72,7 @@
     "credentials_json": {
       "type": "string",
       "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.",
-      "title": "Service Account Key JSON (Optional)",
+      "title": "Service Account Key JSON (Required for cloud, optional for open-source)",
       "airbyte_secret": true
     },
     "loading_method": {
diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile
index c5639583f7a6..987c29edd497 100644
--- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile
+++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile
@@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true
 
 COPY --from=build /airbyte /airbyte
 
-LABEL io.airbyte.version=1.1.5
+LABEL io.airbyte.version=1.1.6
 LABEL io.airbyte.name=airbyte/destination-bigquery
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java
index 0a4a2aafc1af..b32ed96e9595 100644
--- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java
+++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java
@@ -4,10 +4,9 @@
 
 package io.airbyte.integrations.destination.bigquery;
 
-import static java.util.Objects.isNull;
-
 import com.codepoetics.protonpack.StreamUtils;
 import com.fasterxml.jackson.databind.JsonNode;
+import com.google.auth.oauth2.GoogleCredentials;
 import com.google.auth.oauth2.ServiceAccountCredentials;
 import com.google.cloud.bigquery.BigQuery;
 import com.google.cloud.bigquery.BigQueryOptions;
@@ -110,7 +109,7 @@ public AirbyteConnectionStatus check(final JsonNode config) {
         return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(result.getRight());
       }
     } catch (final Exception e) {
-      LOGGER.info("Check failed.", e);
+      LOGGER.error("Check failed.", e);
       return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage() != null ? e.getMessage() : e.toString());
     }
   }
@@ -120,11 +119,10 @@ public AirbyteConnectionStatus checkStorageIamPermissions(final JsonNode config)
     final String bucketName = loadingMethod.get(BigQueryConsts.GCS_BUCKET_NAME).asText();
 
     try {
-      final ServiceAccountCredentials credentials = getServiceAccountCredentials(config);
-
+      final GoogleCredentials credentials = getServiceAccountCredentials(config);
       final Storage storage = StorageOptions.newBuilder()
           .setProjectId(config.get(BigQueryConsts.CONFIG_PROJECT_ID).asText())
-          .setCredentials(!isNull(credentials) ? credentials : ServiceAccountCredentials.getApplicationDefault())
+          .setCredentials(credentials)
           .build().getService();
 
       final List<Boolean> permissionsCheckStatusList = storage.testIamPermissions(bucketName, REQUIRED_PERMISSIONS);
@@ -146,11 +144,11 @@ public AirbyteConnectionStatus checkStorageIamPermissions(final JsonNode config)
 
       return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
     } catch (final Exception e) {
-      LOGGER.error("Exception attempting to access the Gcs bucket: {}", e.getMessage());
+      LOGGER.error("Cannot access the GCS bucket", e);
 
       return new AirbyteConnectionStatus()
           .withStatus(AirbyteConnectionStatus.Status.FAILED)
-          .withMessage("Could not connect to the Gcs bucket with the provided configuration. \n" + e
+          .withMessage("Could not access the GCS bucket with the provided configuration.\n" + e
              .getMessage());
     }
   }
@@ -160,15 +158,10 @@ protected BigQuery getBigQuery(final JsonNode config) {
 
     try {
       final BigQueryOptions.Builder bigQueryBuilder = BigQueryOptions.newBuilder();
-      ServiceAccountCredentials credentials = null;
-      if (BigQueryUtils.isUsingJsonCredentials(config)) {
-        // handle the credentials json being passed as a json object or a json object already serialized as
-        // a string.
-        credentials = getServiceAccountCredentials(config);
-      }
+      final GoogleCredentials credentials = getServiceAccountCredentials(config);
       return bigQueryBuilder
           .setProjectId(projectId)
-          .setCredentials(!isNull(credentials) ? credentials : ServiceAccountCredentials.getApplicationDefault())
+          .setCredentials(credentials)
           .build()
           .getService();
     } catch (final IOException e) {
@@ -176,14 +169,19 @@ protected BigQuery getBigQuery(final JsonNode config) {
     }
   }
 
-  private ServiceAccountCredentials getServiceAccountCredentials(final JsonNode config) throws IOException {
-    final ServiceAccountCredentials credentials;
+  private static GoogleCredentials getServiceAccountCredentials(final JsonNode config) throws IOException {
+    if (!BigQueryUtils.isUsingJsonCredentials(config)) {
+      LOGGER.info("No service account key json is provided. It is required if you are using Airbyte cloud.");
+      LOGGER.info("Using the default service account credential from environment.");
+      return ServiceAccountCredentials.getApplicationDefault();
+    }
+
+    // The JSON credential can either be a raw JSON object, or a serialized JSON object.
     final String credentialsString = config.get(BigQueryConsts.CONFIG_CREDS).isObject()
         ? Jsons.serialize(config.get(BigQueryConsts.CONFIG_CREDS))
         : config.get(BigQueryConsts.CONFIG_CREDS).asText();
-    credentials = ServiceAccountCredentials
-        .fromStream(new ByteArrayInputStream(credentialsString.getBytes(Charsets.UTF_8)));
-    return credentials;
+    return ServiceAccountCredentials.fromStream(
+        new ByteArrayInputStream(credentialsString.getBytes(Charsets.UTF_8)));
   }
 
   @Override
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java
index 7ecc370bfdb1..b93a63758adc 100644
--- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java
+++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java
@@ -272,7 +272,15 @@ public static JobInfo.WriteDisposition getWriteDisposition(final DestinationSync
   }
 
   public static boolean isUsingJsonCredentials(final JsonNode config) {
-    return config.has(BigQueryConsts.CONFIG_CREDS) && !config.get(BigQueryConsts.CONFIG_CREDS).asText().isEmpty();
+    if (!config.has(BigQueryConsts.CONFIG_CREDS)) {
+      return false;
+    }
+    final JsonNode json = config.get(BigQueryConsts.CONFIG_CREDS);
+    if (json.isTextual()) {
+      return !json.asText().isEmpty();
+    } else {
+      return !Jsons.serialize(json).isEmpty();
+    }
   }
 
   // https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json
index b51812ee0d7b..0945d960beba 100644
--- a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json
+++ b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json
@@ -71,7 +71,7 @@
     "credentials_json": {
       "type": "string",
       "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.",
-      "title": "Service Account Key JSON (Optional)",
+      "title": "Service Account Key JSON (Required for cloud, optional for open-source)",
       "airbyte_secret": true
     },
     "transformation_priority": {
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java
index f0538e443a62..80598d8955a6 100644
--- a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java
+++ b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java
@@ -5,13 +5,18 @@
 
 package io.airbyte.integrations.destination.bigquery;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.google.common.collect.ImmutableMap;
 import io.airbyte.commons.json.Jsons;
+import java.util.Collections;
+import java.util.Map;
 import java.util.stream.Stream;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
@@ -29,30 +34,51 @@ public void init() {
 
   @ParameterizedTest
   @MethodSource("validBigQueryIdProvider")
-  public void testGetDatasetIdSuccess(String projectId, String datasetId, String expected) throws Exception {
-    JsonNode config = Jsons.jsonNode(configMapBuilder
+  public void testGetDatasetIdSuccess(final String projectId, final String datasetId, final String expected) {
+    final JsonNode config = Jsons.jsonNode(configMapBuilder
         .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId)
         .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId)
         .build());
 
-    String actual = BigQueryUtils.getDatasetId(config);
+    final String actual = BigQueryUtils.getDatasetId(config);
 
     assertEquals(expected, actual);
   }
 
   @ParameterizedTest
   @MethodSource("invalidBigQueryIdProvider")
-  public void testGetDatasetIdFail(String projectId, String datasetId, String expected) throws Exception {
-    JsonNode config = Jsons.jsonNode(configMapBuilder
+  public void testGetDatasetIdFail(final String projectId, final String datasetId, final String expected) {
+    final JsonNode config = Jsons.jsonNode(configMapBuilder
         .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId)
         .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId)
         .build());
 
-    Exception exception = assertThrows(IllegalArgumentException.class, () -> BigQueryUtils.getDatasetId(config));
+    final Exception exception = assertThrows(IllegalArgumentException.class, () -> BigQueryUtils.getDatasetId(config));
 
     assertEquals(expected, exception.getMessage());
   }
 
+  @Test
+  public void testIsUsingJsonCredentials() {
+    // empty
+    final JsonNode emptyConfig = Jsons.jsonNode(Collections.emptyMap());
+    assertFalse(BigQueryUtils.isUsingJsonCredentials(emptyConfig));
+
+    // empty text
+    final JsonNode emptyTextConfig = Jsons.jsonNode(Map.of(BigQueryConsts.CONFIG_CREDS, ""));
+    assertFalse(BigQueryUtils.isUsingJsonCredentials(emptyTextConfig));
+
+    // non-empty text
+    final JsonNode nonEmptyTextConfig = Jsons.jsonNode(
+        Map.of(BigQueryConsts.CONFIG_CREDS, "{ \"service_account\": \"test@airbyte.io\" }"));
+    assertTrue(BigQueryUtils.isUsingJsonCredentials(nonEmptyTextConfig));
+
+    // object
+    final JsonNode objectConfig = Jsons.jsonNode(Map.of(
+        BigQueryConsts.CONFIG_CREDS, Jsons.jsonNode(Map.of("service_account", "test@airbyte.io"))));
+    assertTrue(BigQueryUtils.isUsingJsonCredentials(objectConfig));
+  }
+
   private static Stream<Arguments> validBigQueryIdProvider() {
     return Stream.of(
         Arguments.arguments("my-project", "my_dataset", "my_dataset"),
diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md
index 816a2231fbd7..d26164c44197 100644
--- a/docs/integrations/destinations/bigquery.md
+++ b/docs/integrations/destinations/bigquery.md
@@ -74,69 +74,97 @@ Note that queries written in BigQuery can only reference Datasets in the same ph
 
 #### Service account
 
-In order for Airbyte to sync data into BigQuery, it needs credentials for a [Service Account](https://cloud.google.com/iam/docs/service-accounts) with the "BigQuery User"(roles/bigquery.user) and "BigQuery Data Editor"(roles/bigquery.dataEditor) roles, which grants permissions to run BigQuery jobs, write to BigQuery Datasets, and read table metadata. More read about BigQuery roles permissions ypu can read [here](https://cloud.google.com/bigquery/docs/access-control).
-We highly recommend that this Service Account is exclusive to Airbyte for ease of permissioning and auditing. However, you can use a pre-existing Service Account if you already have one with the correct permissions. "BigQuery User"(roles/bigquery.user) role permissions:
-```
-bigquery.bireservations.get
-bigquery.capacityCommitments.get
-bigquery.capacityCommitments.list
-bigquery.config.get
-bigquery.datasets.create
-bigquery.datasets.get
-bigquery.datasets.getIamPolicy
-bigquery.jobs.create
-bigquery.jobs.list
-bigquery.models.list
-bigquery.readsessions.*
-bigquery.reservationAssignments.list
-bigquery.reservationAssignments.search
-bigquery.reservations.get
-bigquery.reservations.list
-bigquery.routines.list
-bigquery.savedqueries.get
-bigquery.savedqueries.list
-bigquery.tables.list
-bigquery.transfers.get
-resourcemanager.projects.get
-resourcemanager.projects.list
-```
+In order for Airbyte to sync data into BigQuery, it needs credentials for a [Service Account](https://cloud.google.com/iam/docs/service-accounts) with the `BigQuery User` (`roles/bigquery.user`) and `BigQuery Data Editor` (`roles/bigquery.dataEditor`) roles, which grant permissions to run BigQuery jobs, write to BigQuery Datasets, and read table metadata. You can read more about BigQuery roles and permissions [here](https://cloud.google.com/bigquery/docs/access-control).
+
+![create a service account with the bigquery user and data editor roles](https://user-images.githubusercontent.com/1933157/168459232-6b88458c-a038-4bc1-883d-cf506e363441.png)
+
+We highly recommend that this Service Account is exclusive to Airbyte for ease of permissioning and auditing. However, you can use a pre-existing Service Account if you already have one with the correct permissions.
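+
+As a quick sanity check that the roles have taken effect, a minimal sketch like the following can run a trivial query as the service account, which exercises the `bigquery.jobs.create` permission granted by `BigQuery User`. This snippet is illustrative only (it is not part of the connector) and assumes the `google-cloud-bigquery` Java client with `GOOGLE_APPLICATION_CREDENTIALS` pointing at the service account key file:
+
+```java
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.BigQueryOptions;
+import com.google.cloud.bigquery.QueryJobConfiguration;
+
+public final class BigQueryAccessCheck {
+
+  public static void main(final String[] args) throws InterruptedException {
+    // Uses Application Default Credentials, i.e. the key file referenced by
+    // the GOOGLE_APPLICATION_CREDENTIALS environment variable.
+    final BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
+    // Any query requires bigquery.jobs.create; this fails fast if the
+    // service account is missing the BigQuery User role.
+    bigquery.query(QueryJobConfiguration.newBuilder("SELECT 1").build());
+    System.out.println("Service account can run BigQuery jobs.");
+  }
+}
+```
+
+The full set of permissions granted by each role is listed below.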
+
+* `BigQuery User` (`roles/bigquery.user`) role permissions:
+
+  ```
+  bigquery.bireservations.get
+  bigquery.capacityCommitments.get
+  bigquery.capacityCommitments.list
+  bigquery.config.get
+  bigquery.datasets.create
+  bigquery.datasets.get
+  bigquery.datasets.getIamPolicy
+  bigquery.jobs.create
+  bigquery.jobs.list
+  bigquery.models.list
+  bigquery.readsessions.*
+  bigquery.reservationAssignments.list
+  bigquery.reservationAssignments.search
+  bigquery.reservations.get
+  bigquery.reservations.list
+  bigquery.routines.list
+  bigquery.savedqueries.get
+  bigquery.savedqueries.list
+  bigquery.tables.list
+  bigquery.transfers.get
+  resourcemanager.projects.get
+  resourcemanager.projects.list
+  ```
+* `BigQuery Data Editor` (`roles/bigquery.dataEditor`) role permissions:
+  ```
+  bigquery.config.get
+  bigquery.datasets.create
+  bigquery.datasets.get
+  bigquery.datasets.getIamPolicy
+  bigquery.datasets.updateTag
+  bigquery.models.*
+  bigquery.routines.*
+  bigquery.tables.create
+  bigquery.tables.createSnapshot
+  bigquery.tables.delete
+  bigquery.tables.export
+  bigquery.tables.get
+  bigquery.tables.getData
+  bigquery.tables.getIamPolicy
+  bigquery.tables.list
+  bigquery.tables.restoreSnapshot
+  bigquery.tables.update
+  bigquery.tables.updateData
+  bigquery.tables.updateTag
+  resourcemanager.projects.get
+  resourcemanager.projects.list
+  ```
+
+#### Service account key JSON (required for cloud, optional for open source)
 
-"BigQuery Data Editor"(roles/bigquery.dataEditor) role permissions:
-```
-bigquery.config.get
-bigquery.datasets.create
-bigquery.datasets.get
-bigquery.datasets.getIamPolicy
-bigquery.datasets.updateTag
-bigquery.models.*
-bigquery.routines.*
-bigquery.tables.create
-bigquery.tables.createSnapshot
-bigquery.tables.delete
-bigquery.tables.export
-bigquery.tables.get
-bigquery.tables.getData
-bigquery.tables.getIamPolicy
-bigquery.tables.list
-bigquery.tables.restoreSnapshot
-bigquery.tables.update
-bigquery.tables.updateData
-bigquery.tables.updateTag
-resourcemanager.projects.get
-resourcemanager.projects.list
-```
+Service Account Keys are used to authenticate as Google Service Accounts. For Airbyte to leverage the permissions you granted to the Service Account in the previous step, you'll need to provide its Service Account Keys. See the [Google documentation](https://cloud.google.com/iam/docs/service-accounts#service_account_keys) for more information about Keys.
 
-The easiest way to create a Service Account is to follow GCP's guide for [Creating a Service Account](https://cloud.google.com/iam/docs/creating-managing-service-accounts). Once you've created the Service Account, make sure to keep its ID handy as you will need to reference it when granting roles. Service Account IDs typically take the form `@.iam.gserviceaccount.com`
+Follow the [Creating and Managing Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) guide to create a key. Airbyte currently supports JSON Keys only, so make sure you create your key in that format. As soon as you have created the key, make sure to download it, as that is the only time Google will allow you to see its contents. Once you've successfully configured BigQuery as a destination in Airbyte, delete this key from your computer.
 
-Then, add the service account as a Member in your Google Cloud Project with the "BigQuery User" role. To do this, follow the instructions for [Granting Access](https://cloud.google.com/iam/docs/granting-changing-revoking-access#granting-console) in the Google documentation. The email address of the member you are adding is the same as the Service Account ID you just created.
+The key JSON looks like the following (copied from the example [here](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating)):
+
+```json
+{
+  "type": "service_account",
+  "project_id": "",
+  "private_key_id": "",
+  "private_key": "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n",
+  "client_email": "",
+  "client_id": "",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://accounts.google.com/o/oauth2/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/"
+}
+```
 
-At this point you should have a service account with the "BigQuery User" project-level permission.
+This parameter is **REQUIRED** when you set up the connector on Airbyte Cloud. It is optional only if you deploy Airbyte on your own infrastructure and provide the credentials through the environment, in which case the service account key JSON is searched for in the following order:
 
-#### Service account key
+- Credentials file pointed to by the `GOOGLE_APPLICATION_CREDENTIALS` environment variable
+- Credentials provided by the Google Cloud SDK `gcloud auth application-default login` command
+- Google App Engine built-in credentials
+- Google Cloud Shell built-in credentials
+- Google Compute Engine built-in credentials
 
-Service Account Keys are used to authenticate as Google Service Accounts. For Airbyte to leverage the permissions you granted to the Service Account in the previous step, you'll need to provide its Service Account Keys. See the [Google documentation](https://cloud.google.com/iam/docs/service-accounts#service_account_keys) for more information about Keys.
+See [Authenticating as a service account](https://cloud.google.com/docs/authentication/production#automatically) for details.
 
-Follow the [Creating and Managing Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) guide to create a key. Airbyte currently supports JSON Keys only, so make sure you create your key in that format. As soon as you created the key, make sure to download it, as that is the only time Google will allow you to see its contents. Once you've successfully configured BigQuery as a destination in Airbyte, delete this key from your computer.
+----
 
 You should now have all the requirements needed to configure BigQuery as a destination in the UI. You'll need the following information to configure the BigQuery destination:
@@ -184,8 +212,9 @@ This is the recommended configuration for uploading data to BigQuery. It works b
 * **GCS Bucket Keep files after migration**
   * See [this](https://cloud.google.com/storage/docs/creating-buckets) for instructions on how to create a GCS bucket. The bucket cannot have a retention policy. Set Protection Tools to none or Object versioning.
 * **HMAC Key Access ID**
-  * See [this](https://cloud.google.com/storage/docs/authentication/managing-hmackeys) on how to generate an access key. For more information on hmac keys please reference the [GCP docs](https://cloud.google.com/storage/docs/authentication/hmackeys)
-  * We recommend creating an Airbyte-specific user or service account. This user or account will require the following permissions for the bucket:
+  * See [this](https://cloud.google.com/storage/docs/authentication/managing-hmackeys) on how to generate an access key. For more information on HMAC keys, please reference the [GCP docs](https://cloud.google.com/storage/docs/authentication/hmackeys).
+    ![add hmac key to the bigquery service account](https://user-images.githubusercontent.com/1933157/168459101-f6d59db4-ebd6-4307-b528-f47b2ccf11e3.png)
+  * The BigQuery service account (see the doc [above](#service-account)) should have the following permissions for the bucket:
     ```
     storage.multipartUploads.abort
     storage.multipartUploads.create
     storage.objects.create
     storage.objects.delete
     storage.objects.get
     storage.objects.list
     ```
-    You can set those by going to the permissions tab in the GCS bucket and adding the appropriate the email address of the service account or user and adding the aforementioned permissions.
+  * The `Storage Object Admin` role has a superset of all the above permissions, so the quickest way is to add that role to the BigQuery service account on the IAM page, as shown below.
+    ![add storage object admin role to bigquery service account](https://user-images.githubusercontent.com/1933157/168458678-f3223a58-9403-4780-87dd-f44806f11d67.png)
+  * Alternatively, create a dedicated role with just the above permissions, and assign this role to the BigQuery service account. This way, the service account has only the minimum permissions required.
+    ![create a dedicated role for gcs access](https://user-images.githubusercontent.com/1933157/168458835-05794756-4b2a-462f-baae-6811b61e9d22.png)
+
 * **Secret Access Key**
   * Corresponding key to the above access ID.
 * Make sure your GCS bucket is accessible from the machine running Airbyte. This depends on your networking setup. The easiest way to verify if Airbyte is able to connect to your GCS bucket is via the check connection tool in the UI.
@@ -203,12 +236,22 @@
 
 #### `Standard` uploads
 
 This uploads data directly from your source to BigQuery. While this is faster to setup initially, **we strongly recommend that you do not use this option for anything other than a quick demo**. It is more than 10x slower than the GCS uploading option and will fail for many datasets. Please be aware you may see some failures for big datasets and slow sources, e.g. if reading from source takes more than 10-12 hours. This is caused by the Google BigQuery SDK client limitations. For more details please check [https://github.com/airbytehq/airbyte/issues/3549](https://github.com/airbytehq/airbyte/issues/3549)
 
+## Common Root Causes of Permission Issues
+
+The service account does not have the proper permissions.
+- Make sure the BigQuery service account has the `BigQuery User` and `BigQuery Data Editor` roles, or permissions equivalent to those two roles.
+- If the GCS staging mode is selected, make sure the BigQuery service account has the right permissions, or the `Cloud Storage Admin` role, which includes a superset of the required permissions.
+
+The HMAC key is wrong.
+- Make sure that the HMAC key is created under the right GCS bucket, and that it has access to the GCS bucket path.
+
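+To rule out permission problems independently of Airbyte, you can probe the bucket with the same API call the connector uses for its connection check (`storage.testIamPermissions`). The snippet below is an illustrative sketch, not connector code; it assumes the `google-cloud-storage` Java client and credentials supplied through the environment:
+
+```java
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import java.util.List;
+
+public final class GcsPermissionCheck {
+
+  // The bucket permissions required for the GCS staging mode (see above).
+  private static final List<String> REQUIRED_PERMISSIONS = List.of(
+      "storage.multipartUploads.abort",
+      "storage.multipartUploads.create",
+      "storage.objects.create",
+      "storage.objects.delete",
+      "storage.objects.get",
+      "storage.objects.list");
+
+  public static void main(final String[] args) {
+    final String bucketName = args[0];
+    // Uses Application Default Credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS).
+    final Storage storage = StorageOptions.getDefaultInstance().getService();
+    final List<Boolean> granted = storage.testIamPermissions(bucketName, REQUIRED_PERMISSIONS);
+    for (int i = 0; i < REQUIRED_PERMISSIONS.size(); i++) {
+      System.out.printf("%s: %s%n", REQUIRED_PERMISSIONS.get(i), granted.get(i) ? "granted" : "MISSING");
+    }
+  }
+}
+```
+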
 ## CHANGELOG
 
 ### bigquery
 
 | Version | Date       | Pull Request                                                | Subject                                                                                           |
 |:--------|:-----------|:------------------------------------------------------------|:---------------------------------------------------------------------------------------------------|
+| 1.1.6   | 2022-05-15 | [12768](https://github.com/airbytehq/airbyte/pull/12768)    | Clarify that the service account key json field is required on cloud.                             |
 | 1.1.5   | 2022-05-12 | [12805](https://github.com/airbytehq/airbyte/pull/12805)    | Updated to latest base-java to emit AirbyteTraceMessage on error.                                  |
 | 1.1.4   | 2022-05-04 | [12578](https://github.com/airbytehq/airbyte/pull/12578)    | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging.    |
 | 1.1.3   | 2022-05-02 | [12528](https://github.com/airbytehq/airbyte/pull/12528)    | Update Dataset location field description                                                          |
@@ -244,6 +287,7 @@ This uploads data directly from your source to BigQuery. While this is faster to
 | Version | Date       | Pull Request                                                | Subject                                                                                                                   |
 |:--------|:-----------|:------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------|
+| 1.1.6   | 2022-05-15 | [12768](https://github.com/airbytehq/airbyte/pull/12768)    | Clarify that the service account key json field is required on cloud.                                                     |
 | 0.3.5   | 2022-05-12 | [12805](https://github.com/airbytehq/airbyte/pull/12805)    | Updated to latest base-java to emit AirbyteTraceMessage on error.                                                          |
 | 0.3.4   | 2022-05-04 | [12578](https://github.com/airbytehq/airbyte/pull/12578)    | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging.                            |
 | 0.3.3   | 2022-05-02 | [12528](https://github.com/airbytehq/airbyte/pull/12528)    | Update Dataset location field description                                                                                  |
@@ -277,4 +321,3 @@ This uploads data directly from your source to BigQuery. While this is faster to
 | 0.1.2   | 2021-07-30 | [\#5125](https://github.com/airbytehq/airbyte/pull/5125)    | Enable `additionalPropertities` in spec.json                                                                               |
 | 0.1.1   | 2021-06-21 | [\#3555](https://github.com/airbytehq/airbyte/pull/3555)    | Partial Success in BufferedStreamConsumer                                                                                  |
 | 0.1.0   | 2021-06-21 | [\#4176](https://github.com/airbytehq/airbyte/pull/4176)    | Destination using Typed Struct and Repeated fields                                                                         |
-