From 427e0609a7be971895fd9161f10d8b72cbe0b9d7 Mon Sep 17 00:00:00 2001
From: Sam Levenick
Date: Fri, 19 Mar 2021 12:11:01 -0700
Subject: [PATCH 01/11] Update eventarc trigger yaml, markdown to allow setting
 transport

---
 .../website/docs/r/eventarc_trigger.html.markdown | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown b/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown
index 7aebecde9180..4221aeeef3c1 100644
--- a/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown
+++ b/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown
@@ -89,7 +89,7 @@ The following arguments are supported:
 * `matching_criteria` -
   (Required)
-  Required. The criteria by which events are filtered. Only events that match with this criteria will be sent to the destination.
+  Required. The criteria by which events are filtered. Only events that match this criteria will be sent to the destination.
 * `name` -
   (Required)
@@ -144,6 +144,19 @@ The `cloud_run_service` block supports:
 * `region` -
   (Optional)
   Required. The region the Cloud Run service is deployed in.
+
+The `transport` block supports:
+
+* `pubsub` -
+  (Optional)
+  The Pub/Sub topic and subscription used by Eventarc as a delivery intermediary.
+
+The `pubsub` block supports:
+
+* `topic` -
+  (Optional)
+  Optional. The name of the Pub/Sub topic created and managed by the Eventarc system as a transport for the event delivery. Format: `projects/{PROJECT_ID}/topics/{TOPIC_NAME}`. You may set an existing topic for triggers of the type `google.cloud.pubsub.topic.v1.messagePublished` only. The topic you provide here will not be deleted by Eventarc at trigger deletion.
+
+* `subscription` -
+  Output only. The name of the Pub/Sub subscription created and managed by the Eventarc system as a transport for the event delivery. Format: `projects/{PROJECT_ID}/subscriptions/{SUBSCRIPTION_NAME}`.
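For illustration, a trigger that supplies its own transport topic might be written like this; it is a minimal sketch only, and the `example` resource names and the Cloud Run service name are placeholders rather than part of the patch:

```hcl
resource "google_pubsub_topic" "example" {
  provider = google-beta
  name     = "example-topic"
}

resource "google_eventarc_trigger" "example" {
  provider = google-beta
  name     = "example-trigger"
  location = "us-central1"
  matching_criteria {
    attribute = "type"
    value     = "google.cloud.pubsub.topic.v1.messagePublished"
  }
  destination {
    cloud_run_service {
      service = "example-service"
      region  = "us-central1"
    }
  }
  transport {
    pubsub {
      # Supplying an existing topic is only allowed for
      # google.cloud.pubsub.topic.v1.messagePublished triggers.
      topic = google_pubsub_topic.example.id
    }
  }
}
```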
The `transport` block supports:

From 0ba83a278e8d1280942f19729785b467776ca94 Mon Sep 17 00:00:00 2001
From: Sam Levenick
Date: Fri, 19 Mar 2021 14:13:08 -0700
Subject: [PATCH 02/11] Update test, add handwritten utils

---
 .../resource_eventarc_trigger_test.go.erb     | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb

diff --git a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb
new file mode 100644
index 000000000000..c5221f11af37
--- /dev/null
+++ b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb
@@ -0,0 +1,126 @@
+<% autogen_exception -%>
+package google
+<% unless version == 'ga' -%>
+import (
+  "fmt"
+  "strings"
+  "testing"
+
+  "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
+  "github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
+)
+
+func TestAccEventarcTrigger_basic(t *testing.T) {
+  // DCL currently fails due to transport modification
+  skipIfVcr(t)
+  t.Parallel()
+
+  context := map[string]interface{}{
+    "random_suffix": randString(t, 10),
+    "project":       getTestProjectFromEnv(),
+  }
+
+  vcrTest(t, resource.TestCase{
+    PreCheck:     func() { testAccPreCheck(t) },
+    Providers:    testAccProvidersOiCS,
+    CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t),
+    Steps: []resource.TestStep{
+      {
+        Config: testAccEventarcTrigger_basic(context),
+      },
+      {
+        ImportState:       true,
+        ImportStateVerify: true,
+        ResourceName:      "google_eventarc_trigger.trigger",
+      },
+    },
+  })
+}
+
+func testAccEventarcTrigger_basic(context map[string]interface{}) string {
+  return Nprintf(`
+resource "google_eventarc_trigger" "trigger" {
+  provider = google-beta
+  name     = "trigger%{random_suffix}"
+  location = "us-central1"
+  matching_criteria {
+    attribute = "type"
+    value     = "google.cloud.pubsub.topic.v1.messagePublished"
+  }
+  destination {
+    cloud_run_service {
+      service = google_cloud_run_service.default.name
+      region  = "us-central1"
+    }
+  }
+  transport {
+    pubsub {
+      topic = google_pubsub_topic.foo.id
+    }
+  }
+}
+
+resource "google_pubsub_topic" "foo" {
+  name = "topic%{random_suffix}"
+}
+
+resource "google_cloud_run_service" "default" {
+  provider = google-beta
+  name     = "service-eventarc%{random_suffix}"
+  location = "us-central1"
+
+  metadata {
+    namespace = "%{project}"
+  }
+
+  template {
+    spec {
+      containers {
+        image = "gcr.io/cloudrun/hello"
+        args  = ["arrgs"]
+      }
+      container_concurrency = 50
+    }
+  }
+
+  traffic {
+    percent         = 100
+    latest_revision = true
+  }
+}
+`, context)
+}
+
+func funcAccTestEventarcTriggerCheckDestroy(t *testing.T) func(s *terraform.State) error {
+  return func(s *terraform.State) error {
+    for name, rs := range s.RootModule().Resources {
+      if rs.Type != "google_eventarc_trigger" {
+        continue
+      }
+      if strings.HasPrefix(name, "data.") {
+        continue
+      }
+
+      config := googleProviderConfig(t)
+
+      url, err := replaceVarsForTest(config, rs, "{{EventarcBasePath}}projects/{{project}}/locations/{{location}}/triggers/{{name}}")
+      if err != nil {
+        return err
+      }
+
+      billingProject := ""
+
+      if config.BillingProject != "" {
+        billingProject = config.BillingProject
+      }
+
+      _, err = sendRequest(config, "GET", billingProject, url, config.userAgent, nil)
+      if err == nil {
+        return fmt.Errorf("EventarcTrigger still exists at %s", url)
+      }
+    }
+
+    return nil
+  }
+}
+
+<% else %>
+// Magic Modules doesn't let us remove files - blank out beta-only common-compile files for now.
+<% end -%> From dfad8a8eedb302c6d97fd41c4c1977954dfc2435 Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Fri, 19 Mar 2021 14:24:20 -0700 Subject: [PATCH 03/11] Update handwritten --- mmv1/third_party/terraform/utils/utils.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mmv1/third_party/terraform/utils/utils.go b/mmv1/third_party/terraform/utils/utils.go index f877b25446ef..89f4a2232ff9 100644 --- a/mmv1/third_party/terraform/utils/utils.go +++ b/mmv1/third_party/terraform/utils/utils.go @@ -481,14 +481,6 @@ func SnakeToPascalCase(s string) string { return strings.Join(split, "") } -func checkStringMap(v interface{}) map[string]string { - m, ok := v.(map[string]string) - if ok { - return m - } - return convertStringMap(v.(map[string]interface{})) -} - func multiEnvSearch(ks []string) string { for _, k := range ks { if v := os.Getenv(k); v != "" { @@ -497,3 +489,11 @@ func multiEnvSearch(ks []string) string { } return "" } + +func checkStringMap(v interface{}) map[string]string { + m, ok := v.(map[string]string) + if ok { + return m + } + return convertStringMap(v.(map[string]interface{})) +} From cb8d06e0052853ef11a2d0a384037583df98aa8b Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Fri, 19 Mar 2021 14:49:49 -0700 Subject: [PATCH 04/11] Updating eventarc test --- .../terraform/tests/resource_eventarc_trigger_test.go.erb | 1 + 1 file changed, 1 insertion(+) diff --git a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb index c5221f11af37..480960ce5154 100644 --- a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb +++ b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb @@ -58,6 +58,7 @@ resource "google_eventarc_trigger" "trigger" { } resource "google_pubsub_topic" "foo" { + provider = google-beta name = "topic%{random_suffix}" } From 0fe6cf1dd9656528e244bf7c615765778fc9c6b9 Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Mon, 29 Mar 2021 13:21:02 -0700 Subject: [PATCH 05/11] Updating trigger --- .../resource_eventarc_trigger_test.go.erb | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb index 480960ce5154..a62423354219 100644 --- a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb +++ b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb @@ -34,8 +34,91 @@ func TestAccEventarcTrigger_basic(t *testing.T) { }) } +func TestAccEventarcTrigger_transport(t *testing.T) { + // DCL currently fails due to transport modification + skipIfVcr(t) + t.Parallel() + + context := map[string]interface{}{ + "random_suffix": randString(t, 10), + "project": getTestProjectFromEnv(), + } + + vcrTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProvidersOiCS, + CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t), + ExternalProviders: map[string]resource.ExternalProvider{ + "random": {}, + }, + Steps: []resource.TestStep{ + { + Config: testAccEventarcTrigger_transport(context), + }, + { + ImportState: true, + ImportStateVerify: true, + ResourceName: "google_eventarc_trigger.trigger", + }, + }, + }) +} + func testAccEventarcTrigger_basic(context map[string]interface{}) string { return Nprintf(` +resource "google_eventarc_trigger" "trigger" { + provider = google-beta + name = 
"trigger%{random_suffix}" + location = "us-central1" + matching_criteria { + attribute = "type" + value = "google.cloud.pubsub.topic.v1.messagePublished" + } + destination { + cloud_run_service { + service = google_cloud_run_service.default.name + region = "us-central1" + } + } + labels = { + foo = "bar" + } +} + +resource "google_pubsub_topic" "foo" { + provider = google-beta + name = "topic%{random_suffix}" +} + +resource "google_cloud_run_service" "default" { + provider = google-beta + name = "service-eventarc%{random_suffix}" + location = "us-central1" + + metadata { + namespace = "%{project}" + } + + template { + spec { + containers { + image = "gcr.io/cloudrun/hello" + args = ["arrgs"] + } + container_concurrency = 50 + } + } + + traffic { + percent = 100 + latest_revision = true + } +} +`, context) +} + +func testAccEventarcTrigger_transport(context map[string]interface{}) string { + return Nprintf(` resource "google_eventarc_trigger" "trigger" { provider = google-beta name = "trigger%{random_suffix}" From 26229da339e24148def960f68f97d5b67e44c386 Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Thu, 1 Apr 2021 13:15:11 -0700 Subject: [PATCH 06/11] Adding dataproc workflow --- ...rce_dataproc_workflow_template_test.go.erb | 126 ++ .../terraform/utils/provider.go.erb | 2 + .../api/dataproc/beta/workflow_template.yaml | 1743 +++++++++++++++++ tpgtools/api/dataproc/workflow_template.yaml | 1681 ++++++++++++++++ tpgtools/property.go | 29 +- tpgtools/resource.go | 2 + tpgtools/templates/resource.go.tmpl | 26 +- 7 files changed, 3593 insertions(+), 16 deletions(-) create mode 100644 mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb create mode 100644 tpgtools/api/dataproc/beta/workflow_template.yaml create mode 100644 tpgtools/api/dataproc/workflow_template.yaml diff --git a/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb new file mode 100644 index 000000000000..5f4fa6776cd2 --- /dev/null +++ b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb @@ -0,0 +1,126 @@ +<% autogen_exception -%> +package google + +import ( + "testing" + + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" +) + +func TestAccDataprocWorkflowTemplate_basic(t *testing.T) { + // DCL currently fails due to transport modification + skipIfVcr(t) + t.Parallel() + + context := map[string]interface{}{ + "random_suffix": randString(t, 10), + "project": getTestProjectFromEnv(), + } + + vcrTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t), + ExternalProviders: map[string]resource.ExternalProvider{ + "random": {}, + }, + Steps: []resource.TestStep{ + { + Config: testAccDataprocWorkflowTemplate_basic(context), + }, + { + ImportState: true, + ImportStateVerify: true, + ResourceName: "google_dataproc_workflow_template.template", + }, + }, + }) +} + +func testAccDataprocWorkflowTemplate_basic(context map[string]interface{}) string { + return Nprintf(` +resource "google_dataproc_workflow_template" "template" { + name = "template%{random_suffix}" + location = "us-central1" + placement { + managed_cluster { + cluster_name = "my-cluster" + config { + gce_cluster_config { + zone = "us-central1-a" + tags = ["foo", "bar"] + } + master_config { + num_instances = 1 + machine_type = "n1-standard-1" + disk_config { + disk_boot_type = "pd-ssd" + 
+            boot_disk_size_gb = 15
+          }
+        }
+        worker_config {
+          num_instances = 3
+          machine_type  = "n1-standard-2"
+          disk_config {
+            boot_disk_size_gb = 10
+            num_local_ssds    = 2
+          }
+        }
+        secondary_worker_config {
+          num_instances = 2
+        }
+        software_config {
+          image_version = "1.3.7-deb9"
+        }
+      }
+    }
+  }
+  jobs {
+    step_id = "someJob"
+    spark_job {
+      main_class = "SomeClass"
+    }
+  }
+  jobs {
+    step_id               = "otherJob"
+    prerequisite_step_ids = ["someJob"]
+    presto_job {
+      query_file_uri = "someuri"
+    }
+  }
+}
+`, context)
+}
+
+func funcAccTestDataprocWorkflowTemplateCheckDestroy(t *testing.T) func(s *terraform.State) error {
+  return func(s *terraform.State) error {
+    for name, rs := range s.RootModule().Resources {
+      if rs.Type != "google_dataproc_workflow_template" {
+        continue
+      }
+      if strings.HasPrefix(name, "data.") {
+        continue
+      }
+
+      config := googleProviderConfig(t)
+
+      url, err := replaceVarsForTest(config, rs, "{{DataprocBasePath}}projects/{{project}}/locations/{{location}}/workflowTemplates/{{name}}")
+      if err != nil {
+        return err
+      }
+
+      billingProject := ""
+
+      if config.BillingProject != "" {
+        billingProject = config.BillingProject
+      }
+
+      _, err = sendRequest(config, "GET", billingProject, url, config.userAgent, nil)
+      if err == nil {
+        return fmt.Errorf("DataprocWorkflowTemplate still exists at %s", url)
+      }
+    }
+
+    return nil
+  }
+}

diff --git a/mmv1/third_party/terraform/utils/provider.go.erb b/mmv1/third_party/terraform/utils/provider.go.erb
index d2323a52a1e3..de7f0c715216 100644
--- a/mmv1/third_party/terraform/utils/provider.go.erb
+++ b/mmv1/third_party/terraform/utils/provider.go.erb
@@ -364,6 +364,8 @@ end # products.each do
 <% end -%>
   "google_dataproc_cluster": resourceDataprocCluster(),
   "google_dataproc_job": resourceDataprocJob(),
+  "google_dataproc_workflow_template": resourceDataprocWorkflowTemplate(),
+
   "google_dns_record_set": resourceDnsRecordSet(),
   "google_endpoints_service": resourceEndpointsService(),
   "google_eventarc_trigger": resourceEventarcTrigger(),
   "google_folder": resourceGoogleFolder(),

diff --git a/tpgtools/api/dataproc/beta/workflow_template.yaml b/tpgtools/api/dataproc/beta/workflow_template.yaml
new file mode 100644
index 000000000000..240360c298e4
--- /dev/null
+++ b/tpgtools/api/dataproc/beta/workflow_template.yaml
@@ -0,0 +1,1743 @@
+# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
components:
  schemas:
    ClusterConfig:
      description: Required. The cluster configuration.
      properties:
        autoscalingConfig:
          description: Optional. Autoscaling config for the policy associated with
            the cluster. Cluster does not autoscale if this field is unset.
          properties:
            policy:
              description: 'Optional. The autoscaling policy used by the cluster.
                Only resource names including projectid and location (region) are
                valid. Examples: * `https://www.googleapis.com/compute/v1/projects/`
                Note that the policy must be in the same project and Dataproc region.'
+ type: string + x-dcl-go-name: Policy + x-dcl-references: + - field: name + resource: Dataproc/AutoscalingPolicy + x-kubernetes-immutable: true + type: object + x-dcl-go-name: AutoscalingConfig + x-dcl-go-type: ClusterClusterConfigAutoscalingConfig + x-kubernetes-immutable: true + encryptionConfig: + description: Optional. Encryption settings for the cluster. + properties: + gcePdKmsKeyName: + description: Optional. The Cloud KMS key name to use for PD disk encryption + for all instances in the cluster. + type: string + x-dcl-go-name: GcePdKmsKeyName + x-dcl-references: + - field: selfLink + resource: Cloudkms/CryptoKey + x-kubernetes-immutable: true + type: object + x-dcl-go-name: EncryptionConfig + x-dcl-go-type: ClusterClusterConfigEncryptionConfig + x-kubernetes-immutable: true + endpointConfig: + description: Optional. Port/endpoint configuration for this cluster + properties: + enableHttpPortAccess: + description: Optional. If true, enable http access to specific ports + on the cluster from external sources. Defaults to false. + type: boolean + x-dcl-go-name: EnableHttpPortAccess + x-kubernetes-immutable: true + httpPorts: + additionalProperties: + type: string + description: Output only. The map of port descriptions to URLs. Will + only be populated if enable_http_port_access is true. + readOnly: true + type: object + x-dcl-go-name: HttpPorts + x-kubernetes-immutable: true + type: object + x-dcl-go-name: EndpointConfig + x-dcl-go-type: ClusterClusterConfigEndpointConfig + x-kubernetes-immutable: true + gceClusterConfig: + description: Optional. The shared Compute Engine config settings for all + instances in a cluster. + properties: + internalIPOnly: + description: Optional. If true, all instances in the cluster will only + have internal IP addresses. By default, clusters are not restricted + to internal IP addresses, and will have ephemeral external IP addresses + assigned to each instance. This `internal_ip_only` restriction can + only be enabled for subnetwork enabled networks, and all off-cluster + dependencies must be configured to be accessible without external + IP addresses. + type: boolean + x-dcl-go-name: InternalIPOnly + x-kubernetes-immutable: true + metadata: + additionalProperties: + type: string + description: The Compute Engine metadata entries to add to all instances + (see (https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). + type: object + x-dcl-go-name: Metadata + x-kubernetes-immutable: true + network: + description: Optional. The Compute Engine network to be used for machine + communications. Cannot be specified with subnetwork_uri. If neither + `network_uri` nor `subnetwork_uri` is specified, the "default" network + of the project is used, if it exists. Cannot be a "Custom Subnet Network" + (see /regions/global/default` * `default` + type: string + x-dcl-go-name: Network + x-dcl-references: + - field: selfLink + resource: Compute/Network + x-kubernetes-immutable: true + nodeGroupAffinity: + description: Optional. Node Group Affinity for sole-tenant clusters. + properties: + nodeGroup: + description: Required. 
The URI of a sole-tenant /zones/us-central1-a/nodeGroups/node-group-1` + * `node-group-1` + type: string + x-dcl-go-name: NodeGroup + x-dcl-references: + - field: selfLink + resource: Compute/NodeGroup + x-kubernetes-immutable: true + required: + - nodeGroup + type: object + x-dcl-go-name: NodeGroupAffinity + x-dcl-go-type: ClusterClusterConfigGceClusterConfigNodeGroupAffinity + x-kubernetes-immutable: true + privateIPv6GoogleAccess: + description: 'Optional. The type of IPv6 access for a cluster. Possible + values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED, INHERIT_FROM_SUBNETWORK, + OUTBOUND, BIDIRECTIONAL' + enum: + - PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED + - INHERIT_FROM_SUBNETWORK + - OUTBOUND + - BIDIRECTIONAL + type: string + x-dcl-go-name: PrivateIPv6GoogleAccess + x-dcl-go-type: ClusterClusterConfigGceClusterConfigPrivateIPv6GoogleAccessEnum + x-kubernetes-immutable: true + reservationAffinity: + description: Optional. Reservation Affinity for consuming Zonal reservation. + properties: + consumeReservationType: + description: 'Optional. Type of reservation to consume Possible + values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION' + enum: + - TYPE_UNSPECIFIED + - NO_RESERVATION + - ANY_RESERVATION + - SPECIFIC_RESERVATION + type: string + x-dcl-go-name: ConsumeReservationType + x-dcl-go-type: ClusterClusterConfigGceClusterConfigReservationAffinityConsumeReservationTypeEnum + x-kubernetes-immutable: true + key: + description: Optional. Corresponds to the label key of reservation + resource. + type: string + x-dcl-go-name: Key + x-kubernetes-immutable: true + values: + description: Optional. Corresponds to the label values of reservation + resource. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Values + x-dcl-list-type: list + x-kubernetes-immutable: true + type: object + x-dcl-go-name: ReservationAffinity + x-dcl-go-type: ClusterClusterConfigGceClusterConfigReservationAffinity + x-kubernetes-immutable: true + serviceAccount: + description: Optional. The (https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) + is used. + type: string + x-dcl-go-name: ServiceAccount + x-dcl-references: + - field: email + resource: Iam/ServiceAccount + x-kubernetes-immutable: true + serviceAccountScopes: + description: 'Optional. The URIs of service account scopes to be included + in Compute Engine instances. The following base set of scopes is always + included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly + * https://www.googleapis.com/auth/devstorage.read_write * https://www.googleapis.com/auth/logging.write + If no scopes are specified, the following defaults are also provided: + * https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table + * https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ServiceAccountScopes + x-dcl-list-type: list + x-kubernetes-immutable: true + subnetwork: + description: 'Optional. The Compute Engine subnetwork to be used for + machine communications. Cannot be specified with network_uri. A full + URL, partial URI, or short name are valid. 
Examples: * `https://www.googleapis.com/compute/v1/projects//regions/us-east1/subnetworks/sub0` + * `sub0`' + type: string + x-dcl-go-name: Subnetwork + x-dcl-references: + - field: selfLink + resource: Compute/Subnetwork + x-kubernetes-immutable: true + tags: + description: The Compute Engine tags to add to all instances (see (https://cloud.google.com/compute/docs/label-or-tag-resources#tags)). + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Tags + x-dcl-list-type: set + zone: + description: 'Optional. The zone where the Compute Engine cluster will + be located. On a create request, it is required in the "global" region. + If omitted in a non-global Dataproc region, the service will pick + a zone in the corresponding Compute Engine region. On a get request, + zone will always be present. A full URL, partial URI, or short name + are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/` + * `us-central1-f`' + type: string + x-dcl-go-name: Zone + x-kubernetes-immutable: true + type: object + x-dcl-go-name: GceClusterConfig + x-dcl-go-type: ClusterClusterConfigGceClusterConfig + x-kubernetes-immutable: true + gkeClusterConfig: + description: Optional. The Kubernetes Engine config for Dataproc clusters + deployed to Kubernetes. Setting this is considered mutually exclusive + with Compute Engine-based options such as `gce_cluster_config`, `master_config`, + `worker_config`, `secondary_worker_config`, and `autoscaling_config`. + properties: + namespacedGkeDeploymentTarget: + description: Optional. A target for the deployment. + properties: + clusterNamespace: + description: Optional. A namespace within the GKE cluster to deploy + into. + type: string + x-dcl-go-name: ClusterNamespace + x-kubernetes-immutable: true + targetGkeCluster: + description: 'Optional. The target GKE cluster to deploy to. Format: + ''projects/{project}/locations/{location}/clusters/{cluster_id}''' + type: string + x-dcl-go-name: TargetGkeCluster + x-dcl-references: + - field: name + resource: Container/Cluster + x-kubernetes-immutable: true + type: object + x-dcl-go-name: NamespacedGkeDeploymentTarget + x-dcl-go-type: ClusterClusterConfigGkeClusterConfigNamespacedGkeDeploymentTarget + x-kubernetes-immutable: true + type: object + x-dcl-go-name: GkeClusterConfig + x-dcl-go-type: ClusterClusterConfigGkeClusterConfig + x-kubernetes-immutable: true + initializationActions: + description: 'Optional. Commands to execute on each node after config is + completed. By default, executables are run on master and all worker nodes. + You can test a node''s `role` metadata to run an executable on a master + or worker node, as shown below using `curl` (you can also use `wget`): + ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) + if ; then ... master specific actions ... else ... worker specific actions + ... fi' + items: + properties: + executableFile: + description: Required. Cloud Storage URI of executable file. + type: string + x-dcl-go-name: ExecutableFile + x-kubernetes-immutable: true + executionTimeout: + description: Optional. Amount of time executable has to complete. + Default is 10 minutes (see JSON representation of (https://developers.google.com/protocol-buffers/docs/proto3#json)). + Cluster creation fails with an explanatory error message (the name + of the executable that caused the error and the exceeded timeout + period) if the executable is not completed at end of the timeout + period. 
+ type: string + x-dcl-go-name: ExecutionTimeout + x-kubernetes-immutable: true + type: object + x-dcl-go-type: ClusterClusterConfigInitializationActions + type: array + x-dcl-go-name: InitializationActions + x-dcl-list-type: list + x-kubernetes-immutable: true + lifecycleConfig: + description: Optional. Lifecycle setting for the cluster. + properties: + autoDeleteTime: + description: Optional. The time when cluster will be auto-deleted (see + JSON representation of (https://developers.google.com/protocol-buffers/docs/proto3#json)). + format: date-time + type: string + x-dcl-go-name: AutoDeleteTime + x-kubernetes-immutable: true + autoDeleteTtl: + description: Optional. The lifetime duration of cluster. The cluster + will be auto-deleted at the end of this period. Minimum value is 10 + minutes; maximum value is 14 days (see JSON representation of (https://developers.google.com/protocol-buffers/docs/proto3#json)). + type: string + x-dcl-go-name: AutoDeleteTtl + x-kubernetes-immutable: true + idleDeleteTtl: + description: Optional. The duration to keep the cluster alive while + idling (when no jobs are running). Passing this threshold will cause + the cluster to be deleted. Minimum value is 5 minutes; maximum value + is 14 days (see JSON representation of (https://developers.google.com/protocol-buffers/docs/proto3#json). + type: string + x-dcl-go-name: IdleDeleteTtl + x-kubernetes-immutable: true + idleStartTime: + description: Output only. The time when cluster became idle (most recent + job finished) and became eligible for deletion due to idleness (see + JSON representation of (https://developers.google.com/protocol-buffers/docs/proto3#json)). + format: date-time + readOnly: true + type: string + x-dcl-go-name: IdleStartTime + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LifecycleConfig + x-dcl-go-type: ClusterClusterConfigLifecycleConfig + x-kubernetes-immutable: true + masterConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: MasterConfig + metastoreConfig: + description: Optional. Metastore configuration. + properties: + dataprocMetastoreService: + description: 'Required. Resource name of an existing Dataproc Metastore + service. Example: * `projects/`' + type: string + x-dcl-go-name: DataprocMetastoreService + x-dcl-references: + - field: selfLink + resource: Metastore/Service + x-kubernetes-immutable: true + required: + - dataprocMetastoreService + type: object + x-dcl-go-name: MetastoreConfig + x-dcl-go-type: ClusterClusterConfigMetastoreConfig + x-kubernetes-immutable: true + secondaryWorkerConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: SecondaryWorkerConfig + securityConfig: + description: Optional. Security settings for the cluster. + properties: + kerberosConfig: + description: Kerberos related configuration. + properties: + crossRealmTrustAdminServer: + description: Optional. The admin server (IP or hostname) for the + remote trusted realm in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustAdminServer + x-kubernetes-immutable: true + crossRealmTrustKdc: + description: Optional. The KDC (IP or hostname) for the remote trusted + realm in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustKdc + x-kubernetes-immutable: true + crossRealmTrustRealm: + description: Optional. The remote realm the Dataproc on-cluster + KDC will trust, should the user enable cross realm trust. 
+ type: string + x-dcl-go-name: CrossRealmTrustRealm + x-kubernetes-immutable: true + crossRealmTrustSharedPassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the shared password between the on-cluster Kerberos + realm and the remote trusted realm, in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustSharedPassword + x-kubernetes-immutable: true + enableKerberos: + description: 'Optional. Flag to indicate whether to Kerberize the + cluster (default: false). Set this field to true to enable Kerberos + on a cluster.' + type: boolean + x-dcl-go-name: EnableKerberos + x-kubernetes-immutable: true + kdcDbKey: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the master key of the KDC database. + type: string + x-dcl-go-name: KdcDbKey + x-kubernetes-immutable: true + keyPassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided key. For the + self-signed certificate, this password is generated by Dataproc. + type: string + x-dcl-go-name: KeyPassword + x-kubernetes-immutable: true + keystore: + description: Optional. The Cloud Storage URI of the keystore file + used for SSL encryption. If not provided, Dataproc will provide + a self-signed certificate. + type: string + x-dcl-go-name: Keystore + x-kubernetes-immutable: true + keystorePassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided keystore. For + the self-signed certificate, this password is generated by Dataproc. + type: string + x-dcl-go-name: KeystorePassword + x-kubernetes-immutable: true + kmsKey: + description: Optional. The uri of the KMS key used to encrypt various + sensitive files. + type: string + x-dcl-go-name: KmsKey + x-dcl-references: + - field: selfLink + resource: Cloudkms/CryptoKey + x-kubernetes-immutable: true + realm: + description: Optional. The name of the on-cluster Kerberos realm. + If not specified, the uppercased domain of hostnames will be the + realm. + type: string + x-dcl-go-name: Realm + x-kubernetes-immutable: true + rootPrincipalPassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the root principal password. + type: string + x-dcl-go-name: RootPrincipalPassword + x-kubernetes-immutable: true + tgtLifetimeHours: + description: Optional. The lifetime of the ticket granting ticket, + in hours. If not specified, or user specifies 0, then default + value 10 will be used. + format: int64 + type: integer + x-dcl-go-name: TgtLifetimeHours + x-kubernetes-immutable: true + truststore: + description: Optional. The Cloud Storage URI of the truststore file + used for SSL encryption. If not provided, Dataproc will provide + a self-signed certificate. + type: string + x-dcl-go-name: Truststore + x-kubernetes-immutable: true + truststorePassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided truststore. + For the self-signed certificate, this password is generated by + Dataproc. 
+ type: string + x-dcl-go-name: TruststorePassword + x-kubernetes-immutable: true + type: object + x-dcl-go-name: KerberosConfig + x-dcl-go-type: ClusterClusterConfigSecurityConfigKerberosConfig + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SecurityConfig + x-dcl-go-type: ClusterClusterConfigSecurityConfig + x-kubernetes-immutable: true + softwareConfig: + description: Optional. The config settings for software inside the cluster. + properties: + imageVersion: + description: Optional. The version of software inside the cluster. It + must be one of the supported (https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions). + If unspecified, it defaults to the latest Debian version. + type: string + x-dcl-go-name: ImageVersion + x-kubernetes-immutable: true + optionalComponents: + description: Optional. The set of components to activate on the cluster. + items: + type: string + x-dcl-go-type: ClusterClusterConfigSoftwareConfigOptionalComponentsEnum + type: array + x-dcl-go-name: OptionalComponents + x-dcl-list-type: list + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: 'Optional. The properties to set on daemon config files. + Property keys are specified in `prefix:property` format, for example + `core:hadoop.tmp.dir`. The following are supported prefixes and their + mappings: * capacity-scheduler: `capacity-scheduler.xml` * core: `core-site.xml` + * distcp: `distcp-default.xml` * hdfs: `hdfs-site.xml` * hive: `hive-site.xml` + * mapred: `mapred-site.xml` * pig: `pig.properties` * spark: `spark-defaults.conf` + * yarn: `yarn-site.xml` For more information, see (https://cloud.google.com/dataproc/docs/concepts/cluster-properties).' + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SoftwareConfig + x-dcl-go-type: ClusterClusterConfigSoftwareConfig + x-kubernetes-immutable: true + stagingBucket: + description: Optional. A Cloud Storage bucket used to stage job dependencies, + config files, and job driver console output. If you do not specify a staging + bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, + or EU) for your cluster's staging bucket according to the Compute Engine + zone where your cluster is deployed, and then create and manage this project-level, + per-location bucket (see (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). + type: string + x-dcl-go-name: StagingBucket + x-dcl-references: + - field: name + resource: Storage/Bucket + x-kubernetes-immutable: true + tempBucket: + description: Optional. A Cloud Storage bucket used to store ephemeral cluster + and jobs data, such as Spark and MapReduce history files. If you do not + specify a temp bucket, Dataproc will determine a Cloud Storage location + (US, ASIA, or EU) for your cluster's temp bucket according to the Compute + Engine zone where your cluster is deployed, and then create and manage + this project-level, per-location bucket. The default bucket has a TTL + of 90 days, but you can use any TTL (or none) if you specify a bucket. + type: string + x-dcl-go-name: TempBucket + x-dcl-references: + - field: name + resource: Storage/Bucket + x-kubernetes-immutable: true + workerConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: WorkerConfig + type: object + x-dcl-go-name: Config + x-dcl-go-type: ClusterClusterConfig + x-kubernetes-immutable: true + InstanceGroupConfig: + description: Optional. 
The Compute Engine config settings for additional worker + instances in a cluster. + properties: + accelerators: + description: Optional. The Compute Engine accelerator configuration for + these instances. + items: + properties: + acceleratorCount: + description: The number of the accelerator cards of this type exposed + to this instance. + format: int64 + type: integer + x-dcl-go-name: AcceleratorCount + x-kubernetes-immutable: true + acceleratorType: + description: Full URL, partial URI, or short name of the accelerator + type resource to expose to this instance. See (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) + feature, you must use the short name of the accelerator type resource, + for example, `nvidia-tesla-k80`. + type: string + x-dcl-go-name: AcceleratorType + x-kubernetes-immutable: true + type: object + x-dcl-go-type: ClusterInstanceGroupConfigAccelerators + type: array + x-dcl-go-name: Accelerators + x-dcl-list-type: list + x-kubernetes-immutable: true + diskConfig: + description: Optional. Disk option config settings. + properties: + bootDiskSizeGb: + description: Optional. Size in GB of the boot disk (default is 500GB). + format: int64 + type: integer + x-dcl-go-name: BootDiskSizeGb + x-kubernetes-immutable: true + bootDiskType: + description: 'Optional. Type of the boot disk (default is "pd-standard"). + Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" + (Persistent Disk Hard Disk Drive).' + type: string + x-dcl-go-name: BootDiskType + x-kubernetes-immutable: true + numLocalSsds: + description: Optional. Number of attached SSDs, from 0 to 4 (default + is 0). If SSDs are not attached, the boot disk is used to store runtime + logs and (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) + data. If one or more SSDs are attached, this runtime bulk data is + spread across them, and the boot disk contains only basic config and + installed binaries. + format: int64 + type: integer + x-dcl-go-name: NumLocalSsds + x-kubernetes-immutable: true + type: object + x-dcl-go-name: DiskConfig + x-dcl-go-type: ClusterInstanceGroupConfigDiskConfig + x-kubernetes-immutable: true + image: + description: 'Optional. The Compute Engine image resource used for cluster + instances. The URI can represent an image or image family. Image examples: + * `https://www.googleapis.com/compute/beta/projects/` If the URI is unspecified, + it will be inferred from `SoftwareConfig.image_version` or the system + default.' + type: string + x-dcl-go-name: Image + x-dcl-references: + - field: selfLink + resource: Compute/Image + x-kubernetes-immutable: true + instanceNames: + description: Output only. The list of instance names. Dataproc derives the + names from `cluster_name`, `num_instances`, and the instance group. + items: + type: string + x-dcl-go-type: string + x-dcl-references: + - field: selfLink + resource: Compute/Instance + readOnly: true + type: array + x-dcl-go-name: InstanceNames + x-dcl-list-type: list + x-kubernetes-immutable: true + isPreemptible: + description: Output only. Specifies that this instance group contains preemptible + instances. + readOnly: true + type: boolean + x-dcl-go-name: IsPreemptible + x-kubernetes-immutable: true + machineType: + description: 'Optional. The Compute Engine machine type used for cluster + instances. A full URL, partial URI, or short name are valid. 
Examples: + * `https://www.googleapis.com/compute/v1/projects/(https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) + feature, you must use the short name of the machine type resource, for + example, `n1-standard-2`.' + type: string + x-dcl-go-name: MachineType + x-kubernetes-immutable: true + managedGroupConfig: + description: Output only. The config for Compute Engine Instance Group Manager + that manages this group. This is only used for preemptible instance groups. + properties: + instanceGroupManagerName: + description: Output only. The name of the Instance Group Manager for + this group. + readOnly: true + type: string + x-dcl-go-name: InstanceGroupManagerName + x-kubernetes-immutable: true + instanceTemplateName: + description: Output only. The name of the Instance Template used for + the Managed Instance Group. + readOnly: true + type: string + x-dcl-go-name: InstanceTemplateName + x-kubernetes-immutable: true + readOnly: true + type: object + x-dcl-go-name: ManagedGroupConfig + x-dcl-go-type: ClusterInstanceGroupConfigManagedGroupConfig + x-kubernetes-immutable: true + minCpuPlatform: + description: Optional. Specifies the minimum cpu platform for the Instance + Group. See (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + type: string + x-dcl-go-name: MinCpuPlatform + x-kubernetes-immutable: true + numInstances: + description: Optional. The number of VM instances in the instance group. + For master instance groups, must be set to 1. + format: int64 + type: integer + x-dcl-go-name: NumInstances + x-kubernetes-immutable: true + preemptibility: + description: 'Optional. Specifies the preemptibility of the instance group. + The default value for master and worker groups is `NON_PREEMPTIBLE`. This + default cannot be changed. The default value for secondary instances is + `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED, NON_PREEMPTIBLE, + PREEMPTIBLE' + enum: + - PREEMPTIBILITY_UNSPECIFIED + - NON_PREEMPTIBLE + - PREEMPTIBLE + type: string + x-dcl-go-name: Preemptibility + x-dcl-go-type: ClusterInstanceGroupConfigPreemptibilityEnum + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SecondaryWorkerConfig + x-dcl-go-type: ClusterInstanceGroupConfig + x-kubernetes-immutable: true + WorkflowTemplate: + properties: + createTime: + description: Output only. The time template was created. + format: date-time + readOnly: true + type: string + x-dcl-go-name: CreateTime + x-kubernetes-immutable: true + dagTimeout: + description: Optional. Timeout duration for the DAG of jobs. You can use + "s", "m", "h", and "d" suffixes for second, minute, hour, and day duration + values, respectively. The timeout duration must be from 10 minutes ("10m") + to 24 hours ("24h" or "1d"). The timer begins when the first job is submitted. + If the workflow is running at the end of the timeout period, any remaining + jobs are cancelled, the workflow is ended, and if the workflow was running + on a (/dataproc/docs/concepts/workflows/using-workflows#configuring_or_selecting_a_cluster), + the cluster is deleted. + type: string + x-dcl-go-name: DagTimeout + x-kubernetes-immutable: true + jobs: + description: Required. The Directed Acyclic Graph of Jobs to submit. + items: + properties: + hadoopJob: + description: Optional. Job is a Hadoop job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + in the working directory of Hadoop drivers and tasks. 
Supported + file types: .jar, .tar, .tar.gz, .tgz, or .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `-libjars` or `-Dfoo=bar`, that + can be set as job properties, since a collision may occur that + causes an incorrect job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS (Hadoop Compatible Filesystem) URIs + of files to be copied to the working directory of Hadoop drivers + and distributed tasks. Useful for naively parallel tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + jarFileUris: + description: Optional. Jar file URIs to add to the CLASSPATHs + of the Hadoop driver and tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsHadoopJobLoggingConfig + x-kubernetes-immutable: true + mainClass: + description: The name of the driver's main class. The jar file + containing the class must be in the default CLASSPATH or specified + in `jar_file_uris`. + type: string + x-dcl-go-name: MainClass + x-kubernetes-immutable: true + mainJarFileUri: + description: 'The HCFS URI of the jar file containing the main + class. Examples: ''gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'' + ''hdfs:/tmp/test-samples/custom-wordcount.jar'' ''file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar''' + type: string + x-dcl-go-name: MainJarFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Hadoop. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/hadoop/conf/*-site and classes in user code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: HadoopJob + x-dcl-go-type: WorkflowTemplateJobsHadoopJob + x-kubernetes-immutable: true + hiveJob: + description: Optional. Job is a Hive job. + properties: + continueOnFailure: + description: Optional. Whether to continue executing queries if + a query fails. The default value is `false`. Setting to `true` + can be useful when executing independent parallel queries. + type: boolean + x-dcl-go-name: ContinueOnFailure + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATH + of the Hive server and Hadoop MapReduce (MR) tasks. Can contain + Hive SerDes and UDFs. 
+ items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names and values, + used to configure Hive. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, + and classes in user code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains Hive queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsHiveJobQueryList + x-kubernetes-immutable: true + scriptVariables: + additionalProperties: + type: string + description: 'Optional. Mapping of query variable names to values + (equivalent to the Hive command: `SET name="value";`).' + type: object + x-dcl-go-name: ScriptVariables + x-kubernetes-immutable: true + type: object + x-dcl-go-name: HiveJob + x-dcl-go-type: WorkflowTemplateJobsHiveJob + x-kubernetes-immutable: true + labels: + additionalProperties: + type: string + description: 'Optional. The labels to associate with this job. Label + keys must be between 1 and 63 characters long, and must conform + to the following regular expression: {0,63} No more than 32 labels + can be associated with a given job.' + type: object + x-dcl-go-name: Labels + x-kubernetes-immutable: true + pigJob: + description: Optional. Job is a Pig job. + properties: + continueOnFailure: + description: Optional. Whether to continue executing queries if + a query fails. The default value is `false`. Setting to `true` + can be useful when executing independent parallel queries. + type: boolean + x-dcl-go-name: ContinueOnFailure + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATH + of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain + Pig UDFs. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsPigJobLoggingConfig + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. 
A mapping of property names to values, + used to configure Pig. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, + and classes in user code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains the Pig + queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsPigJobQueryList + x-kubernetes-immutable: true + scriptVariables: + additionalProperties: + type: string + description: 'Optional. Mapping of query variable names to values + (equivalent to the Pig command: `name=`).' + type: object + x-dcl-go-name: ScriptVariables + x-kubernetes-immutable: true + type: object + x-dcl-go-name: PigJob + x-dcl-go-type: WorkflowTemplateJobsPigJob + x-kubernetes-immutable: true + prerequisiteStepIds: + description: Optional. The optional list of prerequisite job step_ids. + If not specified, the job will start at the beginning of workflow. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: PrerequisiteStepIds + x-dcl-list-type: list + x-kubernetes-immutable: true + prestoJob: + description: Optional. Job is a Presto job. + properties: + clientTags: + description: Optional. Presto client tags to attach to this query + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ClientTags + x-dcl-list-type: list + x-kubernetes-immutable: true + continueOnFailure: + description: Optional. Whether to continue executing queries if + a query fails. The default value is `false`. Setting to `true` + can be useful when executing independent parallel queries. + type: boolean + x-dcl-go-name: ContinueOnFailure + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsPrestoJobLoggingConfig + x-kubernetes-immutable: true + outputFormat: + description: Optional. The format in which query output will be + displayed. See the Presto documentation for supported output + formats + type: string + x-dcl-go-name: OutputFormat + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values. 
+ Used to set Presto (https://prestodb.io/docs/current/sql/set-session.html) + Equivalent to using the --session flag in the Presto CLI + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains SQL queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsPrestoJobQueryList + x-kubernetes-immutable: true + type: object + x-dcl-go-name: PrestoJob + x-dcl-go-type: WorkflowTemplateJobsPrestoJob + x-kubernetes-immutable: true + pysparkJob: + description: Optional. Job is a PySpark job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + into the working directory of each executor. Supported file + types: .jar, .tar, .tar.gz, .tgz, and .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `--conf`, that can be set as + job properties, since a collision may occur that causes an incorrect + job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS URIs of files to be placed in the + working directory of each executor. Useful for naively parallel + tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATHs + of the Python driver and tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsPysparkJobLoggingConfig + x-kubernetes-immutable: true + mainPythonFileUri: + description: Required. The HCFS URI of the main Python file to + use as the driver. Must be a .py file. + type: string + x-dcl-go-name: MainPythonFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure PySpark. Properties that conflict with values + set by the Dataproc API may be overwritten. 
Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + pythonFileUris: + description: 'Optional. HCFS file URIs of Python files to pass + to the PySpark framework. Supported file types: .py, .egg, and + .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: PythonFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - mainPythonFileUri + type: object + x-dcl-go-name: PysparkJob + x-dcl-go-type: WorkflowTemplateJobsPysparkJob + x-kubernetes-immutable: true + scheduling: + description: Optional. Job scheduling configuration. + properties: + maxFailuresPerHour: + description: Optional. Maximum number of times per hour a driver + may be restarted as a result of driver exiting with non-zero + code before job is reported failed. A job may be reported as + thrashing if driver exits with non-zero code 4 times within + 10 minute window. Maximum value is 10. + format: int64 + type: integer + x-dcl-go-name: MaxFailuresPerHour + x-kubernetes-immutable: true + maxFailuresTotal: + description: Optional. Maximum number of times in total a driver + may be restarted as a result of driver exiting with non-zero + code before job is reported failed. Maximum value is 240 + format: int64 + type: integer + x-dcl-go-name: MaxFailuresTotal + x-kubernetes-immutable: true + type: object + x-dcl-go-name: Scheduling + x-dcl-go-type: WorkflowTemplateJobsScheduling + x-kubernetes-immutable: true + sparkJob: + description: Optional. Job is a Spark job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + into the working directory of each executor. Supported file + types: .jar, .tar, .tar.gz, .tgz, and .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `--conf`, that can be set as + job properties, since a collision may occur that causes an incorrect + job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS URIs of files to be placed in the + working directory of each executor. Useful for naively parallel + tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATHs + of the Spark driver and tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkJobLoggingConfig + x-kubernetes-immutable: true + mainClass: + description: The name of the driver's main class. 
The jar file + that contains the class must be in the default CLASSPATH or + specified in `jar_file_uris`. + type: string + x-dcl-go-name: MainClass + x-kubernetes-immutable: true + mainJarFileUri: + description: The HCFS URI of the jar file that contains the main + class. + type: string + x-dcl-go-name: MainJarFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Spark. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SparkJob + x-dcl-go-type: WorkflowTemplateJobsSparkJob + x-kubernetes-immutable: true + sparkRJob: + description: Optional. Job is a SparkR job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + into the working directory of each executor. Supported file + types: .jar, .tar, .tar.gz, .tgz, and .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `--conf`, that can be set as + job properties, since a collision may occur that causes an incorrect + job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS URIs of files to be placed in the + working directory of each executor. Useful for naively parallel + tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkRJobLoggingConfig + x-kubernetes-immutable: true + mainRFileUri: + description: Required. The HCFS URI of the main R file to use + as the driver. Must be a .R file. + type: string + x-dcl-go-name: MainRFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure SparkR. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + required: + - mainRFileUri + type: object + x-dcl-go-name: SparkRJob + x-dcl-go-type: WorkflowTemplateJobsSparkRJob + x-kubernetes-immutable: true + sparkSqlJob: + description: Optional. Job is a SparkSql job. + properties: + jarFileUris: + description: Optional. HCFS URIs of jar files to be added to the + Spark CLASSPATH. 
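+ # A minimal sketch (not part of the generated schema) of a `sparkJob` step
+ # as described above; the class and jar path follow the common SparkPi
+ # sample and are illustrative only. `mainClass` and `mainJarFileUri` are
+ # alternative entry points:
+ #   sparkJob:
+ #     mainClass: org.apache.spark.examples.SparkPi
+ #     jarFileUris:
+ #     - file:///usr/lib/spark/examples/jars/spark-examples.jar
+ #     args:
+ #     - "1000"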
+ items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJobLoggingConfig + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Spark SQL's SparkConf. Properties that conflict + with values set by the Dataproc API may be overwritten. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains SQL queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJobQueryList + x-kubernetes-immutable: true + scriptVariables: + additionalProperties: + type: string + description: 'Optional. Mapping of query variable names to values + (equivalent to the Spark SQL command: SET `name="value";`).' + type: object + x-dcl-go-name: ScriptVariables + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SparkSqlJob + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJob + x-kubernetes-immutable: true + stepId: + description: Required. The step id. The id must be unique among all + jobs within the template. The step id is used as prefix for job + id, as job `goog-dataproc-workflow-step-id` label, and in field + from other steps. The id must contain only letters (a-z, A-Z), numbers + (0-9), underscores (_), and hyphens (-). Cannot begin or end with + underscore or hyphen. Must consist of between 3 and 50 characters. + type: string + x-dcl-go-name: StepId + x-kubernetes-immutable: true + required: + - stepId + type: object + x-dcl-go-type: WorkflowTemplateJobs + type: array + x-dcl-go-name: Jobs + x-dcl-list-type: list + x-kubernetes-immutable: true + labels: + additionalProperties: + type: string + description: Optional. The labels to associate with this template. These + labels will be propagated to all jobs and clusters created by the workflow + instance. Label **keys** must contain 1 to 63 characters, and must conform + to (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can + be associated with a template. 
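+ # A minimal sketch of how `stepId` and `prerequisiteStepIds` express the
+ # job DAG described above; step ids are illustrative. Here `analyze` only
+ # starts after `prepare` completes:
+ #   jobs:
+ #   - stepId: prepare
+ #     hadoopJob: {...}
+ #   - stepId: analyze
+ #     prerequisiteStepIds: ["prepare"]
+ #     sparkJob: {...}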
+ type: object
+ x-dcl-go-name: Labels
+ x-kubernetes-immutable: true
+ location:
+ description: The location for the resource
+ type: string
+ x-dcl-go-name: Location
+ x-kubernetes-immutable: true
+ name:
+ description: 'Output only. The resource name of the workflow template, as
+ described in https://cloud.google.com/apis/design/resource_names. * For
+ `projects.regions.workflowTemplates`, the resource name of the template
+ has the following format: `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}`
+ * For `projects.locations.workflowTemplates`, the resource name of the
+ template has the following format: `projects/{project_id}/locations/{location}/workflowTemplates/{template_id}`'
+ type: string
+ x-dcl-go-name: Name
+ x-kubernetes-immutable: true
+ parameters:
+ description: Optional. Template parameters whose values are substituted
+ into the template. Values for parameters must be provided when the template
+ is instantiated.
+ items:
+ properties:
+ description:
+ description: Optional. Brief description of the parameter. Must not
+ exceed 1024 characters.
+ type: string
+ x-dcl-go-name: Description
+ x-kubernetes-immutable: true
+ fields:
+ description: Required. Paths to all fields that the parameter replaces.
+ A field is allowed to appear in at most one parameter's list of
+ field paths. A field path is similar in syntax to a google.protobuf.FieldMask;
+ for example, a field path that references a job's arguments would
+ be specified as `jobs['step-id'].sparkJob.args`.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Fields
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ name:
+ description: Required. Parameter name. The parameter name is used
+ as the key, and paired with the parameter value, which are passed
+ to the template when the template is instantiated. The name must
+ contain only capital letters (A-Z), numbers (0-9), and underscores
+ (_), and must not start with a number. The maximum length is 40
+ characters.
+ type: string
+ x-dcl-go-name: Name
+ x-kubernetes-immutable: true
+ validation:
+ description: Optional. Validation rules to be applied to this parameter's
+ value.
+ properties:
+ regex:
+ description: Validation based on regular expressions.
+ properties:
+ regexes:
+ description: Required. RE2 regular expressions used to validate
+ the parameter's value. The value must match the regex in
+ its entirety (substring matches are not sufficient).
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Regexes
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ required:
+ - regexes
+ type: object
+ x-dcl-go-name: Regex
+ x-dcl-go-type: WorkflowTemplateParametersValidationRegex
+ x-kubernetes-immutable: true
+ values:
+ description: Validation based on a list of allowed values.
+ properties:
+ values:
+ description: Required. List of allowed values for the parameter.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Values
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ required:
+ - values
+ type: object
+ x-dcl-go-name: Values
+ x-dcl-go-type: WorkflowTemplateParametersValidationValues
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: Validation
+ x-dcl-go-type: WorkflowTemplateParametersValidation
+ x-kubernetes-immutable: true
+ required:
+ - name
+ - fields
+ type: object
+ x-dcl-go-type: WorkflowTemplateParameters
+ type: array
+ x-dcl-go-name: Parameters
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ placement:
+ description: Required. WorkflowTemplate scheduling information.
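+ # A minimal sketch of the `placement` block defined below; the label is
+ # illustrative. A template either selects an existing cluster by label
+ # (clusterSelector) or describes a cluster to create (managedCluster):
+ #   placement:
+ #     clusterSelector:
+ #       clusterLabels:
+ #         env: staging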
+ properties: + clusterSelector: + description: Optional. A selector that chooses target cluster for jobs + based on metadata. The selector is evaluated at the time each job + is submitted. + properties: + clusterLabels: + additionalProperties: + type: string + description: Required. The cluster labels. Cluster must have all + labels to match. + type: object + x-dcl-go-name: ClusterLabels + x-kubernetes-immutable: true + zone: + description: Optional. The zone where workflow process executes. + This parameter does not affect the selection of the cluster. If + unspecified, the zone of the first cluster matching the selector + is used. + type: string + x-dcl-go-name: Zone + x-kubernetes-immutable: true + required: + - clusterLabels + type: object + x-dcl-go-name: ClusterSelector + x-dcl-go-type: WorkflowTemplatePlacementClusterSelector + x-kubernetes-immutable: true + managedCluster: + description: A cluster that is managed by the workflow. + properties: + clusterName: + description: Required. The cluster name prefix. A unique cluster + name will be formed by appending a random suffix. The name must + contain only lower-case letters (a-z), numbers (0-9), and hyphens + (-). Must begin with a letter. Cannot begin or end with hyphen. + Must consist of between 2 and 35 characters. + type: string + x-dcl-go-name: ClusterName + x-kubernetes-immutable: true + config: + $ref: '#/components/schemas/ClusterConfig' + x-dcl-go-name: Config + labels: + additionalProperties: + type: string + description: 'Optional. The labels to associate with this cluster. + Label keys must be between 1 and 63 characters long, and must + conform to the following PCRE regular expression: {0,63} No more + than 32 labels can be associated with a given cluster.' + type: object + x-dcl-go-name: Labels + x-kubernetes-immutable: true + required: + - clusterName + - config + type: object + x-dcl-go-name: ManagedCluster + x-dcl-go-type: WorkflowTemplatePlacementManagedCluster + x-kubernetes-immutable: true + type: object + x-dcl-go-name: Placement + x-dcl-go-type: WorkflowTemplatePlacement + x-kubernetes-immutable: true + project: + description: The project for the resource + type: string + x-dcl-go-name: Project + x-dcl-references: + - field: name + parent: true + resource: Cloudresourcemanager/Project + x-kubernetes-immutable: true + updateTime: + description: Output only. The time template was last updated. + format: date-time + readOnly: true + type: string + x-dcl-go-name: UpdateTime + x-kubernetes-immutable: true + version: + description: Optional. Used to perform a consistent read-modify-write. This + field should be left blank for a `CreateWorkflowTemplate` request. It + is required for an `UpdateWorkflowTemplate` request, and must match the + current server version. A typical update template flow would fetch the + current template with a `GetWorkflowTemplate` request, which will return + the current template with the `version` field filled in with the current + server version. The user updates other fields in the template, then returns + it as part of the `UpdateWorkflowTemplate` request. 
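+ # The consistent read-modify-write flow that `version` supports, sketched
+ # with the request names from the description above:
+ #   1. GetWorkflowTemplate returns the template with `version` filled in.
+ #   2. Edit other fields locally, leaving `version` unchanged.
+ #   3. UpdateWorkflowTemplate succeeds only while `version` still matches
+ #      the server, guarding against concurrent modification.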
+ format: int64 + type: integer + x-dcl-go-name: Version + x-kubernetes-immutable: true + required: + - name + - placement + - jobs + - project + - location + title: WorkflowTemplate + type: object + x-dcl-id: projects/{{project}}/locations/{{location}}/workflowTemplates/{{name}} + x-dcl-labels: labels + x-dcl-locations: [] + x-dcl-parent-container: project + x-dcl-uses-state-hint: false +info: + description: DCL Specification for the Dataproc WorkflowTemplate resource + title: Dataproc/WorkflowTemplate + x-dcl-has-iam: false +paths: + apply: + description: The function used to apply information about a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + delete: + description: The function used to delete a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + deleteAll: + description: The function used to delete all WorkflowTemplate + parameters: + - name: project + required: true + schema: + type: string + - name: location + required: true + schema: + type: string + get: + description: The function used to get information about a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + list: + description: The function used to list information about many WorkflowTemplate + parameters: + - name: project + required: true + schema: + type: string + - name: location + required: true + schema: + type: string \ No newline at end of file diff --git a/tpgtools/api/dataproc/workflow_template.yaml b/tpgtools/api/dataproc/workflow_template.yaml new file mode 100644 index 000000000000..2f800d776ba5 --- /dev/null +++ b/tpgtools/api/dataproc/workflow_template.yaml @@ -0,0 +1,1681 @@ +# Copyright 2021 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +components: + schemas: + ClusterConfig: + description: Required. The cluster configuration. + properties: + autoscalingConfig: + description: Optional. Autoscaling config for the policy associated with + the cluster. Cluster does not autoscale if this field is unset. + properties: + policy: + description: 'Optional. The autoscaling policy used by the cluster. + Only resource names including projectid and location (region) are + valid. Examples: * `https://www.googleapis.com/compute/v1/projects/` + Note that the policy must be in the same project and Dataproc region.' + type: string + x-dcl-go-name: Policy + x-dcl-references: + - field: name + resource: Dataproc/AutoscalingPolicy + x-kubernetes-immutable: true + type: object + x-dcl-go-name: AutoscalingConfig + x-dcl-go-type: ClusterClusterConfigAutoscalingConfig + x-kubernetes-immutable: true + encryptionConfig: + description: Optional. Encryption settings for the cluster. + properties: + gcePdKmsKeyName: + description: Optional. The Cloud KMS key name to use for PD disk encryption + for all instances in the cluster. 
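+ # For reference, Cloud KMS key names as expected here follow the standard
+ # resource name format; segment values are illustrative:
+ #   projects/{project}/locations/{location}/keyRings/{ring}/cryptoKeys/{key}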
+ type: string + x-dcl-go-name: GcePdKmsKeyName + x-dcl-references: + - field: selfLink + resource: Cloudkms/CryptoKey + x-kubernetes-immutable: true + type: object + x-dcl-go-name: EncryptionConfig + x-dcl-go-type: ClusterClusterConfigEncryptionConfig + x-kubernetes-immutable: true + endpointConfig: + description: Optional. Port/endpoint configuration for this cluster + properties: + enableHttpPortAccess: + description: Optional. If true, enable http access to specific ports + on the cluster from external sources. Defaults to false. + type: boolean + x-dcl-go-name: EnableHttpPortAccess + x-kubernetes-immutable: true + httpPorts: + additionalProperties: + type: string + description: Output only. The map of port descriptions to URLs. Will + only be populated if enable_http_port_access is true. + readOnly: true + type: object + x-dcl-go-name: HttpPorts + x-kubernetes-immutable: true + type: object + x-dcl-go-name: EndpointConfig + x-dcl-go-type: ClusterClusterConfigEndpointConfig + x-kubernetes-immutable: true + gceClusterConfig: + description: Optional. The shared Compute Engine config settings for all + instances in a cluster. + properties: + internalIPOnly: + description: Optional. If true, all instances in the cluster will only + have internal IP addresses. By default, clusters are not restricted + to internal IP addresses, and will have ephemeral external IP addresses + assigned to each instance. This `internal_ip_only` restriction can + only be enabled for subnetwork enabled networks, and all off-cluster + dependencies must be configured to be accessible without external + IP addresses. + type: boolean + x-dcl-go-name: InternalIPOnly + x-kubernetes-immutable: true + metadata: + additionalProperties: + type: string + description: The Compute Engine metadata entries to add to all instances + (see (https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). + type: object + x-dcl-go-name: Metadata + x-kubernetes-immutable: true + network: + description: Optional. The Compute Engine network to be used for machine + communications. Cannot be specified with subnetwork_uri. If neither + `network_uri` nor `subnetwork_uri` is specified, the "default" network + of the project is used, if it exists. Cannot be a "Custom Subnet Network" + (see /regions/global/default` * `default` + type: string + x-dcl-go-name: Network + x-dcl-references: + - field: selfLink + resource: Compute/Network + x-kubernetes-immutable: true + nodeGroupAffinity: + description: Optional. Node Group Affinity for sole-tenant clusters. + properties: + nodeGroup: + description: Required. The URI of a sole-tenant /zones/us-central1-a/nodeGroups/node-group-1` + * `node-group-1` + type: string + x-dcl-go-name: NodeGroup + x-dcl-references: + - field: selfLink + resource: Compute/NodeGroup + x-kubernetes-immutable: true + required: + - nodeGroup + type: object + x-dcl-go-name: NodeGroupAffinity + x-dcl-go-type: ClusterClusterConfigGceClusterConfigNodeGroupAffinity + x-kubernetes-immutable: true + privateIPv6GoogleAccess: + description: 'Optional. The type of IPv6 access for a cluster. 
Possible
+ values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED, INHERIT_FROM_SUBNETWORK,
+ OUTBOUND, BIDIRECTIONAL'
+ enum:
+ - PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED
+ - INHERIT_FROM_SUBNETWORK
+ - OUTBOUND
+ - BIDIRECTIONAL
+ type: string
+ x-dcl-go-name: PrivateIPv6GoogleAccess
+ x-dcl-go-type: ClusterClusterConfigGceClusterConfigPrivateIPv6GoogleAccessEnum
+ x-kubernetes-immutable: true
+ reservationAffinity:
+ description: Optional. Reservation Affinity for consuming Zonal reservation.
+ properties:
+ consumeReservationType:
+ description: 'Optional. Type of reservation to consume. Possible
+ values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION'
+ enum:
+ - TYPE_UNSPECIFIED
+ - NO_RESERVATION
+ - ANY_RESERVATION
+ - SPECIFIC_RESERVATION
+ type: string
+ x-dcl-go-name: ConsumeReservationType
+ x-dcl-go-type: ClusterClusterConfigGceClusterConfigReservationAffinityConsumeReservationTypeEnum
+ x-kubernetes-immutable: true
+ key:
+ description: Optional. Corresponds to the label key of reservation
+ resource.
+ type: string
+ x-dcl-go-name: Key
+ x-kubernetes-immutable: true
+ values:
+ description: Optional. Corresponds to the label values of reservation
+ resource.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Values
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: ReservationAffinity
+ x-dcl-go-type: ClusterClusterConfigGceClusterConfigReservationAffinity
+ x-kubernetes-immutable: true
+ serviceAccount:
+ description: Optional. The service account used by Dataproc cluster VM
+ instances to access Google Cloud Platform services. If not specified,
+ the Compute Engine default service account (https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
+ is used.
+ type: string
+ x-dcl-go-name: ServiceAccount
+ x-dcl-references:
+ - field: email
+ resource: Iam/ServiceAccount
+ x-kubernetes-immutable: true
+ serviceAccountScopes:
+ description: 'Optional. The URIs of service account scopes to be included
+ in Compute Engine instances. The following base set of scopes is always
+ included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
+ * https://www.googleapis.com/auth/devstorage.read_write * https://www.googleapis.com/auth/logging.write
+ If no scopes are specified, the following defaults are also provided:
+ * https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table
+ * https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control'
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: ServiceAccountScopes
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ subnetwork:
+ description: 'Optional. The Compute Engine subnetwork to be used for
+ machine communications. Cannot be specified with network_uri. A full
+ URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects//regions/us-east1/subnetworks/sub0`
+ * `sub0`'
+ type: string
+ x-dcl-go-name: Subnetwork
+ x-dcl-references:
+ - field: selfLink
+ resource: Compute/Subnetwork
+ x-kubernetes-immutable: true
+ tags:
+ description: The Compute Engine tags to add to all instances (see (https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Tags
+ x-dcl-list-type: set
+ zone:
+ description: 'Optional. The zone where the Compute Engine cluster will
+ be located. On a create request, it is required in the "global" region.
+ If omitted in a non-global Dataproc region, the service will pick
+ a zone in the corresponding Compute Engine region. On a get request,
+ zone will always be present. A full URL, partial URI, or short name
+ are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/`
+ * `us-central1-f`'
+ type: string
+ x-dcl-go-name: Zone
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: GceClusterConfig
+ x-dcl-go-type: ClusterClusterConfigGceClusterConfig
+ x-kubernetes-immutable: true
+ initializationActions:
+ description: 'Optional. Commands to execute on each node after config is
+ completed. By default, executables are run on master and all worker nodes.
+ You can test a node''s `role` metadata to run an executable on a master
+ or worker node, as shown below using `curl` (you can also use `wget`):
+ ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
+ if [[ "${ROLE}" == ''Master'' ]]; then ... master specific actions ... else ... worker specific
+ actions ... fi'
+ items:
+ properties:
+ executableFile:
+ description: Required. Cloud Storage URI of executable file.
+ type: string
+ x-dcl-go-name: ExecutableFile
+ x-kubernetes-immutable: true
+ executionTimeout:
+ description: Optional. Amount of time executable has to complete.
+ Default is 10 minutes (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)).
+ Cluster creation fails with an explanatory error message (the name
+ of the executable that caused the error and the exceeded timeout
+ period) if the executable is not completed at end of the timeout
+ period.
+ type: string
+ x-dcl-go-name: ExecutionTimeout
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-type: ClusterClusterConfigInitializationActions
+ type: array
+ x-dcl-go-name: InitializationActions
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ lifecycleConfig:
+ description: Optional. Lifecycle setting for the cluster.
+ properties:
+ autoDeleteTime:
+ description: Optional. The time when cluster will be auto-deleted (see
+ JSON representation of Timestamp (https://developers.google.com/protocol-buffers/docs/proto3#json)).
+ format: date-time
+ type: string
+ x-dcl-go-name: AutoDeleteTime
+ x-kubernetes-immutable: true
+ autoDeleteTtl:
+ description: Optional. The lifetime duration of cluster. The cluster
+ will be auto-deleted at the end of this period. Minimum value is 10
+ minutes; maximum value is 14 days (see JSON representation of Duration
+ (https://developers.google.com/protocol-buffers/docs/proto3#json)).
+ type: string
+ x-dcl-go-name: AutoDeleteTtl
+ x-kubernetes-immutable: true
+ idleDeleteTtl:
+ description: Optional. The duration to keep the cluster alive while
+ idling (when no jobs are running). Passing this threshold will cause
+ the cluster to be deleted. Minimum value is 5 minutes; maximum value
+ is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)).
+ type: string
+ x-dcl-go-name: IdleDeleteTtl
+ x-kubernetes-immutable: true
+ idleStartTime:
+ description: Output only. The time when cluster became idle (most recent
+ job finished) and became eligible for deletion due to idleness (see
+ JSON representation of Timestamp (https://developers.google.com/protocol-buffers/docs/proto3#json)).
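+ # A minimal sketch of this `lifecycleConfig` block; durations are
+ # illustrative and must stay within the bounds noted above (protobuf
+ # Duration values serialize as seconds with an "s" suffix):
+ #   lifecycleConfig:
+ #     idleDeleteTtl: 1800s
+ #     autoDeleteTtl: 86400s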
+ format: date-time + readOnly: true + type: string + x-dcl-go-name: IdleStartTime + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LifecycleConfig + x-dcl-go-type: ClusterClusterConfigLifecycleConfig + x-kubernetes-immutable: true + masterConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: MasterConfig + secondaryWorkerConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: SecondaryWorkerConfig + securityConfig: + description: Optional. Security settings for the cluster. + properties: + kerberosConfig: + description: Kerberos related configuration. + properties: + crossRealmTrustAdminServer: + description: Optional. The admin server (IP or hostname) for the + remote trusted realm in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustAdminServer + x-kubernetes-immutable: true + crossRealmTrustKdc: + description: Optional. The KDC (IP or hostname) for the remote trusted + realm in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustKdc + x-kubernetes-immutable: true + crossRealmTrustRealm: + description: Optional. The remote realm the Dataproc on-cluster + KDC will trust, should the user enable cross realm trust. + type: string + x-dcl-go-name: CrossRealmTrustRealm + x-kubernetes-immutable: true + crossRealmTrustSharedPassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the shared password between the on-cluster Kerberos + realm and the remote trusted realm, in a cross realm trust relationship. + type: string + x-dcl-go-name: CrossRealmTrustSharedPassword + x-kubernetes-immutable: true + enableKerberos: + description: 'Optional. Flag to indicate whether to Kerberize the + cluster (default: false). Set this field to true to enable Kerberos + on a cluster.' + type: boolean + x-dcl-go-name: EnableKerberos + x-kubernetes-immutable: true + kdcDbKey: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the master key of the KDC database. + type: string + x-dcl-go-name: KdcDbKey + x-kubernetes-immutable: true + keyPassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided key. For the + self-signed certificate, this password is generated by Dataproc. + type: string + x-dcl-go-name: KeyPassword + x-kubernetes-immutable: true + keystore: + description: Optional. The Cloud Storage URI of the keystore file + used for SSL encryption. If not provided, Dataproc will provide + a self-signed certificate. + type: string + x-dcl-go-name: Keystore + x-kubernetes-immutable: true + keystorePassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided keystore. For + the self-signed certificate, this password is generated by Dataproc. + type: string + x-dcl-go-name: KeystorePassword + x-kubernetes-immutable: true + kmsKey: + description: Optional. The uri of the KMS key used to encrypt various + sensitive files. + type: string + x-dcl-go-name: KmsKey + x-dcl-references: + - field: selfLink + resource: Cloudkms/CryptoKey + x-kubernetes-immutable: true + realm: + description: Optional. The name of the on-cluster Kerberos realm. + If not specified, the uppercased domain of hostnames will be the + realm. + type: string + x-dcl-go-name: Realm + x-kubernetes-immutable: true + rootPrincipalPassword: + description: Optional. 
The Cloud Storage URI of a KMS encrypted + file containing the root principal password. + type: string + x-dcl-go-name: RootPrincipalPassword + x-kubernetes-immutable: true + tgtLifetimeHours: + description: Optional. The lifetime of the ticket granting ticket, + in hours. If not specified, or user specifies 0, then default + value 10 will be used. + format: int64 + type: integer + x-dcl-go-name: TgtLifetimeHours + x-kubernetes-immutable: true + truststore: + description: Optional. The Cloud Storage URI of the truststore file + used for SSL encryption. If not provided, Dataproc will provide + a self-signed certificate. + type: string + x-dcl-go-name: Truststore + x-kubernetes-immutable: true + truststorePassword: + description: Optional. The Cloud Storage URI of a KMS encrypted + file containing the password to the user provided truststore. + For the self-signed certificate, this password is generated by + Dataproc. + type: string + x-dcl-go-name: TruststorePassword + x-kubernetes-immutable: true + type: object + x-dcl-go-name: KerberosConfig + x-dcl-go-type: ClusterClusterConfigSecurityConfigKerberosConfig + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SecurityConfig + x-dcl-go-type: ClusterClusterConfigSecurityConfig + x-kubernetes-immutable: true + softwareConfig: + description: Optional. The config settings for software inside the cluster. + properties: + imageVersion: + description: Optional. The version of software inside the cluster. It + must be one of the supported (https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions). + If unspecified, it defaults to the latest Debian version. + type: string + x-dcl-go-name: ImageVersion + x-kubernetes-immutable: true + optionalComponents: + description: Optional. The set of components to activate on the cluster. + items: + type: string + x-dcl-go-type: ClusterClusterConfigSoftwareConfigOptionalComponentsEnum + type: array + x-dcl-go-name: OptionalComponents + x-dcl-list-type: list + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: 'Optional. The properties to set on daemon config files. + Property keys are specified in `prefix:property` format, for example + `core:hadoop.tmp.dir`. The following are supported prefixes and their + mappings: * capacity-scheduler: `capacity-scheduler.xml` * core: `core-site.xml` + * distcp: `distcp-default.xml` * hdfs: `hdfs-site.xml` * hive: `hive-site.xml` + * mapred: `mapred-site.xml` * pig: `pig.properties` * spark: `spark-defaults.conf` + * yarn: `yarn-site.xml` For more information, see (https://cloud.google.com/dataproc/docs/concepts/cluster-properties).' + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SoftwareConfig + x-dcl-go-type: ClusterClusterConfigSoftwareConfig + x-kubernetes-immutable: true + stagingBucket: + description: Optional. A Cloud Storage bucket used to stage job dependencies, + config files, and job driver console output. If you do not specify a staging + bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, + or EU) for your cluster's staging bucket according to the Compute Engine + zone where your cluster is deployed, and then create and manage this project-level, + per-location bucket (see (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). 
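+ # A minimal sketch of the `prefix:property` key format that
+ # `softwareConfig.properties` uses; the values are illustrative, and keys
+ # containing a colon must be quoted in YAML:
+ #   softwareConfig:
+ #     imageVersion: 2.0-debian10
+ #     properties:
+ #       "core:hadoop.tmp.dir": /tmp/hadoop
+ #       "spark:spark.executor.memory": 4g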
+ type: string + x-dcl-go-name: StagingBucket + x-dcl-references: + - field: name + resource: Storage/Bucket + x-kubernetes-immutable: true + tempBucket: + description: Optional. A Cloud Storage bucket used to store ephemeral cluster + and jobs data, such as Spark and MapReduce history files. If you do not + specify a temp bucket, Dataproc will determine a Cloud Storage location + (US, ASIA, or EU) for your cluster's temp bucket according to the Compute + Engine zone where your cluster is deployed, and then create and manage + this project-level, per-location bucket. The default bucket has a TTL + of 90 days, but you can use any TTL (or none) if you specify a bucket. + type: string + x-dcl-go-name: TempBucket + x-dcl-references: + - field: name + resource: Storage/Bucket + x-kubernetes-immutable: true + workerConfig: + $ref: '#/components/schemas/InstanceGroupConfig' + x-dcl-go-name: WorkerConfig + type: object + x-dcl-go-name: Config + x-dcl-go-type: ClusterClusterConfig + x-kubernetes-immutable: true + InstanceGroupConfig: + description: Optional. The Compute Engine config settings for additional worker + instances in a cluster. + properties: + accelerators: + description: Optional. The Compute Engine accelerator configuration for + these instances. + items: + properties: + acceleratorCount: + description: The number of the accelerator cards of this type exposed + to this instance. + format: int64 + type: integer + x-dcl-go-name: AcceleratorCount + x-kubernetes-immutable: true + acceleratorType: + description: Full URL, partial URI, or short name of the accelerator + type resource to expose to this instance. See (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) + feature, you must use the short name of the accelerator type resource, + for example, `nvidia-tesla-k80`. + type: string + x-dcl-go-name: AcceleratorType + x-kubernetes-immutable: true + type: object + x-dcl-go-type: ClusterInstanceGroupConfigAccelerators + type: array + x-dcl-go-name: Accelerators + x-dcl-list-type: list + x-kubernetes-immutable: true + diskConfig: + description: Optional. Disk option config settings. + properties: + bootDiskSizeGb: + description: Optional. Size in GB of the boot disk (default is 500GB). + format: int64 + type: integer + x-dcl-go-name: BootDiskSizeGb + x-kubernetes-immutable: true + bootDiskType: + description: 'Optional. Type of the boot disk (default is "pd-standard"). + Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" + (Persistent Disk Hard Disk Drive).' + type: string + x-dcl-go-name: BootDiskType + x-kubernetes-immutable: true + numLocalSsds: + description: Optional. Number of attached SSDs, from 0 to 4 (default + is 0). If SSDs are not attached, the boot disk is used to store runtime + logs and (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) + data. If one or more SSDs are attached, this runtime bulk data is + spread across them, and the boot disk contains only basic config and + installed binaries. + format: int64 + type: integer + x-dcl-go-name: NumLocalSsds + x-kubernetes-immutable: true + type: object + x-dcl-go-name: DiskConfig + x-dcl-go-type: ClusterInstanceGroupConfigDiskConfig + x-kubernetes-immutable: true + image: + description: 'Optional. The Compute Engine image resource used for cluster + instances. The URI can represent an image or image family. 
Image examples: + * `https://www.googleapis.com/compute/beta/projects/` If the URI is unspecified, + it will be inferred from `SoftwareConfig.image_version` or the system + default.' + type: string + x-dcl-go-name: Image + x-dcl-references: + - field: selfLink + resource: Compute/Image + x-kubernetes-immutable: true + instanceNames: + description: Output only. The list of instance names. Dataproc derives the + names from `cluster_name`, `num_instances`, and the instance group. + items: + type: string + x-dcl-go-type: string + x-dcl-references: + - field: selfLink + resource: Compute/Instance + readOnly: true + type: array + x-dcl-go-name: InstanceNames + x-dcl-list-type: list + x-kubernetes-immutable: true + isPreemptible: + description: Output only. Specifies that this instance group contains preemptible + instances. + readOnly: true + type: boolean + x-dcl-go-name: IsPreemptible + x-kubernetes-immutable: true + machineType: + description: 'Optional. The Compute Engine machine type used for cluster + instances. A full URL, partial URI, or short name are valid. Examples: + * `https://www.googleapis.com/compute/v1/projects/(https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) + feature, you must use the short name of the machine type resource, for + example, `n1-standard-2`.' + type: string + x-dcl-go-name: MachineType + x-kubernetes-immutable: true + managedGroupConfig: + description: Output only. The config for Compute Engine Instance Group Manager + that manages this group. This is only used for preemptible instance groups. + properties: + instanceGroupManagerName: + description: Output only. The name of the Instance Group Manager for + this group. + readOnly: true + type: string + x-dcl-go-name: InstanceGroupManagerName + x-kubernetes-immutable: true + instanceTemplateName: + description: Output only. The name of the Instance Template used for + the Managed Instance Group. + readOnly: true + type: string + x-dcl-go-name: InstanceTemplateName + x-kubernetes-immutable: true + readOnly: true + type: object + x-dcl-go-name: ManagedGroupConfig + x-dcl-go-type: ClusterInstanceGroupConfigManagedGroupConfig + x-kubernetes-immutable: true + minCpuPlatform: + description: Optional. Specifies the minimum cpu platform for the Instance + Group. See (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + type: string + x-dcl-go-name: MinCpuPlatform + x-kubernetes-immutable: true + numInstances: + description: Optional. The number of VM instances in the instance group. + For master instance groups, must be set to 1. + format: int64 + type: integer + x-dcl-go-name: NumInstances + x-kubernetes-immutable: true + preemptibility: + description: 'Optional. Specifies the preemptibility of the instance group. + The default value for master and worker groups is `NON_PREEMPTIBLE`. This + default cannot be changed. The default value for secondary instances is + `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED, NON_PREEMPTIBLE, + PREEMPTIBLE' + enum: + - PREEMPTIBILITY_UNSPECIFIED + - NON_PREEMPTIBLE + - PREEMPTIBLE + type: string + x-dcl-go-name: Preemptibility + x-dcl-go-type: ClusterInstanceGroupConfigPreemptibilityEnum + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SecondaryWorkerConfig + x-dcl-go-type: ClusterInstanceGroupConfig + x-kubernetes-immutable: true + WorkflowTemplate: + properties: + createTime: + description: Output only. The time template was created. 
+ format: date-time + readOnly: true + type: string + x-dcl-go-name: CreateTime + x-kubernetes-immutable: true + jobs: + description: Required. The Directed Acyclic Graph of Jobs to submit. + items: + properties: + hadoopJob: + description: Optional. Job is a Hadoop job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + in the working directory of Hadoop drivers and tasks. Supported + file types: .jar, .tar, .tar.gz, .tgz, or .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `-libjars` or `-Dfoo=bar`, that + can be set as job properties, since a collision may occur that + causes an incorrect job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS (Hadoop Compatible Filesystem) URIs + of files to be copied to the working directory of Hadoop drivers + and distributed tasks. Useful for naively parallel tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + jarFileUris: + description: Optional. Jar file URIs to add to the CLASSPATHs + of the Hadoop driver and tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsHadoopJobLoggingConfig + x-kubernetes-immutable: true + mainClass: + description: The name of the driver's main class. The jar file + containing the class must be in the default CLASSPATH or specified + in `jar_file_uris`. + type: string + x-dcl-go-name: MainClass + x-kubernetes-immutable: true + mainJarFileUri: + description: 'The HCFS URI of the jar file containing the main + class. Examples: ''gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'' + ''hdfs:/tmp/test-samples/custom-wordcount.jar'' ''file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar''' + type: string + x-dcl-go-name: MainJarFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Hadoop. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/hadoop/conf/*-site and classes in user code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: HadoopJob + x-dcl-go-type: WorkflowTemplateJobsHadoopJob + x-kubernetes-immutable: true + hiveJob: + description: Optional. Job is a Hive job. + properties: + continueOnFailure: + description: Optional. Whether to continue executing queries if + a query fails. The default value is `false`. 
Setting to `true` + can be useful when executing independent parallel queries. + type: boolean + x-dcl-go-name: ContinueOnFailure + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATH + of the Hive server and Hadoop MapReduce (MR) tasks. Can contain + Hive SerDes and UDFs. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names and values, + used to configure Hive. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, + and classes in user code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains Hive queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsHiveJobQueryList + x-kubernetes-immutable: true + scriptVariables: + additionalProperties: + type: string + description: 'Optional. Mapping of query variable names to values + (equivalent to the Hive command: `SET name="value";`).' + type: object + x-dcl-go-name: ScriptVariables + x-kubernetes-immutable: true + type: object + x-dcl-go-name: HiveJob + x-dcl-go-type: WorkflowTemplateJobsHiveJob + x-kubernetes-immutable: true + labels: + additionalProperties: + type: string + description: 'Optional. The labels to associate with this job. Label + keys must be between 1 and 63 characters long, and must conform + to the following regular expression: {0,63} No more than 32 labels + can be associated with a given job.' + type: object + x-dcl-go-name: Labels + x-kubernetes-immutable: true + pigJob: + description: Optional. Job is a Pig job. + properties: + continueOnFailure: + description: Optional. Whether to continue executing queries if + a query fails. The default value is `false`. Setting to `true` + can be useful when executing independent parallel queries. + type: boolean + x-dcl-go-name: ContinueOnFailure + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATH + of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain + Pig UDFs. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. 
+ Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache
+ = DEBUG'''
+ type: object
+ x-dcl-go-name: DriverLogLevels
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: LoggingConfig
+ x-dcl-go-type: WorkflowTemplateJobsPigJobLoggingConfig
+ x-kubernetes-immutable: true
+ properties:
+ additionalProperties:
+ type: string
+ description: Optional. A mapping of property names to values,
+ used to configure Pig. Properties that conflict with values
+ set by the Dataproc API may be overwritten. Can include properties
+ set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties,
+ and classes in user code.
+ type: object
+ x-dcl-go-name: Properties
+ x-kubernetes-immutable: true
+ queryFileUri:
+ description: The HCFS URI of the script that contains the Pig
+ queries.
+ type: string
+ x-dcl-go-name: QueryFileUri
+ x-kubernetes-immutable: true
+ queryList:
+ description: A list of queries.
+ properties:
+ queries:
+ description: 'Required. The queries to execute. You do not
+ need to end a query expression with a semicolon. Multiple
+ queries can be specified in one string by separating each
+ with a semicolon. Here is an example of a Dataproc API snippet
+ that uses a QueryList to specify a HiveJob: "hiveJob": {
+ "queryList": { "queries": } }'
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Queries
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ required:
+ - queries
+ type: object
+ x-dcl-go-name: QueryList
+ x-dcl-go-type: WorkflowTemplateJobsPigJobQueryList
+ x-kubernetes-immutable: true
+ scriptVariables:
+ additionalProperties:
+ type: string
+ description: 'Optional. Mapping of query variable names to values
+ (equivalent to the Pig command: `name=[value]`).'
+ type: object
+ x-dcl-go-name: ScriptVariables
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: PigJob
+ x-dcl-go-type: WorkflowTemplateJobsPigJob
+ x-kubernetes-immutable: true
+ prerequisiteStepIds:
+ description: Optional. The optional list of prerequisite job step_ids.
+ If not specified, the job will start at the beginning of the workflow.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: PrerequisiteStepIds
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ prestoJob:
+ description: Optional. Job is a Presto job.
+ properties:
+ clientTags:
+ description: Optional. Presto client tags to attach to this query.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: ClientTags
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ continueOnFailure:
+ description: Optional. Whether to continue executing queries if
+ a query fails. The default value is `false`. Setting to `true`
+ can be useful when executing independent parallel queries.
+ type: boolean
+ x-dcl-go-name: ContinueOnFailure
+ x-kubernetes-immutable: true
+ loggingConfig:
+ description: Optional. The runtime log config for job execution.
+ properties:
+ driverLogLevels:
+ additionalProperties:
+ type: string
+ description: 'The per-package log levels for the driver. This
+ may include "root" package name to configure rootLogger.
+ Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache
+ = DEBUG'''
+ type: object
+ x-dcl-go-name: DriverLogLevels
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: LoggingConfig
+ x-dcl-go-type: WorkflowTemplateJobsPrestoJobLoggingConfig
+ x-kubernetes-immutable: true
+ outputFormat:
+ description: Optional. The format in which query output will be
+ displayed. See the Presto documentation for supported output
+ formats.
+ type: string
+ x-dcl-go-name: OutputFormat
+ x-kubernetes-immutable: true
+ properties:
+ additionalProperties:
+ type: string
+ description: Optional. A mapping of property names to values.
+ Used to set Presto session properties (https://prestodb.io/docs/current/sql/set-session.html).
+ Equivalent to using the --session flag in the Presto CLI.
+ type: object
+ x-dcl-go-name: Properties
+ x-kubernetes-immutable: true
+ queryFileUri:
+ description: The HCFS URI of the script that contains SQL queries.
+ type: string
+ x-dcl-go-name: QueryFileUri
+ x-kubernetes-immutable: true
+ queryList:
+ description: A list of queries.
+ properties:
+ queries:
+ description: 'Required. The queries to execute. You do not
+ need to end a query expression with a semicolon. Multiple
+ queries can be specified in one string by separating each
+ with a semicolon. Here is an example of a Dataproc API snippet
+ that uses a QueryList to specify a HiveJob: "hiveJob": {
+ "queryList": { "queries": } }'
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Queries
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ required:
+ - queries
+ type: object
+ x-dcl-go-name: QueryList
+ x-dcl-go-type: WorkflowTemplateJobsPrestoJobQueryList
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: PrestoJob
+ x-dcl-go-type: WorkflowTemplateJobsPrestoJob
+ x-kubernetes-immutable: true
+ pysparkJob:
+ description: Optional. Job is a PySpark job.
+ properties:
+ archiveUris:
+ description: 'Optional. HCFS URIs of archives to be extracted
+ into the working directory of each executor. Supported file
+ types: .jar, .tar, .tar.gz, .tgz, and .zip.'
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: ArchiveUris
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ args:
+ description: Optional. The arguments to pass to the driver. Do
+ not include arguments, such as `--conf`, that can be set as
+ job properties, since a collision may occur that causes an incorrect
+ job submission.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: Args
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ fileUris:
+ description: Optional. HCFS URIs of files to be placed in the
+ working directory of each executor. Useful for naively parallel
+ tasks.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: FileUris
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ jarFileUris:
+ description: Optional. HCFS URIs of jar files to add to the CLASSPATHs
+ of the Python driver and tasks.
+ items:
+ type: string
+ x-dcl-go-type: string
+ type: array
+ x-dcl-go-name: JarFileUris
+ x-dcl-list-type: list
+ x-kubernetes-immutable: true
+ loggingConfig:
+ description: Optional. The runtime log config for job execution.
+ properties:
+ driverLogLevels:
+ additionalProperties:
+ type: string
+ description: 'The per-package log levels for the driver. This
+ may include "root" package name to configure rootLogger.
+ Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache
+ = DEBUG'''
+ type: object
+ x-dcl-go-name: DriverLogLevels
+ x-kubernetes-immutable: true
+ type: object
+ x-dcl-go-name: LoggingConfig
+ x-dcl-go-type: WorkflowTemplateJobsPysparkJobLoggingConfig
+ x-kubernetes-immutable: true
+ mainPythonFileUri:
+ description: Required. The HCFS URI of the main Python file to
+ use as the driver. Must be a .py file.
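+ # A minimal sketch of a `pysparkJob` step; the bucket and file URIs are
+ # illustrative, and only `mainPythonFileUri` is required by this schema:
+ #   pysparkJob:
+ #     mainPythonFileUri: gs://my-bucket/jobs/wordcount.py
+ #     args:
+ #     - gs://my-bucket/input/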
+ type: string + x-dcl-go-name: MainPythonFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure PySpark. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + pythonFileUris: + description: 'Optional. HCFS file URIs of Python files to pass + to the PySpark framework. Supported file types: .py, .egg, and + .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: PythonFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - mainPythonFileUri + type: object + x-dcl-go-name: PysparkJob + x-dcl-go-type: WorkflowTemplateJobsPysparkJob + x-kubernetes-immutable: true + scheduling: + description: Optional. Job scheduling configuration. + properties: + maxFailuresPerHour: + description: Optional. Maximum number of times per hour a driver + may be restarted as a result of driver exiting with non-zero + code before job is reported failed. A job may be reported as + thrashing if driver exits with non-zero code 4 times within + 10 minute window. Maximum value is 10. + format: int64 + type: integer + x-dcl-go-name: MaxFailuresPerHour + x-kubernetes-immutable: true + maxFailuresTotal: + description: Optional. Maximum number of times in total a driver + may be restarted as a result of driver exiting with non-zero + code before job is reported failed. Maximum value is 240 + format: int64 + type: integer + x-dcl-go-name: MaxFailuresTotal + x-kubernetes-immutable: true + type: object + x-dcl-go-name: Scheduling + x-dcl-go-type: WorkflowTemplateJobsScheduling + x-kubernetes-immutable: true + sparkJob: + description: Optional. Job is a Spark job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + into the working directory of each executor. Supported file + types: .jar, .tar, .tar.gz, .tgz, and .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `--conf`, that can be set as + job properties, since a collision may occur that causes an incorrect + job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS URIs of files to be placed in the + working directory of each executor. Useful for naively parallel + tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + jarFileUris: + description: Optional. HCFS URIs of jar files to add to the CLASSPATHs + of the Spark driver and tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. 
+ Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkJobLoggingConfig + x-kubernetes-immutable: true + mainClass: + description: The name of the driver's main class. The jar file + that contains the class must be in the default CLASSPATH or + specified in `jar_file_uris`. + type: string + x-dcl-go-name: MainClass + x-kubernetes-immutable: true + mainJarFileUri: + description: The HCFS URI of the jar file that contains the main + class. + type: string + x-dcl-go-name: MainJarFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Spark. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SparkJob + x-dcl-go-type: WorkflowTemplateJobsSparkJob + x-kubernetes-immutable: true + sparkRJob: + description: Optional. Job is a SparkR job. + properties: + archiveUris: + description: 'Optional. HCFS URIs of archives to be extracted + into the working directory of each executor. Supported file + types: .jar, .tar, .tar.gz, .tgz, and .zip.' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: ArchiveUris + x-dcl-list-type: list + x-kubernetes-immutable: true + args: + description: Optional. The arguments to pass to the driver. Do + not include arguments, such as `--conf`, that can be set as + job properties, since a collision may occur that causes an incorrect + job submission. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Args + x-dcl-list-type: list + x-kubernetes-immutable: true + fileUris: + description: Optional. HCFS URIs of files to be placed in the + working directory of each executor. Useful for naively parallel + tasks. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: FileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkRJobLoggingConfig + x-kubernetes-immutable: true + mainRFileUri: + description: Required. The HCFS URI of the main R file to use + as the driver. Must be a .R file. + type: string + x-dcl-go-name: MainRFileUri + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure SparkR. Properties that conflict with values + set by the Dataproc API may be overwritten. Can include properties + set in /etc/spark/conf/spark-defaults.conf and classes in user + code. 
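+            # Illustrative only (not part of the spec): a single entry such as
+            #   "spark.executor.cores": "2"
+            # is merged into spark-defaults-style configuration; the key and
+            # value here are hypothetical, and values set by the Dataproc API
+            # take precedence on conflict, as described above.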
+ type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + required: + - mainRFileUri + type: object + x-dcl-go-name: SparkRJob + x-dcl-go-type: WorkflowTemplateJobsSparkRJob + x-kubernetes-immutable: true + sparkSqlJob: + description: Optional. Job is a SparkSql job. + properties: + jarFileUris: + description: Optional. HCFS URIs of jar files to be added to the + Spark CLASSPATH. + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: JarFileUris + x-dcl-list-type: list + x-kubernetes-immutable: true + loggingConfig: + description: Optional. The runtime log config for job execution. + properties: + driverLogLevels: + additionalProperties: + type: string + description: 'The per-package log levels for the driver. This + may include "root" package name to configure rootLogger. + Examples: ''com.google = FATAL'', ''root = INFO'', ''org.apache + = DEBUG''' + type: object + x-dcl-go-name: DriverLogLevels + x-kubernetes-immutable: true + type: object + x-dcl-go-name: LoggingConfig + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJobLoggingConfig + x-kubernetes-immutable: true + properties: + additionalProperties: + type: string + description: Optional. A mapping of property names to values, + used to configure Spark SQL's SparkConf. Properties that conflict + with values set by the Dataproc API may be overwritten. + type: object + x-dcl-go-name: Properties + x-kubernetes-immutable: true + queryFileUri: + description: The HCFS URI of the script that contains SQL queries. + type: string + x-dcl-go-name: QueryFileUri + x-kubernetes-immutable: true + queryList: + description: A list of queries. + properties: + queries: + description: 'Required. The queries to execute. You do not + need to end a query expression with a semicolon. Multiple + queries can be specified in one string by separating each + with a semicolon. Here is an example of a Dataproc API snippet + that uses a QueryList to specify a HiveJob: "hiveJob": { + "queryList": { "queries": } }' + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Queries + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - queries + type: object + x-dcl-go-name: QueryList + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJobQueryList + x-kubernetes-immutable: true + scriptVariables: + additionalProperties: + type: string + description: 'Optional. Mapping of query variable names to values + (equivalent to the Spark SQL command: SET `name="value";`).' + type: object + x-dcl-go-name: ScriptVariables + x-kubernetes-immutable: true + type: object + x-dcl-go-name: SparkSqlJob + x-dcl-go-type: WorkflowTemplateJobsSparkSqlJob + x-kubernetes-immutable: true + stepId: + description: Required. The step id. The id must be unique among all + jobs within the template. The step id is used as prefix for job + id, as job `goog-dataproc-workflow-step-id` label, and in field + from other steps. The id must contain only letters (a-z, A-Z), numbers + (0-9), underscores (_), and hyphens (-). Cannot begin or end with + underscore or hyphen. Must consist of between 3 and 50 characters. + type: string + x-dcl-go-name: StepId + x-kubernetes-immutable: true + required: + - stepId + type: object + x-dcl-go-type: WorkflowTemplateJobs + type: array + x-dcl-go-name: Jobs + x-dcl-list-type: list + x-kubernetes-immutable: true + labels: + additionalProperties: + type: string + description: Optional. The labels to associate with this template. 
These + labels will be propagated to all jobs and clusters created by the workflow + instance. Label **keys** must contain 1 to 63 characters, and must conform + to (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can + be associated with a template. + type: object + x-dcl-go-name: Labels + x-kubernetes-immutable: true + location: + description: The location for the resource + type: string + x-dcl-go-name: Location + x-kubernetes-immutable: true + name: + description: 'Output only. The resource name of the workflow template, as + described in https://cloud.google.com/apis/design/resource_names. * For + `projects.regions.workflowTemplates`, the resource name of the template + has the following format: `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` + * For `projects.locations.workflowTemplates`, the resource name of the + template has the following format: `projects/{project_id}/locations/{location}/workflowTemplates/{template_id}`' + type: string + x-dcl-go-name: Name + x-kubernetes-immutable: true + parameters: + description: Optional. Template parameters whose values are substituted + into the template. Values for parameters must be provided when the template + is instantiated. + items: + properties: + description: + description: Optional. Brief description of the parameter. Must not + exceed 1024 characters. + type: string + x-dcl-go-name: Description + x-kubernetes-immutable: true + fields: + description: Required. Paths to all fields that the parameter replaces. + A field is allowed to appear in at most one parameter's list of + field paths. A field path is similar in syntax to a .sparkJob.args + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Fields + x-dcl-list-type: list + x-kubernetes-immutable: true + name: + description: Required. Parameter name. The parameter name is used + as the key, and paired with the parameter value, which are passed + to the template when the template is instantiated. The name must + contain only capital letters (A-Z), numbers (0-9), and underscores + (_), and must not start with a number. The maximum length is 40 + characters. + type: string + x-dcl-go-name: Name + x-kubernetes-immutable: true + validation: + description: Optional. Validation rules to be applied to this parameter's + value. + properties: + regex: + description: Validation based on regular expressions. + properties: + regexes: + description: Required. RE2 regular expressions used to validate + the parameter's value. The value must match the regex in + its entirety (substring matches are not sufficient). + items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Regexes + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - regexes + type: object + x-dcl-go-name: Regex + x-dcl-go-type: WorkflowTemplateParametersValidationRegex + x-kubernetes-immutable: true + values: + description: Validation based on a list of allowed values. + properties: + values: + description: Required. List of allowed values for the parameter. 
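+                  # Illustrative only (not part of the spec): with
+                  #   values: ["pd-ssd", "pd-standard"]
+                  # instantiation rejects any parameter value outside the
+                  # list; the two disk types shown are hypothetical examples.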
+ items: + type: string + x-dcl-go-type: string + type: array + x-dcl-go-name: Values + x-dcl-list-type: list + x-kubernetes-immutable: true + required: + - values + type: object + x-dcl-go-name: Values + x-dcl-go-type: WorkflowTemplateParametersValidationValues + x-kubernetes-immutable: true + type: object + x-dcl-go-name: Validation + x-dcl-go-type: WorkflowTemplateParametersValidation + x-kubernetes-immutable: true + required: + - name + - fields + type: object + x-dcl-go-type: WorkflowTemplateParameters + type: array + x-dcl-go-name: Parameters + x-dcl-list-type: list + x-kubernetes-immutable: true + placement: + description: Required. WorkflowTemplate scheduling information. + properties: + clusterSelector: + description: Optional. A selector that chooses target cluster for jobs + based on metadata. The selector is evaluated at the time each job + is submitted. + properties: + clusterLabels: + additionalProperties: + type: string + description: Required. The cluster labels. Cluster must have all + labels to match. + type: object + x-dcl-go-name: ClusterLabels + x-kubernetes-immutable: true + zone: + description: Optional. The zone where workflow process executes. + This parameter does not affect the selection of the cluster. If + unspecified, the zone of the first cluster matching the selector + is used. + type: string + x-dcl-go-name: Zone + x-kubernetes-immutable: true + required: + - clusterLabels + type: object + x-dcl-go-name: ClusterSelector + x-dcl-go-type: WorkflowTemplatePlacementClusterSelector + x-kubernetes-immutable: true + managedCluster: + description: A cluster that is managed by the workflow. + properties: + clusterName: + description: Required. The cluster name prefix. A unique cluster + name will be formed by appending a random suffix. The name must + contain only lower-case letters (a-z), numbers (0-9), and hyphens + (-). Must begin with a letter. Cannot begin or end with hyphen. + Must consist of between 2 and 35 characters. + type: string + x-dcl-go-name: ClusterName + x-kubernetes-immutable: true + config: + $ref: '#/components/schemas/ClusterConfig' + x-dcl-go-name: Config + labels: + additionalProperties: + type: string + description: 'Optional. The labels to associate with this cluster. + Label keys must be between 1 and 63 characters long, and must + conform to the following PCRE regular expression: {0,63} No more + than 32 labels can be associated with a given cluster.' + type: object + x-dcl-go-name: Labels + x-kubernetes-immutable: true + required: + - clusterName + - config + type: object + x-dcl-go-name: ManagedCluster + x-dcl-go-type: WorkflowTemplatePlacementManagedCluster + x-kubernetes-immutable: true + type: object + x-dcl-go-name: Placement + x-dcl-go-type: WorkflowTemplatePlacement + x-kubernetes-immutable: true + project: + description: The project for the resource + type: string + x-dcl-go-name: Project + x-dcl-references: + - field: name + parent: true + resource: Cloudresourcemanager/Project + x-kubernetes-immutable: true + updateTime: + description: Output only. The time template was last updated. + format: date-time + readOnly: true + type: string + x-dcl-go-name: UpdateTime + x-kubernetes-immutable: true + version: + description: Optional. Used to perform a consistent read-modify-write. This + field should be left blank for a `CreateWorkflowTemplate` request. It + is required for an `UpdateWorkflowTemplate` request, and must match the + current server version. 
A typical update template flow would fetch the + current template with a `GetWorkflowTemplate` request, which will return + the current template with the `version` field filled in with the current + server version. The user updates other fields in the template, then returns + it as part of the `UpdateWorkflowTemplate` request. + format: int64 + type: integer + x-dcl-go-name: Version + x-kubernetes-immutable: true + required: + - name + - placement + - jobs + - project + - location + title: WorkflowTemplate + type: object + x-dcl-id: projects/{{project}}/locations/{{location}}/workflowTemplates/{{name}} + x-dcl-labels: labels + x-dcl-locations: [] + x-dcl-parent-container: project + x-dcl-uses-state-hint: false +info: + description: DCL Specification for the Dataproc WorkflowTemplate resource + title: Dataproc/WorkflowTemplate + x-dcl-has-iam: false +paths: + apply: + description: The function used to apply information about a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + delete: + description: The function used to delete a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + deleteAll: + description: The function used to delete all WorkflowTemplate + parameters: + - name: project + required: true + schema: + type: string + - name: location + required: true + schema: + type: string + get: + description: The function used to get information about a WorkflowTemplate + parameters: + - description: A full instance of a WorkflowTemplate + name: WorkflowTemplate + required: true + list: + description: The function used to list information about many WorkflowTemplate + parameters: + - name: project + required: true + schema: + type: string + - name: location + required: true + schema: + type: string \ No newline at end of file diff --git a/tpgtools/property.go b/tpgtools/property.go index 3c6cd66d08a1..d090bee3546e 100644 --- a/tpgtools/property.go +++ b/tpgtools/property.go @@ -166,16 +166,33 @@ func (p Property) PackageJSONName() string { // PackagePath is the title-cased path of a type (relative to the resource) for // use in naming functions. For example, "MachineType" or "NodeConfigPreemptible". 
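// For a property that resolves to a shared reference type (p.ref != ""), the
// path is the referenced type's name rather than a chain rooted at the parent.
// A sketch of the intended resolution, using names from the Dataproc spec in
// this PR (exact strings illustrative): a property whose schema is a $ref to
// the shared ClusterConfig type yields
//
//   p.PackagePath() // "ClusterConfig" -- the reference short-circuits
//
// while an ordinary nested property still composes its parents, e.g.
//
//   p.PackagePath() // "PlacementManagedCluster"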
func (p Property) PackagePath() string { - if p.parent != nil { - return p.parent.PackagePath() + p.PackageName - } if p.ref != "" { + glog.Errorf("Returning ref : %v", p.ref) return p.ref } + if p.parent != nil { + return p.parent.PackagePath() + p.PackageName + } return p.PackageName } +func (p Property) ObjectType() string { + parent := p + // Look up chain to see if we are within a reference + // types within a reference should not use the parent resource's type + for { + if parent.ref != "" { + return p.PackagePath() + } + if parent.parent == nil { + break + } + parent = *parent.parent + } + return fmt.Sprintf("%s%s", p.resource.Type(), p.PackagePath()) +} + func (p Property) IsArray() bool { return (p.Type.String() == SchemaTypeList || p.Type.String() == SchemaTypeSet) && !p.Type.IsObject() } @@ -432,7 +449,9 @@ func createPropertiesFromSchema(schema *openapi.Schema, typeFetcher *TypeFetcher if parent == nil { identityFields = idParts(resource.ID) } + for k, v := range schema.Properties { + glog.Errorf("Creating : %v", k) ref := "" packageName := "" @@ -447,7 +466,11 @@ func createPropertiesFromSchema(schema *openapi.Schema, typeFetcher *TypeFetcher return nil, err } ref = typeFetcher.PackagePathForReference(ref, v.Extension["x-dcl-go-type"].(string)) + glog.Errorf("Resolved is : %v", v) + glog.Errorf("Resolved props are : %v", v.Properties) } + glog.Errorf("Ref is : %v", v.Ref) + // Sub-properties are referenced by name, and the explicit title value // won't be set initially. v.Title = k diff --git a/tpgtools/resource.go b/tpgtools/resource.go index eaa89aa06a8e..cce0c71dd64c 100644 --- a/tpgtools/resource.go +++ b/tpgtools/resource.go @@ -20,6 +20,7 @@ import ( "strings" "bitbucket.org/creachadair/stringset" + "github.com/golang/glog" "github.com/nasa9084/go-openapi" ) @@ -381,6 +382,7 @@ func createResource(schema *openapi.Schema, typeFetcher *TypeFetcher, overrides if err != nil { return nil, err } + glog.Errorf("Props are : %#v", props) res.Properties = props diff --git a/tpgtools/templates/resource.go.tmpl b/tpgtools/templates/resource.go.tmpl index 61d47e7ccfe6..36becd191eb7 100644 --- a/tpgtools/templates/resource.go.tmpl +++ b/tpgtools/templates/resource.go.tmpl @@ -456,8 +456,8 @@ func resource{{$.PathType}}Import(d *schema.ResourceData, meta interface{}) ([]* {{ range $v := .Objects -}} {{ if $v.Settable -}} {{ if $v.Collapsed -}} -func expand{{$.PathType}}{{$v.PackagePath}}Collapsed(d *schema.ResourceData) *{{$.Package}}.{{$.Type}}{{$v.PackagePath}} { - collapsed := {{$.Package}}.{{$.Type}}{{$v.PackagePath}}{ +func expand{{$.PathType}}{{$v.PackagePath}}Collapsed(d *schema.ResourceData) *{{$.Package}}.{{$v.ObjectType}} { + collapsed := {{$.Package}}.{{$v.ObjectType}}{ {{- range $p := $v.Properties }} {{- if and ($p.Settable) ($p.StateGetter) }} {{$p.PackageName}}: {{$p.StateGetter}}, @@ -465,7 +465,7 @@ func expand{{$.PathType}}{{$v.PackagePath}}Collapsed(d *schema.ResourceData) *{{ {{ end }} } // Return nil if empty - if ({{$.Package}}.{{$.Type}}{{$v.PackagePath}}{}) == collapsed { + if ({{$.Package}}.{{$v.ObjectType}}{}) == collapsed { return nil } return &collapsed @@ -473,7 +473,7 @@ func expand{{$.PathType}}{{$v.PackagePath}}Collapsed(d *schema.ResourceData) *{{ {{ else -}} {{ if $v.IsArray -}} -func expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}.{{$.Type}}{{$v.PackagePath}} { +func expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}.{{$v.ObjectType}} { if o == nil { return nil } @@ -487,7 +487,7 @@ func 
expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}. return nil } - items := make([]{{$.Package}}.{{$.Type}}{{$v.PackagePath}}, 0, len(objs)) + items := make([]{{$.Package}}.{{$v.ObjectType}}, 0, len(objs)) for _, item := range objs { i := expand{{$.PathType}}{{$v.PackagePath}}(item) items = append(items, *i) @@ -497,7 +497,7 @@ func expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}. } {{- end }} -func expand{{$.PathType}}{{$v.PackagePath}}(o interface{}) *{{$.Package}}.{{$.Type}}{{$v.PackagePath}} { +func expand{{$.PathType}}{{$v.PackagePath}}(o interface{}) *{{$.Package}}.{{$v.ObjectType}} { if o == nil { return nil } @@ -510,7 +510,7 @@ func expand{{$.PathType}}{{$v.PackagePath}}(o interface{}) *{{$.Package}}.{{$.Ty {{- else }} obj := o.(map[string]interface{}) {{- end }} - return &{{$.Package}}.{{$.Type}}{{$v.PackagePath}}{ + return &{{$.Package}}.{{$v.ObjectType}}{ {{- range $p := $v.Properties }} {{- if and ($p.Settable) ($p.ExpandGetter) }} {{$p.PackageName}}: {{$p.ExpandGetter}}, @@ -522,7 +522,7 @@ func expand{{$.PathType}}{{$v.PackagePath}}(o interface{}) *{{$.Package}}.{{$.Ty {{- end }} {{ if $v.IsArray -}} -func flatten{{$.PathType}}{{$v.PackagePath}}Array(objs []{{$.Package}}.{{$.Type}}{{$v.PackagePath}}) []interface{} { +func flatten{{$.PathType}}{{$v.PackagePath}}Array(objs []{{$.Package}}.{{$v.ObjectType}}) []interface{} { if objs == nil { return nil } @@ -537,7 +537,7 @@ func flatten{{$.PathType}}{{$v.PackagePath}}Array(objs []{{$.Package}}.{{$.Type} } {{- end }} -func flatten{{$.PathType}}{{$v.PackagePath}}(obj *{{$.Package}}.{{$.Type}}{{$v.PackagePath}}) interface{} { +func flatten{{$.PathType}}{{$v.PackagePath}}(obj *{{$.Package}}.{{$v.ObjectType}}) interface{} { if obj == nil { return nil } @@ -557,7 +557,7 @@ func flatten{{$.PathType}}{{$v.PackagePath}}(obj *{{$.Package}}.{{$.Type}}{{$v.P {{ end -}} {{ range $v := .EnumArrays -}} -func flatten{{$.PathType}}{{$v.PackagePath}}Array(obj []{{$.Package}}.{{$.Type}}{{$v.PackagePath}}Enum) interface{} { +func flatten{{$.PathType}}{{$v.PackagePath}}Array(obj []{{$.Package}}.{{$v.ObjectType}}Enum) interface{} { if obj == nil { return nil } @@ -568,11 +568,11 @@ func flatten{{$.PathType}}{{$v.PackagePath}}Array(obj []{{$.Package}}.{{$.Type}} return items } -func expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}.{{$.Type}}{{$v.PackagePath}}Enum { +func expand{{$.PathType}}{{$v.PackagePath}}Array(o interface{}) []{{$.Package}}.{{$v.ObjectType}}Enum { objs := o.([]interface{}) - items := make([]{{$.Package}}.{{$.Type}}{{$v.PackagePath}}Enum, 0, len(objs)) + items := make([]{{$.Package}}.{{$v.ObjectType}}Enum, 0, len(objs)) for _, item := range objs { - i := {{$.Package}}.{{$.Type}}{{$v.PackagePath}}EnumRef(item.(string)) + i := {{$.Package}}.{{$v.ObjectType}}EnumRef(item.(string)) items = append(items, *i) } return items From 78e176c1657689df509b0e13bc7c62c87aa79ecf Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Thu, 1 Apr 2021 15:16:04 -0700 Subject: [PATCH 07/11] Updating expanders --- mmv1/third_party/terraform/utils/config.go.erb | 5 +++++ tpgtools/handwritten/expanders.go | 17 +++++++++++++++++ tpgtools/ignored_handwritten/expanders.go | 14 -------------- 3 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 tpgtools/handwritten/expanders.go diff --git a/mmv1/third_party/terraform/utils/config.go.erb b/mmv1/third_party/terraform/utils/config.go.erb index e7240b50ba6b..74532b0c84b5 100644 --- 
a/mmv1/third_party/terraform/utils/config.go.erb +++ b/mmv1/third_party/terraform/utils/config.go.erb @@ -64,6 +64,7 @@ import ( "google.golang.org/api/storagetransfer/v1" "google.golang.org/api/transport" dcl "github.com/GoogleCloudPlatform/declarative-resource-client-library/dcl" + dataprocDcl "github.com/GoogleCloudPlatform/declarative-resource-client-library/services/google/dataproc<% unless version == 'ga' -%>/beta<% end -%>" eventarcDcl "github.com/GoogleCloudPlatform/declarative-resource-client-library/services/google/eventarc<% unless version == 'ga' -%>/beta<% end -%>" ) @@ -121,6 +122,8 @@ type Config struct { // start DCL clients dclConfig *dcl.Config + clientDataprocDCL *dataprocDcl.Client +<% unless version == 'ga' -%> clientEventarcDCL *eventarcDcl.Client } @@ -186,6 +189,8 @@ func (c *Config) LoadAndValidate(ctx context.Context) error { // Start DCL client instantiation // TODO(slevenick): handle user agents c.dclConfig = dcl.NewConfig(dcl.WithHTTPClient(client), dcl.WithUserAgent(c.userAgent), dcl.WithLogger(dclLogger{})) + c.clientDataprocDCL = dataprocDcl.NewClient(c.dclConfig) +<% unless version == 'ga' -%> c.clientEventarcDCL = eventarcDcl.NewClient(c.dclConfig) return nil diff --git a/tpgtools/handwritten/expanders.go b/tpgtools/handwritten/expanders.go new file mode 100644 index 000000000000..c84756ba8c51 --- /dev/null +++ b/tpgtools/handwritten/expanders.go @@ -0,0 +1,17 @@ +package google + +import "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + +func expandStringArray(v interface{}) []string { + arr, ok := v.([]string) + + if ok { + return arr + } + + if arr, ok := v.(*schema.Set); ok { + return convertStringSet(arr) + } + + return convertStringArr(v.([]interface{})) +} diff --git a/tpgtools/ignored_handwritten/expanders.go b/tpgtools/ignored_handwritten/expanders.go index 976e7384b66c..66ff178cf770 100644 --- a/tpgtools/ignored_handwritten/expanders.go +++ b/tpgtools/ignored_handwritten/expanders.go @@ -2,20 +2,6 @@ package google import "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" -func expandStringArray(v interface{}) []string { - arr, ok := v.([]string) - - if ok { - return arr - } - - if arr, ok := v.(*schema.Set); ok { - return convertStringSet(arr) - } - - return convertStringArr(v.([]interface{})) -} - func expandComputeRouteNextHopInstance(v interface{}, d TerraformResourceData, config *Config) *string { if v == "" { return nil From 7189b1711b0e3fb0902043b25b4a2f9407fdbe35 Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Fri, 2 Apr 2021 11:56:17 -0700 Subject: [PATCH 08/11] Updates to prop, resource to handle refs --- ...rce_dataproc_workflow_template_test.go.erb | 19 ++++++++++--------- .../api/dataproc/beta/workflow_template.yaml | 9 --------- tpgtools/api/dataproc/workflow_template.yaml | 9 --------- tpgtools/property.go | 7 +------ tpgtools/resource.go | 2 -- 5 files changed, 11 insertions(+), 35 deletions(-) diff --git a/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb index 5f4fa6776cd2..1441d04cfb2e 100644 --- a/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb +++ b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb @@ -54,18 +54,19 @@ resource "google_dataproc_workflow_template" "template" { num_instances = 1 machine_type = "n1-standard-1" disk_config { - disk_boot_type = "pd-ssd" - disk_boot_size_gb = 15 + boot_disk_type = "pd-ssd" + 
boot_disk_size_gb = 15 } } worker_config { num_instances = 3 machine_type = "n1-standard-2" + disk_config { + boot_disk_size_gb = 10 + num_local_ssds = 2 + } } - disk_config { - boot_disk_size_gb = 10 - num_local_ssds = 2 - } + secondary_worker_config { num_instances = 2 } @@ -77,14 +78,14 @@ resource "google_dataproc_workflow_template" "template" { } jobs { step_id = "someJob" - spark_job = { + spark_job { main_class = "SomeClass" } } jobs { step_id = "otherJob" - prerequisite_job_steps = ["someJob"] - presto_job = { + prerequisite_step_ids = ["someJob"] + presto_job { query_file_uri = "someuri" } } diff --git a/tpgtools/api/dataproc/beta/workflow_template.yaml b/tpgtools/api/dataproc/beta/workflow_template.yaml index 240360c298e4..d20344dfc328 100644 --- a/tpgtools/api/dataproc/beta/workflow_template.yaml +++ b/tpgtools/api/dataproc/beta/workflow_template.yaml @@ -485,15 +485,6 @@ components: type: string x-dcl-go-name: ImageVersion x-kubernetes-immutable: true - optionalComponents: - description: Optional. The set of components to activate on the cluster. - items: - type: string - x-dcl-go-type: ClusterClusterConfigSoftwareConfigOptionalComponentsEnum - type: array - x-dcl-go-name: OptionalComponents - x-dcl-list-type: list - x-kubernetes-immutable: true properties: additionalProperties: type: string diff --git a/tpgtools/api/dataproc/workflow_template.yaml b/tpgtools/api/dataproc/workflow_template.yaml index 2f800d776ba5..deb5b6e9425f 100644 --- a/tpgtools/api/dataproc/workflow_template.yaml +++ b/tpgtools/api/dataproc/workflow_template.yaml @@ -435,15 +435,6 @@ components: type: string x-dcl-go-name: ImageVersion x-kubernetes-immutable: true - optionalComponents: - description: Optional. The set of components to activate on the cluster. - items: - type: string - x-dcl-go-type: ClusterClusterConfigSoftwareConfigOptionalComponentsEnum - type: array - x-dcl-go-name: OptionalComponents - x-dcl-list-type: list - x-kubernetes-immutable: true properties: additionalProperties: type: string diff --git a/tpgtools/property.go b/tpgtools/property.go index d090bee3546e..95b5bbaf3537 100644 --- a/tpgtools/property.go +++ b/tpgtools/property.go @@ -167,7 +167,6 @@ func (p Property) PackageJSONName() string { // use in naming functions. For example, "MachineType" or "NodeConfigPreemptible". 
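// Note the division of labor in the generated code: PackagePath feeds the
// names of generated helpers (expand.../flatten...), while ObjectType supplies
// the DCL Go type name; the two diverge for properties nested inside a shared,
// referenced type such as ClusterConfig.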
func (p Property) PackagePath() string { if p.ref != "" { - glog.Errorf("Returning ref : %v", p.ref) return p.ref } if p.parent != nil { @@ -228,7 +227,7 @@ func buildGetter(p Property, rawGetter string) string { return fmt.Sprintf("dcl.Bool(%s.(bool))", rawGetter) case SchemaTypeString: if p.Type.IsEnum() { - return fmt.Sprintf("%s.%s%sEnumRef(%s.(string))", p.resource.Package, p.resource.Type(), p.PackagePath(), rawGetter) + return fmt.Sprintf("%s.%sEnumRef(%s.(string))", p.resource.Package, p.ObjectType(), rawGetter) } if p.sendEmpty { return fmt.Sprintf("dcl.String(%s.(string))", rawGetter) @@ -451,7 +450,6 @@ func createPropertiesFromSchema(schema *openapi.Schema, typeFetcher *TypeFetcher } for k, v := range schema.Properties { - glog.Errorf("Creating : %v", k) ref := "" packageName := "" @@ -466,10 +464,7 @@ func createPropertiesFromSchema(schema *openapi.Schema, typeFetcher *TypeFetcher return nil, err } ref = typeFetcher.PackagePathForReference(ref, v.Extension["x-dcl-go-type"].(string)) - glog.Errorf("Resolved is : %v", v) - glog.Errorf("Resolved props are : %v", v.Properties) } - glog.Errorf("Ref is : %v", v.Ref) // Sub-properties are referenced by name, and the explicit title value // won't be set initially. diff --git a/tpgtools/resource.go b/tpgtools/resource.go index cce0c71dd64c..eaa89aa06a8e 100644 --- a/tpgtools/resource.go +++ b/tpgtools/resource.go @@ -20,7 +20,6 @@ import ( "strings" "bitbucket.org/creachadair/stringset" - "github.com/golang/glog" "github.com/nasa9084/go-openapi" ) @@ -382,7 +381,6 @@ func createResource(schema *openapi.Schema, typeFetcher *TypeFetcher, overrides if err != nil { return nil, err } - glog.Errorf("Props are : %#v", props) res.Properties = props From f2ede25e83f793f11bdde2658618a58a54061a8b Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Tue, 6 Apr 2021 16:14:24 -0700 Subject: [PATCH 09/11] Add support for workflow in GA --- .../tests/resource_dataproc_workflow_template_test.go.erb | 2 +- mmv1/third_party/terraform/utils/provider.go.erb | 1 - tpgtools/api/dataproc/beta/workflow_template.yaml | 3 +++ tpgtools/api/dataproc/workflow_template.yaml | 3 +++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb index 1441d04cfb2e..876a784968fa 100644 --- a/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb +++ b/mmv1/third_party/terraform/tests/resource_dataproc_workflow_template_test.go.erb @@ -20,7 +20,7 @@ func TestAccDataprocWorkflowTemplate_basic(t *testing.T) { vcrTest(t, resource.TestCase{ PreCheck: func() { testAccPreCheck(t) }, Providers: testAccProviders, - CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t), + CheckDestroy: funcAccTestDataprocWorkflowTemplateCheckDestroy(t), ExternalProviders: map[string]resource.ExternalProvider{ "random": {}, }, diff --git a/mmv1/third_party/terraform/utils/provider.go.erb b/mmv1/third_party/terraform/utils/provider.go.erb index de7f0c715216..459d08143180 100644 --- a/mmv1/third_party/terraform/utils/provider.go.erb +++ b/mmv1/third_party/terraform/utils/provider.go.erb @@ -365,7 +365,6 @@ end # products.each do "google_dataproc_cluster": resourceDataprocCluster(), "google_dataproc_job": resourceDataprocJob(), "google_dataproc_workflow_template": resourceDataprocWorkflowTemplate(), - "google_dns_record_set": resourceDnsRecordSet(), "google_endpoints_service": resourceEndpointsService(), 
"google_eventarc_trigger": resourceEventarcTrigger(), "google_folder": resourceGoogleFolder(), diff --git a/tpgtools/api/dataproc/beta/workflow_template.yaml b/tpgtools/api/dataproc/beta/workflow_template.yaml index d20344dfc328..6396e7cc0a76 100644 --- a/tpgtools/api/dataproc/beta/workflow_template.yaml +++ b/tpgtools/api/dataproc/beta/workflow_template.yaml @@ -88,6 +88,7 @@ components: type: boolean x-dcl-go-name: InternalIPOnly x-kubernetes-immutable: true + x-dcl-server-default: true metadata: additionalProperties: type: string @@ -593,6 +594,7 @@ components: type: integer x-dcl-go-name: NumLocalSsds x-kubernetes-immutable: true + x-dcl-server-default: true type: object x-dcl-go-name: DiskConfig x-dcl-go-type: ClusterInstanceGroupConfigDiskConfig @@ -1674,6 +1676,7 @@ components: type: integer x-dcl-go-name: Version x-kubernetes-immutable: true + x-dcl-server-default: true required: - name - placement diff --git a/tpgtools/api/dataproc/workflow_template.yaml b/tpgtools/api/dataproc/workflow_template.yaml index deb5b6e9425f..e8b91e74891c 100644 --- a/tpgtools/api/dataproc/workflow_template.yaml +++ b/tpgtools/api/dataproc/workflow_template.yaml @@ -88,6 +88,7 @@ components: type: boolean x-dcl-go-name: InternalIPOnly x-kubernetes-immutable: true + x-dcl-server-default: true metadata: additionalProperties: type: string @@ -543,6 +544,7 @@ components: type: integer x-dcl-go-name: NumLocalSsds x-kubernetes-immutable: true + x-dcl-server-default: true type: object x-dcl-go-name: DiskConfig x-dcl-go-type: ClusterInstanceGroupConfigDiskConfig @@ -1612,6 +1614,7 @@ components: type: integer x-dcl-go-name: Version x-kubernetes-immutable: true + x-dcl-server-default: true required: - name - placement From fc1126749a8e781b01cd856bce11981bd6131d1f Mon Sep 17 00:00:00 2001 From: Sam Levenick Date: Tue, 13 Apr 2021 12:23:29 -0700 Subject: [PATCH 10/11] fix merge issues --- .../resource_eventarc_trigger_test.go.erb | 210 ------------------ .../third_party/terraform/utils/config.go.erb | 2 - .../docs/r/eventarc_trigger.html.markdown | 15 +- 3 files changed, 1 insertion(+), 226 deletions(-) delete mode 100644 mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb diff --git a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb b/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb deleted file mode 100644 index a62423354219..000000000000 --- a/mmv1/third_party/terraform/tests/resource_eventarc_trigger_test.go.erb +++ /dev/null @@ -1,210 +0,0 @@ -<% autogen_exception -%> -package google -<% unless version == 'ga' -%> -import ( - "testing" - - "github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource" -) - -func TestAccEventarcTrigger_basic(t *testing.T) { - // DCL currently fails due to transport modification - skipIfVcr(t) - t.Parallel() - - context := map[string]interface{}{ - "random_suffix": randString(t, 10), - "project": getTestProjectFromEnv(), - } - - vcrTest(t, resource.TestCase{ - PreCheck: func() { testAccPreCheck(t) }, - Providers: testAccProvidersOiCS, - CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t), - Steps: []resource.TestStep{ - { - Config: testAccEventarcTrigger_basic(context), - }, - { - ImportState: true, - ImportStateVerify: true, - ResourceName: "google_eventarc_trigger.trigger", - }, - }, - }) -} - -func TestAccEventarcTrigger_transport(t *testing.T) { - // DCL currently fails due to transport modification - skipIfVcr(t) - t.Parallel() - - context := map[string]interface{}{ - "random_suffix": randString(t, 10), - 
"project": getTestProjectFromEnv(), - } - - vcrTest(t, resource.TestCase{ - PreCheck: func() { testAccPreCheck(t) }, - Providers: testAccProvidersOiCS, - CheckDestroy: funcAccTestEventarcTriggerCheckDestroy(t), - ExternalProviders: map[string]resource.ExternalProvider{ - "random": {}, - }, - Steps: []resource.TestStep{ - { - Config: testAccEventarcTrigger_transport(context), - }, - { - ImportState: true, - ImportStateVerify: true, - ResourceName: "google_eventarc_trigger.trigger", - }, - }, - }) -} - -func testAccEventarcTrigger_basic(context map[string]interface{}) string { - return Nprintf(` -resource "google_eventarc_trigger" "trigger" { - provider = google-beta - name = "trigger%{random_suffix}" - location = "us-central1" - matching_criteria { - attribute = "type" - value = "google.cloud.pubsub.topic.v1.messagePublished" - } - destination { - cloud_run_service { - service = google_cloud_run_service.default.name - region = "us-central1" - } - } - labels = { - foo = "bar" - } -} - -resource "google_pubsub_topic" "foo" { - provider = google-beta - name = "topic%{random_suffix}" -} - -resource "google_cloud_run_service" "default" { - provider = google-beta - name = "service-eventarc%{random_suffix}" - location = "us-central1" - - metadata { - namespace = "%{project}" - } - - template { - spec { - containers { - image = "gcr.io/cloudrun/hello" - args = ["arrgs"] - } - container_concurrency = 50 - } - } - - traffic { - percent = 100 - latest_revision = true - } -} -`, context) -} - -func testAccEventarcTrigger_transport(context map[string]interface{}) string { - return Nprintf(` -resource "google_eventarc_trigger" "trigger" { - provider = google-beta - name = "trigger%{random_suffix}" - location = "us-central1" - matching_criteria { - attribute = "type" - value = "google.cloud.pubsub.topic.v1.messagePublished" - } - destination { - cloud_run_service { - service = google_cloud_run_service.default.name - region = "us-central1" - } - } - transport { - pubsub { - topic = google_pubsub_topic.foo.id - } - } -} - -resource "google_pubsub_topic" "foo" { - provider = google-beta - name = "topic%{random_suffix}" -} - -resource "google_cloud_run_service" "default" { - provider = google-beta - name = "service-eventarc%{random_suffix}" - location = "us-central1" - - metadata { - namespace = "%{project}" - } - - template { - spec { - containers { - image = "gcr.io/cloudrun/hello" - args = ["arrgs"] - } - container_concurrency = 50 - } - } - - traffic { - percent = 100 - latest_revision = true - } -} -`, context) -} - -func funcAccTestEventarcTriggerCheckDestroy(t *testing.T) func(s *terraform.State) error { - return func(s *terraform.State) error { - for name, rs := range s.RootModule().Resources { - if rs.Type != "google_eventarc_trigger" { - continue - } - if strings.HasPrefix(name, "data.") { - continue - } - - config := googleProviderConfig(t) - - url, err := replaceVarsForTest(config, rs, "{{EventarcBasePath}}projects/{{project}}/locations/{{location}}/triggers/{{name}}") - if err != nil { - return err - } - - billingProject := "" - - if config.BillingProject != "" { - billingProject = config.BillingProject - } - - _, err = sendRequest(config, "GET", billingProject, url, config.userAgent, nil) - if err == nil { - return fmt.Errorf("EventarcTrigger still exists at %s", url) - } - } - - return nil - } -} - -<% else %> -// Magic Modules doesn't let us remove files - blank out beta-only common-compile files for now. 
-<% end -%> diff --git a/mmv1/third_party/terraform/utils/config.go.erb b/mmv1/third_party/terraform/utils/config.go.erb index 74532b0c84b5..a8c4e24a20b1 100644 --- a/mmv1/third_party/terraform/utils/config.go.erb +++ b/mmv1/third_party/terraform/utils/config.go.erb @@ -123,7 +123,6 @@ type Config struct { // start DCL clients dclConfig *dcl.Config clientDataprocDCL *dataprocDcl.Client -<% unless version == 'ga' -%> clientEventarcDCL *eventarcDcl.Client } @@ -190,7 +189,6 @@ func (c *Config) LoadAndValidate(ctx context.Context) error { // TODO(slevenick): handle user agents c.dclConfig = dcl.NewConfig(dcl.WithHTTPClient(client), dcl.WithUserAgent(c.userAgent), dcl.WithLogger(dclLogger{})) c.clientDataprocDCL = dataprocDcl.NewClient(c.dclConfig) -<% unless version == 'ga' -%> c.clientEventarcDCL = eventarcDcl.NewClient(c.dclConfig) return nil diff --git a/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown b/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown index 4221aeeef3c1..7aebecde9180 100644 --- a/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown +++ b/mmv1/third_party/terraform/website/docs/r/eventarc_trigger.html.markdown @@ -89,7 +89,7 @@ The following arguments are supported: * `matching_criteria` - (Required) - Required. null The criteria by which events are filtered. Only events that match with this criteria will be sent to the destination. + Required. The criteria by which events are filtered. Only events that match with this criteria will be sent to the destination. * `name` - (Required) @@ -144,19 +144,6 @@ The `cloud_run_service` block supports: * `region` - (Optional) Required. The region the Cloud Run service is deployed in. - The `transport` block supports: - -* `pubsub` - - (Optional) - The Pub/Sub topic and subscription used by Eventarc as delivery intermediary. - The `pubsub` block supports: - -* `topic` - - (Optional) - Optional. The name of the Pub/Sub topic created and managed by Eventarc system as a transport for the event delivery. Format: `projects/{PROJECT_ID}/topics/{TOPIC_NAME}`. You may set an existing topic for triggers of the type `google.cloud.pubsub.topic.v1.messagePublished` only. The topic you provide here will not be deleted by Eventarc at trigger deletion. - -* `subscription` - - Output only. The name of the Pub/Sub subscription created and managed by Eventarc system as a transport for the event delivery. Format: `projects/{PROJECT_ID}/subscriptions/{SUBSCRIPTION_NAME}`. 
The `transport` block supports:

From 0c5ed1308d442995f82d093561e0a0732ad5b677 Mon Sep 17 00:00:00 2001
From: Sam Levenick
Date: Wed, 14 Apr 2021 11:49:16 -0700
Subject: [PATCH 11/11] Add template markdown

---
 .../dataproc_workflow_template.html.markdown  | 895 ++++++++++++++++++
 1 file changed, 895 insertions(+)
 create mode 100644 mmv1/third_party/terraform/website/docs/r/dataproc_workflow_template.html.markdown

diff --git a/mmv1/third_party/terraform/website/docs/r/dataproc_workflow_template.html.markdown b/mmv1/third_party/terraform/website/docs/r/dataproc_workflow_template.html.markdown
new file mode 100644
index 000000000000..e725a51818b3
--- /dev/null
+++ b/mmv1/third_party/terraform/website/docs/r/dataproc_workflow_template.html.markdown
@@ -0,0 +1,895 @@
+---
+# ----------------------------------------------------------------------------
+#
+#     ***     AUTO GENERATED CODE    ***    AUTO GENERATED CODE     ***
+#
+# ----------------------------------------------------------------------------
+#
+#     This file is automatically generated by Magic Modules and manual
+#     changes will be clobbered when the file is regenerated.
+#
+#     Please read more about how to change this file in
+#     .github/CONTRIBUTING.md.
+#
+# ----------------------------------------------------------------------------
+subcategory: "Dataproc"
+layout: "google"
+page_title: "Google: google_dataproc_workflow_template"
+sidebar_current: "docs-google-dataproc-workflow-template"
+description: |-
+  A Workflow Template is a reusable workflow configuration.
+---
+
+# google\_dataproc\_workflow\_template
+
+A Workflow Template is a reusable workflow configuration. It defines a graph of jobs with information on where to run those jobs.
+
+## Example Usage
+
+```hcl
+resource "google_dataproc_workflow_template" "template" {
+  name = "template-example"
+  location = "us-central1"
+  placement {
+    managed_cluster {
+      cluster_name = "my-cluster"
+      config {
+        gce_cluster_config {
+          zone = "us-central1-a"
+          tags = ["foo", "bar"]
+        }
+        master_config {
+          num_instances = 1
+          machine_type = "n1-standard-1"
+          disk_config {
+            boot_disk_type = "pd-ssd"
+            boot_disk_size_gb = 15
+          }
+        }
+        worker_config {
+          num_instances = 3
+          machine_type = "n1-standard-2"
+          disk_config {
+            boot_disk_size_gb = 10
+            num_local_ssds = 2
+          }
+        }
+
+        secondary_worker_config {
+          num_instances = 2
+        }
+        software_config {
+          image_version = "1.3.7-deb9"
+        }
+      }
+    }
+  }
+  jobs {
+    step_id = "someJob"
+    spark_job {
+      main_class = "SomeClass"
+    }
+  }
+  jobs {
+    step_id = "otherJob"
+    prerequisite_step_ids = ["someJob"]
+    presto_job {
+      query_file_uri = "someuri"
+    }
+  }
+}
+```
+
+## Argument Reference
+
+The following arguments are supported:
+
+* `jobs` -
+  (Required)
+  Required. The Directed Acyclic Graph of Jobs to submit.
+
+* `location` -
+  (Required)
+  The location for the resource
+
+* `name` -
+  (Required)
+  Output only. The resource name of the workflow template, as described in https://cloud.google.com/apis/design/resource_names. * For `projects.regions.workflowTemplates`, the resource name of the template has the following format: `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` * For `projects.locations.workflowTemplates`, the resource name of the template has the following format: `projects/{project_id}/locations/{location}/workflowTemplates/{template_id}`
+
+* `placement` -
+  (Required)
+  Required. WorkflowTemplate scheduling information.
+
+
+The `jobs` block supports:
+
+* `hadoop_job` -
+  (Optional)
+  Optional. Job is a Hadoop job.
+
+* `hive_job` -
+  (Optional)
+  Optional. Job is a Hive job.
+
+* `labels` -
+  (Optional)
+  Optional. The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: {0,63} No more than 32 labels can be associated with a given job.
+
+* `pig_job` -
+  (Optional)
+  Optional. Job is a Pig job.
+
+* `prerequisite_step_ids` -
+  (Optional)
+  Optional. The optional list of prerequisite job step_ids. If not specified, the job will start at the beginning of workflow.
+
+* `presto_job` -
+  (Optional)
+  Optional. Job is a Presto job.
+
+* `pyspark_job` -
+  (Optional)
+  Optional. Job is a PySpark job.
+
+* `scheduling` -
+  (Optional)
+  Optional. Job scheduling configuration.
+
+* `spark_job` -
+  (Optional)
+  Optional. Job is a Spark job.
+
+* `spark_r_job` -
+  (Optional)
+  Optional. Job is a SparkR job.
+
+* `spark_sql_job` -
+  (Optional)
+  Optional. Job is a SparkSql job.
+
+* `step_id` -
+  (Required)
+  Required. The step id. The id must be unique among all jobs within the template. The step id is used as prefix for job id, as job `goog-dataproc-workflow-step-id` label, and in field from other steps. The id must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of between 3 and 50 characters.
+    The `placement` block supports:
+
+* `cluster_selector` -
+  (Optional)
+  Optional. A selector that chooses target cluster for jobs based on metadata. The selector is evaluated at the time each job is submitted.
+
+* `managed_cluster` -
+  (Optional)
+  A cluster that is managed by the workflow.
+    The `config` block supports:
+
+* `autoscaling_config` -
+  (Optional)
+  Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset.
+
+* `encryption_config` -
+  (Optional)
+  Optional. Encryption settings for the cluster.
+
+* `endpoint_config` -
+  (Optional)
+  Optional. Port/endpoint configuration for this cluster.
+
+* `gce_cluster_config` -
+  (Optional)
+  Optional. The shared Compute Engine config settings for all instances in a cluster.
+
+* `gke_cluster_config` -
+  (Optional)
+  Optional. The Kubernetes Engine config for Dataproc clusters deployed to Kubernetes. Setting this is considered mutually exclusive with Compute Engine-based options such as `gce_cluster_config`, `master_config`, `worker_config`, `secondary_worker_config`, and `autoscaling_config`.
+
+* `initialization_actions` -
+  (Optional)
+  Optional. Commands to execute on each node after config is completed. By default, executables are run on master and all worker nodes. You can test a node's `role` metadata to run an executable on a master or worker node, as shown below using `curl` (you can also use `wget`): ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) if ; then ... master specific actions ... else ... worker specific actions ... fi
+
+* `lifecycle_config` -
+  (Optional)
+  Optional. Lifecycle setting for the cluster.
+
+* `master_config` -
+  (Optional)
+  Optional. The Compute Engine config settings for the master instance in a cluster.
+
+* `metastore_config` -
+  (Optional)
+  Optional. Metastore configuration.
+
+* `secondary_worker_config` -
+  (Optional)
+  Optional. The Compute Engine config settings for additional worker instances in a cluster.
+
+* `security_config` -
+  (Optional)
+  Optional. Security settings for the cluster.
+ +* `software_config` - + (Optional) + Optional. The config settings for software inside the cluster. + +* `staging_bucket` - + (Optional) + Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). + +* `temp_bucket` - + (Optional) + Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket. + +* `worker_config` - + (Optional) + Optional. The Compute Engine config settings for additional worker instances in a cluster. + +- - - + +* `dag_timeout` - + (Optional) + (Beta only) Optional. Timeout duration for the DAG of jobs. You can use "s", "m", "h", and "d" suffixes for second, minute, hour, and day duration values, respectively. The timeout duration must be from 10 minutes ("10m") to 24 hours ("24h" or "1d"). The timer begins when the first job is submitted. If the workflow is running at the end of the timeout period, any remaining jobs are cancelled, the workflow is ended, and if the workflow was running on a (/dataproc/docs/concepts/workflows/using-workflows#configuring_or_selecting_a_cluster), the cluster is deleted. + +* `labels` - + (Optional) + Optional. The labels to associate with this template. These labels will be propagated to all jobs and clusters created by the workflow instance. Label **keys** must contain 1 to 63 characters, and must conform to (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be associated with a template. + +* `parameters` - + (Optional) + Optional. Template parameters whose values are substituted into the template. Values for parameters must be provided when the template is instantiated. + +* `project` - + (Optional) + The project for the resource + +* `version` - + (Optional) + Optional. Used to perform a consistent read-modify-write. This field should be left blank for a `CreateWorkflowTemplate` request. It is required for an `UpdateWorkflowTemplate` request, and must match the current server version. A typical update template flow would fetch the current template with a `GetWorkflowTemplate` request, which will return the current template with the `version` field filled in with the current server version. The user updates other fields in the template, then returns it as part of the `UpdateWorkflowTemplate` request. + + +The `hadoop_job` block supports: + +* `archive_uris` - + (Optional) + Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. + +* `args` - + (Optional) + Optional. The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. 
+ +* `file_uris` - + (Optional) + Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks. + +* `jar_file_uris` - + (Optional) + Optional. Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks. + +* `logging_config` - + (Optional) + Optional. The runtime log config for job execution. + +* `main_class` - + (Optional) + The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`. + +* `main_jar_file_uri` - + (Optional) + The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' + +* `properties` - + (Optional) + Optional. A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code. + The `logging_config` block supports: + +* `driver_log_levels` - + (Optional) + The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' + The `hive_job` block supports: + +* `continue_on_failure` - + (Optional) + Optional. Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. + +* `jar_file_uris` - + (Optional) + Optional. HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. + +* `properties` - + (Optional) + Optional. A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code. + +* `query_file_uri` - + (Optional) + The HCFS URI of the script that contains Hive queries. + +* `query_list` - + (Optional) + A list of queries. + +* `script_variables` - + (Optional) + Optional. Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`). + The `query_list` block supports: + +* `queries` - + (Required) + Required. The queries to execute. You do not need to end a query expression with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": } } + The `pig_job` block supports: + +* `continue_on_failure` - + (Optional) + Optional. Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. + +* `jar_file_uris` - + (Optional) + Optional. HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. + +* `logging_config` - + (Optional) + Optional. The runtime log config for job execution. + +* `properties` - + (Optional) + Optional. A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Dataproc API may be overwritten. 
+  Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
+
+* `query_file_uri` -
+  (Optional)
+  The HCFS URI of the script that contains the Pig queries.
+
+* `query_list` -
+  (Optional)
+  A list of queries.
+
+* `script_variables` -
+  (Optional)
+  Optional. Mapping of query variable names to values (equivalent to the Pig command: `name=`).
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `query_list` block supports:
+
+* `queries` -
+  (Required)
+  Required. The queries to execute. You do not need to end a query expression with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4" ] } }
+
+The `presto_job` block supports:
+
+* `client_tags` -
+  (Optional)
+  Optional. Presto client tags to attach to this query.
+
+* `continue_on_failure` -
+  (Optional)
+  Optional. Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
+
+* `logging_config` -
+  (Optional)
+  Optional. The runtime log config for job execution.
+
+* `output_format` -
+  (Optional)
+  Optional. The format in which query output will be displayed. See the Presto documentation for supported output formats.
+
+* `properties` -
+  (Optional)
+  Optional. A mapping of property names to values. Used to set Presto [session properties](https://prestodb.io/docs/current/sql/set-session.html). Equivalent to using the --session flag in the Presto CLI.
+
+* `query_file_uri` -
+  (Optional)
+  The HCFS URI of the script that contains SQL queries.
+
+* `query_list` -
+  (Optional)
+  A list of queries.
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `query_list` block supports:
+
+* `queries` -
+  (Required)
+  Required. The queries to execute. You do not need to end a query expression with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4" ] } }
+
+The `pyspark_job` block supports:
+
+* `archive_uris` -
+  (Optional)
+  Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
+
+* `args` -
+  (Optional)
+  Optional. The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+
+* `file_uris` -
+  (Optional)
+  Optional. HCFS URIs of files to be placed in the working directory of each executor. Useful for naively parallel tasks.
+
+* `jar_file_uris` -
+  (Optional)
+  Optional. HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
+
+* `logging_config` -
+  (Optional)
+  Optional. The runtime log config for job execution.
+
+* `main_python_file_uri` -
+  (Required)
+  Required. The HCFS URI of the main Python file to use as the driver.
+  Must be a .py file.
+
+* `properties` -
+  (Optional)
+  Optional. A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
+
+* `python_file_uris` -
+  (Optional)
+  Optional. HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `scheduling` block supports:
+
+* `max_failures_per_hour` -
+  (Optional)
+  Optional. Maximum number of times per hour a driver may be restarted as a result of the driver exiting with a non-zero code before the job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code 4 times within a 10-minute window. Maximum value is 10.
+
+* `max_failures_total` -
+  (Optional)
+  Optional. Maximum number of times in total a driver may be restarted as a result of the driver exiting with a non-zero code before the job is reported failed. Maximum value is 240.
+
+The `spark_job` block supports:
+
+* `archive_uris` -
+  (Optional)
+  Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
+
+* `args` -
+  (Optional)
+  Optional. The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+
+* `file_uris` -
+  (Optional)
+  Optional. HCFS URIs of files to be placed in the working directory of each executor. Useful for naively parallel tasks.
+
+* `jar_file_uris` -
+  (Optional)
+  Optional. HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
+
+* `logging_config` -
+  (Optional)
+  Optional. The runtime log config for job execution.
+
+* `main_class` -
+  (Optional)
+  The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
+
+* `main_jar_file_uri` -
+  (Optional)
+  The HCFS URI of the jar file that contains the main class.
+
+* `properties` -
+  (Optional)
+  Optional. A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `spark_r_job` block supports:
+
+* `archive_uris` -
+  (Optional)
+  Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
+
+* `args` -
+  (Optional)
+  Optional. The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+
+* `file_uris` -
+  (Optional)
+  Optional. HCFS URIs of files to be placed in the working directory of each executor. Useful for naively parallel tasks.
+
+* `logging_config` -
+  (Optional)
+  Optional. The runtime log config for job execution.
+
+* `main_r_file_uri` -
+  (Required)
+  Required. The HCFS URI of the main R file to use as the driver. Must be a .R file.
+
+* `properties` -
+  (Optional)
+  Optional. A mapping of property names to values, used to configure SparkR. Properties that conflict with values set by the Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `spark_sql_job` block supports:
+
+* `jar_file_uris` -
+  (Optional)
+  Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
+
+* `logging_config` -
+  (Optional)
+  Optional. The runtime log config for job execution.
+
+* `properties` -
+  (Optional)
+  Optional. A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Dataproc API may be overwritten.
+
+* `query_file_uri` -
+  (Optional)
+  The HCFS URI of the script that contains SQL queries.
+
+* `query_list` -
+  (Optional)
+  A list of queries.
+
+* `script_variables` -
+  (Optional)
+  Optional. Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
+
+The `logging_config` block supports:
+
+* `driver_log_levels` -
+  (Optional)
+  The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+
+The `query_list` block supports:
+
+* `queries` -
+  (Required)
+  Required. The queries to execute. You do not need to end a query expression with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4" ] } }
+
+The `parameters` block supports:
+
+* `description` -
+  (Optional)
+  Optional. Brief description of the parameter. Must not exceed 1024 characters.
+
+* `fields` -
+  (Required)
+  Required. Paths to all fields that the parameter replaces. A field is allowed to appear in at most one parameter's list of field paths. A field path is similar in syntax to a `google.protobuf.FieldMask`; for example, `placement.clusterSelector.zone` or `jobs['step-id'].sparkJob.args[0]`.
+
+* `name` -
+  (Required)
+  Required. Parameter name. The parameter name is used as the key, and paired with the parameter value, which are passed to the template when the template is instantiated. The name must contain only capital letters (A-Z), numbers (0-9), and underscores (_), and must not start with a number. The maximum length is 40 characters.
+
+* `validation` -
+  (Optional)
+  Optional. Validation rules to be applied to this parameter's value.
+
+The `validation` block supports:
+
+* `regex` -
+  (Optional)
+  Validation based on regular expressions.
+
+* `values` -
+  (Optional)
+  Validation based on a list of allowed values.
+
+The `regex` block supports:
+
+* `regexes` -
+  (Required)
+  Required. RE2 regular expressions used to validate the parameter's value. The value must match the regex in its entirety (substring matches are not sufficient).
+
+The `values` block supports:
+
+* `values` -
+  (Required)
+  Required. List of allowed values for the parameter.
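+
+As a concrete illustration of the `parameters` and `validation` reference above, a parameter with a regex validation might be written as in the following sketch (the parameter name, field path, and regex are illustrative assumptions, not values taken from this document):
+
+```hcl
+parameters {
+  name        = "CLUSTER_ZONE"
+  description = "Zone substituted into the template at instantiation time."
+  # Field paths use the FieldMask-like syntax described above; this path is hypothetical.
+  fields      = ["placement.clusterSelector.zone"]
+  validation {
+    regex {
+      # The instantiated value must match this RE2 expression in its entirety.
+      regexes = ["us-central1-[a-f]"]
+    }
+  }
+}
+```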
+
+The `cluster_selector` block supports:
+
+* `cluster_labels` -
+  (Required)
+  Required. The cluster labels. Cluster must have all labels to match.
+
+* `zone` -
+  (Optional)
+  Optional. The zone where workflow process executes. This parameter does not affect the selection of the cluster. If unspecified, the zone of the first cluster matching the selector is used.
+
+The `managed_cluster` block supports:
+
+* `cluster_name` -
+  (Required)
+  Required. The cluster name prefix. A unique cluster name will be formed by appending a random suffix. The name must contain only lower-case letters (a-z), numbers (0-9), and hyphens (-). Must begin with a letter. Cannot begin or end with hyphen. Must consist of between 2 and 35 characters.
+
+* `config` -
+  (Required)
+  Required. The cluster configuration.
+
+* `labels` -
+  (Optional)
+  Optional. The labels to associate with this cluster. Label keys must be between 1 and 63 characters long, and must conform to the following PCRE regular expression: `\p{Ll}\p{Lo}{0,62}`. Label values must be between 1 and 63 characters long, and must conform to the following PCRE regular expression: `[\p{Ll}\p{Lo}\p{N}_-]{0,63}`. No more than 32 labels can be associated with a given cluster.
+
+The `secondary_worker_config` block supports:
+
+* `accelerators` -
+  (Optional)
+  Optional. The Compute Engine accelerator configuration for these instances.
+
+* `disk_config` -
+  (Optional)
+  Optional. Disk option config settings.
+
+* `image` -
+  (Optional)
+  Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
+
+* `machine_type` -
+  (Optional)
+  Optional. The Compute Engine machine type used for cluster instances. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/` **Note:** If you use the [Dataproc Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, `n1-standard-2`.
+
+* `min_cpu_platform` -
+  (Optional)
+  Optional. Specifies the minimum CPU platform for the Instance Group. See [Dataproc minimum CPU platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
+
+* `num_instances` -
+  (Optional)
+  Optional. The number of VM instances in the instance group. For master instance groups, must be set to 1.
+
+* `preemptibility` -
+  (Optional)
+  Optional. Specifies the preemptibility of the instance group. The default value for master and worker groups is `NON_PREEMPTIBLE`. This default cannot be changed. The default value for secondary instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED, NON_PREEMPTIBLE, PREEMPTIBLE
+
+* `instance_names` -
+  Output only. The list of instance names. Dataproc derives the names from `cluster_name`, `num_instances`, and the instance group.
+
+* `is_preemptible` -
+  Output only. Specifies that this instance group contains preemptible instances.
+
+* `managed_group_config` -
+  Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups.
+
+The `accelerators` block supports:
+
+* `accelerator_count` -
+  (Optional)
+  The number of the accelerator cards of this type exposed to this instance.
+
+* `accelerator_type` -
+  (Optional)
+  Full URL, partial URI, or short name of the accelerator type resource to expose to this instance.
+  **Note:** If you use the [Dataproc Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, `nvidia-tesla-k80`.
+
+The `disk_config` block supports:
+
+* `boot_disk_size_gb` -
+  (Optional)
+  Optional. Size in GB of the boot disk (default is 500GB).
+
+* `boot_disk_type` -
+  (Optional)
+  Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive).
+
+* `num_local_ssds` -
+  (Optional)
+  Optional. Number of attached SSDs, from 0 to 4 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.
+
+The `autoscaling_config` block supports:
+
+* `policy` -
+  (Optional)
+  Optional. The autoscaling policy used by the cluster. Only resource names that include project ID and location (region) are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/` Note that the policy must be in the same project and Dataproc region.
+
+The `encryption_config` block supports:
+
+* `gce_pd_kms_key_name` -
+  (Optional)
+  Optional. The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
+
+The `endpoint_config` block supports:
+
+* `enable_http_port_access` -
+  (Optional)
+  Optional. If true, enable http access to specific ports on the cluster from external sources. Defaults to false.
+
+* `http_ports` -
+  Output only. The map of port descriptions to URLs. Will only be populated if `enable_http_port_access` is true.
+
+The `gce_cluster_config` block supports:
+
+* `internal_ip_only` -
+  (Optional)
+  Optional. If true, all instances in the cluster will only have internal IP addresses. By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. This `internal_ip_only` restriction can only be enabled for subnetwork enabled networks, and all off-cluster dependencies must be configured to be accessible without external IP addresses.
+
+* `metadata` -
+  (Optional)
+  The Compute Engine metadata entries to add to all instances (see [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
+
+* `network` -
+  (Optional)
+  Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither `network_uri` nor `subnetwork_uri` is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for more information). A full URL, partial URI, or short name are valid. Examples: * `projects/[project_id]/regions/global/default` * `default`
+
+* `node_group_affinity` -
+  (Optional)
+  Optional. Node Group Affinity for sole-tenant clusters.
+
+* `private_ipv6_google_access` -
+  (Optional)
+  Optional. The type of IPv6 access for a cluster. Possible values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED, INHERIT_FROM_SUBNETWORK, OUTBOUND, BIDIRECTIONAL
+
+* `reservation_affinity` -
+  (Optional)
+  Optional. Reservation Affinity for consuming Zonal reservation.
+
+* `service_account` -
+  (Optional)
+  Optional. The service account used by Dataproc cluster VM instances to access Google Cloud Platform services. If not specified, the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) is used.
+
+* `service_account_scopes` -
+  (Optional)
+  Optional. The URIs of service account scopes to be included in Compute Engine instances.
+  The following base set of scopes is always included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly * https://www.googleapis.com/auth/devstorage.read_write * https://www.googleapis.com/auth/logging.write If no scopes are specified, the following defaults are also provided: * https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table * https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control
+
+* `subnetwork` -
+  (Optional)
+  Optional. The Compute Engine subnetwork to be used for machine communications. Cannot be specified with network_uri. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects//regions/us-east1/subnetworks/sub0` * `sub0`
+
+* `tags` -
+  (Optional)
+  The Compute Engine tags to add to all instances (see [Tags](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
+
+* `zone` -
+  (Optional)
+  Optional. The zone where the Compute Engine cluster will be located. On a create request, it is required in the "global" region. If omitted in a non-global Dataproc region, the service will pick a zone in the corresponding Compute Engine region. On a get request, zone will always be present. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/` * `us-central1-f`
+
+The `node_group_affinity` block supports:
+
+* `node_group` -
+  (Required)
+  Required. The URI of a sole-tenant node group resource that the cluster will be created on. A full URL, partial URI, or node group name are valid. Examples: * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `node-group-1`
+
+The `reservation_affinity` block supports:
+
+* `consume_reservation_type` -
+  (Optional)
+  Optional. Type of reservation to consume. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
+
+* `key` -
+  (Optional)
+  Optional. Corresponds to the label key of reservation resource.
+
+* `values` -
+  (Optional)
+  Optional. Corresponds to the label values of reservation resource.
+
+The `gke_cluster_config` block supports:
+
+* `namespaced_gke_deployment_target` -
+  (Optional)
+  Optional. A target for the deployment.
+
+The `namespaced_gke_deployment_target` block supports:
+
+* `cluster_namespace` -
+  (Optional)
+  Optional. A namespace within the GKE cluster to deploy into.
+
+* `target_gke_cluster` -
+  (Optional)
+  Optional. The target GKE cluster to deploy to. Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
+
+The `initialization_actions` block supports:
+
+* `executable_file` -
+  (Optional)
+  Required. Cloud Storage URI of executable file.
+
+* `execution_timeout` -
+  (Optional)
+  Optional. Amount of time executable has to complete. Default is 10 minutes (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)). Cluster creation fails with an explanatory error message (the name of the executable that caused the error and the exceeded timeout period) if the executable is not completed at end of the timeout period.
+
+The `lifecycle_config` block supports:
+
+* `auto_delete_time` -
+  (Optional)
+  Optional. The time when cluster will be auto-deleted (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
+
+* `auto_delete_ttl` -
+  (Optional)
+  Optional. The lifetime duration of cluster. The cluster will be auto-deleted at the end of this period. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
+
+* `idle_delete_ttl` -
+  (Optional)
+  Optional. The duration to keep the cluster alive while idling (when no jobs are running). Passing this threshold will cause the cluster to be deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
+
+* `idle_start_time` -
+  Output only. The time when cluster became idle (most recent job finished) and became eligible for deletion due to idleness (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
+
+The `metastore_config` block supports:
+
+* `dataproc_metastore_service` -
+  (Required)
+  Required. Resource name of an existing Dataproc Metastore service. Example: * `projects/`
+
+The `security_config` block supports:
+
+* `kerberos_config` -
+  (Optional)
+  Kerberos related configuration.
+
+The `kerberos_config` block supports:
+
+* `cross_realm_trust_admin_server` -
+  (Optional)
+  Optional. The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
+
+* `cross_realm_trust_kdc` -
+  (Optional)
+  Optional. The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
+
+* `cross_realm_trust_realm` -
+  (Optional)
+  Optional. The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
+
+* `cross_realm_trust_shared_password` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
+
+* `enable_kerberos` -
+  (Optional)
+  Optional. Flag to indicate whether to Kerberize the cluster (default: false). Set this field to true to enable Kerberos on a cluster.
+
+* `kdc_db_key` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
+
+* `key_password` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
+
+* `keystore` -
+  (Optional)
+  Optional. The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
+
+* `keystore_password` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, this password is generated by Dataproc.
+
+* `kms_key` -
+  (Optional)
+  Optional. The URI of the KMS key used to encrypt various sensitive files.
+
+* `realm` -
+  (Optional)
+  Optional. The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
+
+* `root_principal_password` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the root principal password.
+
+* `tgt_lifetime_hours` -
+  (Optional)
+  Optional. The lifetime of the ticket granting ticket, in hours. If not specified, or if the user specifies 0, then the default value 10 will be used.
+
+* `truststore` -
+  (Optional)
+  Optional. The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
+
+* `truststore_password` -
+  (Optional)
+  Optional. The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore.
+  For the self-signed certificate, this password is generated by Dataproc.
+
+The `software_config` block supports:
+
+* `image_version` -
+  (Optional)
+  Optional. The version of software inside the cluster. It must be one of the supported [Dataproc versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions). If unspecified, it defaults to the latest Debian version.
+
+* `properties` -
+  (Optional)
+  Optional. The properties to set on daemon config files. Property keys are specified in `prefix:property` format, for example `core:hadoop.tmp.dir`. The following are supported prefixes and their mappings: * capacity-scheduler: `capacity-scheduler.xml` * core: `core-site.xml` * distcp: `distcp-default.xml` * hdfs: `hdfs-site.xml` * hive: `hive-site.xml` * mapred: `mapred-site.xml` * pig: `pig.properties` * spark: `spark-defaults.conf` * yarn: `yarn-site.xml` For more information, see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
+
+## Attributes Reference
+
+In addition to the arguments listed above, the following computed attributes are exported:
+
+* `id` - an identifier for the resource with format `projects/{{project}}/locations/{{location}}/workflowTemplates/{{name}}`
+
+* `create_time` -
+  Output only. The time template was created.
+
+* `update_time` -
+  Output only. The time template was last updated.
+
+## Timeouts
+
+This resource provides the following
+[Timeouts](/docs/configuration/resources.html#timeouts) configuration options:
+
+- `create` - Default is 10 minutes.
+- `delete` - Default is 10 minutes.
+
+## Import
+
+WorkflowTemplate can be imported using any of these accepted formats:
+
+```
+$ terraform import google_dataproc_workflow_template.default projects/{{project}}/locations/{{location}}/workflowTemplates/{{name}}
+$ terraform import google_dataproc_workflow_template.default {{project}}/{{location}}/{{name}}
+$ terraform import google_dataproc_workflow_template.default {{location}}/{{name}}
+```
+
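+For orientation, a minimal configuration that exercises the arguments documented above might look like the following sketch (the resource names and field values are illustrative assumptions, not part of this reference):
+
+```hcl
+resource "google_dataproc_workflow_template" "template" {
+  name     = "template-example"
+  location = "us-central1"
+
+  placement {
+    managed_cluster {
+      # A random suffix is appended to this prefix (see `cluster_name` above).
+      cluster_name = "my-cluster"
+      config {
+        master_config {
+          num_instances = 1
+          machine_type  = "n1-standard-1"
+        }
+        software_config {
+          # Assumed image version; see the supported Dataproc versions above.
+          image_version = "2.0"
+        }
+      }
+    }
+  }
+
+  jobs {
+    step_id = "someJob"
+    spark_job {
+      main_class = "SomeClass"
+    }
+  }
+}
+```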