From 41fe7205028177c68df44e698ff644894a73329f Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Wed, 4 May 2022 16:05:00 +0530 Subject: [PATCH 1/4] spark configuration for sandbox Signed-off-by: Samhita Alla --- rsts/deployment/plugin_setup/k8s/index.rst | 354 +++++++++++++++------ 1 file changed, 252 insertions(+), 102 deletions(-) diff --git a/rsts/deployment/plugin_setup/k8s/index.rst b/rsts/deployment/plugin_setup/k8s/index.rst index 218e515162..37877af5cf 100644 --- a/rsts/deployment/plugin_setup/k8s/index.rst +++ b/rsts/deployment/plugin_setup/k8s/index.rst @@ -178,65 +178,213 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo .. tabbed:: Spark Operator - * Enable Spark backend plugin + .. tabbed:: Sandbox + + Since sandbox uses minio, it needs additional configuration. + + .. code-block:: yaml + + cluster_resource_manager: + # -- Enables the Cluster resource manager component + enabled: true + # -- Configmap for ClusterResource parameters + config: + # -- ClusterResource parameters + # Refer to the [structure](https://pkg.go.dev/github.com/lyft/flyteadmin@v0.3.37/pkg/runtime/interfaces#ClusterResourceConfig) to customize. + cluster_resources: + refreshInterval: 5m + templatePath: "/etc/flyte/clusterresource/templates" + customData: + - production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + - staging: + - projectQuotaCpu: + value: "2" + - projectQuotaMemory: + value: "3000Mi" + - development: + - projectQuotaCpu: + value: "4" + - projectQuotaMemory: + value: "5000Mi" + refresh: 5m + + # -- Resource templates that should be applied + templates: + # -- Template for namespaces resources + - key: aa_namespace + value: | + apiVersion: v1 + kind: Namespace + metadata: + name: {{ namespace }} + spec: + finalizers: + - kubernetes + + - key: ab_project_resource_quota + value: | + apiVersion: v1 + kind: ResourceQuota + metadata: + name: project-quota + namespace: {{ namespace }} + spec: + hard: + limits.cpu: {{ projectQuotaCpu }} + limits.memory: {{ projectQuotaMemory }} + + - key: ac_spark_role + value: | + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: Role + metadata: + name: spark-role + namespace: {{ namespace }} + rules: + - apiGroups: ["*"] + resources: ["pods"] + verbs: ["*"] + - apiGroups: ["*"] + resources: ["services"] + verbs: ["*"] + - apiGroups: ["*"] + resources: ["configmaps", "persistentvolumeclaims"] + verbs: ["*"] + + - key: ad_spark_service_account + value: | + apiVersion: v1 + kind: ServiceAccount + metadata: + name: spark + namespace: {{ namespace }} + + - key: ae_spark_role_binding + value: | + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: RoleBinding + metadata: + name: spark-role-binding + namespace: {{ namespace }} + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-role + subjects: + - kind: ServiceAccount + name: spark + namespace: {{ namespace }} + + sparkoperator: + enabled: true + plugin_config: + plugins: + spark: + # -- Spark default configuration + spark-config-default: + # We override the default credentials chain provider for Hadoop so that + # it can use the serviceAccount based IAM role or ec2 metadata based. + # This is more in line with how AWS works + - spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + - spark.hadoop.fs.s3a.endpoint: "http://minio.flyte.svc.cluster.local:9000" + - spark.hadoop.fs.s3a.access.key: "minio" + - spark.hadoop.fs.s3a.secret.key: "miniostorage" + - spark.hadoop.fs.s3a.path.style.access: "true" + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.excludeOnFailure.enabled: "true" + - spark.excludeOnFailure.timeout: "5m" + - spark.task.maxfailures: "8" + configmap: + enabled_plugins: + # -- Tasks specific configuration [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#GetConfig) + tasks: + # -- Plugins configuration, [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#TaskPluginConfig) + task-plugins: + # -- [Enabled Plugins](https://pkg.go.dev/github.com/flyteorg/flyteplugins/go/tasks/config#Config). Enable sagemaker*, athena if you install the backend + # plugins + enabled-plugins: + - container + - sidecar + - k8s-array + - spark + default-for-task-types: + container: container + sidecar: sidecar + container_array: k8s-array + spark: spark + + .. tabbed:: AWS .. code-block:: yaml - cluster_resource_manager: - # -- Enables the Cluster resource manager component - enabled: true - # -- Configmap for ClusterResource parameters - config: - # -- ClusterResource parameters - # Refer to the [structure](https://pkg.go.dev/github.com/lyft/flyteadmin@v0.3.37/pkg/runtime/interfaces#ClusterResourceConfig) to customize. - cluster_resources: - refreshInterval: 5m - templatePath: "/etc/flyte/clusterresource/templates" - customData: - - production: - - projectQuotaCpu: - value: "5" - - projectQuotaMemory: - value: "4000Mi" - - staging: - - projectQuotaCpu: - value: "2" - - projectQuotaMemory: - value: "3000Mi" - - development: - - projectQuotaCpu: - value: "4" - - projectQuotaMemory: - value: "3000Mi" - refresh: 5m - - # -- Resource templates that should be applied - templates: - # -- Template for namespaces resources - - key: aa_namespace - value: | - apiVersion: v1 - kind: Namespace - metadata: - name: {{ namespace }} - spec: - finalizers: - - kubernetes - - - key: ab_project_resource_quota - value: | - apiVersion: v1 - kind: ResourceQuota - metadata: - name: project-quota - namespace: {{ namespace }} - spec: - hard: - limits.cpu: {{ projectQuotaCpu }} - limits.memory: {{ projectQuotaMemory }} - - - key: ac_spark_role - value: | + cluster_resource_manager: + # -- Enables the Cluster resource manager component + enabled: true + # -- Configmap for ClusterResource parameters + config: + # -- ClusterResource parameters + # Refer to the [structure](https://pkg.go.dev/github.com/lyft/flyteadmin@v0.3.37/pkg/runtime/interfaces#ClusterResourceConfig) to customize. + cluster_resources: + refreshInterval: 5m + templatePath: "/etc/flyte/clusterresource/templates" + customData: + - production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + - staging: + - projectQuotaCpu: + value: "2" + - projectQuotaMemory: + value: "3000Mi" + - development: + - projectQuotaCpu: + value: "4" + - projectQuotaMemory: + value: "3000Mi" + refresh: 5m + + # -- Resource templates that should be applied + templates: + # -- Template for namespaces resources + - key: aa_namespace + value: | + apiVersion: v1 + kind: Namespace + metadata: + name: {{ namespace }} + spec: + finalizers: + - kubernetes + + - key: ab_project_resource_quota + value: | + apiVersion: v1 + kind: ResourceQuota + metadata: + name: project-quota + namespace: {{ namespace }} + spec: + hard: + limits.cpu: {{ projectQuotaCpu }} + limits.memory: {{ projectQuotaMemory }} + + - key: ac_spark_role + value: | apiVersion: rbac.authorization.k8s.io/v1beta1 kind: Role metadata: @@ -259,16 +407,16 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo verbs: - '*' - - key: ad_spark_service_account - value: | + - key: ad_spark_service_account + value: | apiVersion: v1 kind: ServiceAccount metadata: name: spark namespace: {{ namespace }} - - key: ae_spark_role_binding - value: | + - key: ae_spark_role_binding + value: | apiVersion: rbac.authorization.k8s.io/v1beta1 kind: RoleBinding metadata: @@ -283,48 +431,50 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo name: spark namespace: {{ namespace }} - sparkoperator: - enabled: true - plugin_config: - plugins: - spark: - # -- Spark default configuration - spark-config-default: - # We override the default credentials chain provider for Hadoop so that - # it can use the serviceAccount based IAM role or ec2 metadata based. - # This is more in line with how AWS works - - spark.hadoop.fs.s3a.aws.credentials.provider: "com.amazonaws.auth.DefaultAWSCredentialsProviderChain" - - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" - - spark.kubernetes.allocation.batch.size: "50" - - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" - - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" - - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" - - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" - - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" - - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" - - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" - - spark.hadoop.fs.s3a.multipart.threshold: "536870912" - - spark.blacklist.enabled: "true" - - spark.blacklist.timeout: "5m" - - spark.task.maxfailures: "8" - configmap: - enabled_plugins: - # -- Tasks specific configuration [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#GetConfig) - tasks: - # -- Plugins configuration, [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#TaskPluginConfig) - task-plugins: - # -- [Enabled Plugins](https://pkg.go.dev/github.com/flyteorg/flyteplugins/go/tasks/config#Config). Enable sagemaker*, athena if you install the backend - # plugins - enabled-plugins: - - container - - sidecar - - k8s-array - - spark - default-for-task-types: - container: container - sidecar: sidecar - container_array: k8s-array - spark: spark + sparkoperator: + enabled: true + plugin_config: + plugins: + spark: + # -- Spark default configuration + spark-config-default: + # We override the default credentials chain provider for Hadoop so that + # it can use the serviceAccount based IAM role or ec2 metadata based. + # This is more in line with how AWS works + - spark.hadoop.fs.s3a.aws.credentials.provider: "com.amazonaws.auth.DefaultAWSCredentialsProviderChain" + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.blacklist.enabled: "true" + - spark.blacklist.timeout: "5m" + - spark.task.maxfailures: "8" + configmap: + enabled_plugins: + # -- Tasks specific configuration [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#GetConfig) + tasks: + # -- Plugins configuration, [structure](https://pkg.go.dev/github.com/flyteorg/flytepropeller/pkg/controller/nodes/task/config#TaskPluginConfig) + task-plugins: + # -- [Enabled Plugins](https://pkg.go.dev/github.com/flyteorg/flyteplugins/go/tasks/config#Config). Enable sagemaker*, athena if you install the backend + # plugins + enabled-plugins: + - container + - sidecar + - k8s-array + - spark + default-for-task-types: + container: container + sidecar: sidecar + container_array: k8s-array + spark: spark + + 5. Upgrade the Flyte Helm release. From d178eeace2b83f63a3f4cb2f6473ed05b0e056b3 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Wed, 4 May 2022 16:08:18 +0530 Subject: [PATCH 2/4] remove deprecated config Signed-off-by: Samhita Alla --- rsts/deployment/plugin_setup/k8s/index.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rsts/deployment/plugin_setup/k8s/index.rst b/rsts/deployment/plugin_setup/k8s/index.rst index 37877af5cf..1e1f2b293b 100644 --- a/rsts/deployment/plugin_setup/k8s/index.rst +++ b/rsts/deployment/plugin_setup/k8s/index.rst @@ -452,8 +452,8 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" - spark.hadoop.fs.s3a.multipart.threshold: "536870912" - - spark.blacklist.enabled: "true" - - spark.blacklist.timeout: "5m" + - spark.excludeOnFailure.enabled: "true" + - spark.excludeOnFailure.timeout: "5m" - spark.task.maxfailures: "8" configmap: enabled_plugins: @@ -474,8 +474,6 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo container_array: k8s-array spark: spark - - 5. Upgrade the Flyte Helm release. .. tabbed:: Sandbox From d41bdeaab99baddf155a059f7f59c533591a1a11 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Wed, 4 May 2022 16:11:19 +0530 Subject: [PATCH 3/4] indentation Signed-off-by: Samhita Alla --- rsts/deployment/plugin_setup/k8s/index.rst | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/rsts/deployment/plugin_setup/k8s/index.rst b/rsts/deployment/plugin_setup/k8s/index.rst index 1e1f2b293b..7d51efab47 100644 --- a/rsts/deployment/plugin_setup/k8s/index.rst +++ b/rsts/deployment/plugin_setup/k8s/index.rst @@ -385,27 +385,27 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo - key: ac_spark_role value: | - apiVersion: rbac.authorization.k8s.io/v1beta1 - kind: Role - metadata: - name: spark-role - namespace: {{ namespace }} - rules: - - apiGroups: ["*"] - resources: - - pods - verbs: - - '*' - - apiGroups: ["*"] - resources: - - services - verbs: - - '*' - - apiGroups: ["*"] - resources: - - configmaps - verbs: - - '*' + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: Role + metadata: + name: spark-role + namespace: {{ namespace }} + rules: + - apiGroups: ["*"] + resources: + - pods + verbs: + - '*' + - apiGroups: ["*"] + resources: + - services + verbs: + - '*' + - apiGroups: ["*"] + resources: + - configmaps + verbs: + - '*' - key: ad_spark_service_account value: | From 49886bf6dfdbbdd9445806ec0e1b34f1c7578c13 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Wed, 4 May 2022 16:12:01 +0530 Subject: [PATCH 4/4] indentation Signed-off-by: Samhita Alla --- rsts/deployment/plugin_setup/k8s/index.rst | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/rsts/deployment/plugin_setup/k8s/index.rst b/rsts/deployment/plugin_setup/k8s/index.rst index 7d51efab47..664e525275 100644 --- a/rsts/deployment/plugin_setup/k8s/index.rst +++ b/rsts/deployment/plugin_setup/k8s/index.rst @@ -409,27 +409,27 @@ This guide gives an overview of setting up the K8s Operator backend plugin in yo - key: ad_spark_service_account value: | - apiVersion: v1 - kind: ServiceAccount - metadata: - name: spark - namespace: {{ namespace }} + apiVersion: v1 + kind: ServiceAccount + metadata: + name: spark + namespace: {{ namespace }} - key: ae_spark_role_binding value: | - apiVersion: rbac.authorization.k8s.io/v1beta1 - kind: RoleBinding - metadata: - name: spark-role-binding - namespace: {{ namespace }} - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: spark-role - subjects: - - kind: ServiceAccount - name: spark - namespace: {{ namespace }} + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: RoleBinding + metadata: + name: spark-role-binding + namespace: {{ namespace }} + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-role + subjects: + - kind: ServiceAccount + name: spark + namespace: {{ namespace }} sparkoperator: enabled: true