diff --git a/.gitignore b/.gitignore index bdde967a1d..62bdea5a24 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ docs/flytekit/flytekit.interfaces.html docs/searchindex.js docs/ __pycache__/ +/helm/charts/ \ No newline at end of file diff --git a/helm/.helmignore b/helm/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/helm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm/Chart.yaml b/helm/Chart.yaml new file mode 100644 index 0000000000..b66ba74db3 --- /dev/null +++ b/helm/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: flyte +description: A Helm chart for Flyte +type: application +version: 0.2.0 +appVersion: 0.8.0 +dependencies: + - name: contour + version: 4.1.2 + repository: https://charts.bitnami.com/bitnami + condition: contour.enabled + - name: spark-operator + alias: sparkoperator + version: 1.0.6 + repository: https://googlecloudplatform.github.io/spark-on-k8s-operator + condition: sparkoperator.enabled # toggle lives under the alias key, matching the documented `sparkoperator.enabled` value + - name: kubernetes-dashboard + version: 4.0.2 + repository: https://kubernetes.github.io/dashboard/ + condition: kubernetes-dashboard.enabled diff --git a/helm/README.MD b/helm/README.MD new file mode 100644 index 0000000000..60a0acbcb1 --- /dev/null +++ b/helm/README.MD @@ -0,0 +1,272 @@ +# flyte + +![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.8.0](https://img.shields.io/badge/AppVersion-0.8.0-informational?style=flat-square) + +A Helm chart for Flyte + +### SANDBOX INSTALLATION: +- [Install 
helm 3](https://helm.sh/docs/intro/install/) +- Fetch chart dependencies `helm dep up` +- Install Flyte sandbox: + +```bash +cd helm +helm install -n flyte -f values-sandbox.yaml --create-namespace flyte . +``` + +Customize your installation by changing settings in `values-sandbox.yaml`. +You can use the helm diff plugin to review the effect of any changes you've made to your values: + +```bash +helm plugin install https://github.com/databus23/helm-diff +helm diff upgrade -n flyte -f values-sandbox.yaml flyte . +``` + +Then apply your changes: +```bash +helm upgrade -n flyte -f values-sandbox.yaml flyte . +``` + +#### Alternative: Generate raw Kubernetes YAML with helm template +- `helm template --name-template=flyte-sandbox . -n flyte -f values-sandbox.yaml > flyte_generated_sandbox.yaml` +- Deploy the manifest `kubectl apply -f flyte_generated_sandbox.yaml` + + +- When all pods are running - run end2end tests: `kubectl apply -f ../end2end/tests/endtoend.yaml` +- Get flyte host `minikube service contour -n heptio-contour --url`. And then visit `http://<HOST>/console` + + +### CONFIGURATION NOTES: +- The docker images, their tags and other default parameters are configured in the `values.yaml` file. +- Each Flyte installation type should have a separate `values-*.yaml` file: for sandbox, EKS, etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. +- The configuration in `values-sandbox.yaml` is ready for installation in minikube. But `values-eks.yaml` should be edited before installation: s3 bucket, RDS hosts, iam roles, secrets, etc. need to be modified. + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| common.databaseSecret.name | string | `""` | Specify name of K8s Secret which contains Database password. 
Leave it empty if you don't need this Secret | +| common.databaseSecret.secretManifest | object | `{}` | Specify your Secret (with sensitive data) or pseudo-manifest (without sensitive data). See https://github.com/godaddy/kubernetes-external-secrets | +| common.flyteNamespaceTemplate.enabled | bool | `true` | | +| common.ingress.annotations | object | `{}` | | +| common.ingress.enabled | bool | `true` | | +| configmap.aa_namespace | string | `"apiVersion: v1\nkind: Namespace\nmetadata:\n name: {{ namespace }}\nspec:\n finalizers:\n - kubernetes\n"` | Template for namespaces resources | +| configmap.ad_spark_role | string | `"apiVersion: rbac.authorization.k8s.io/v1beta1\nkind: Role\nmetadata:\n name: spark-role\n namespace: {{ namespace }}\nrules:\n- apiGroups:\n - \"\"\n resources:\n - pods\n verbs:\n - '*'\n- apiGroups:\n - \"\"\n resources:\n - services\n verbs:\n - '*'\n- apiGroups:\n - \"\"\n resources:\n - configmaps\n verbs:\n - '*'\n"` | Template for spark role | +| configmap.admin.admin.endpoint | string | `"flyteadmin:81"` | | +| configmap.admin.admin.insecure | bool | `true` | | +| configmap.admin.event.capacity | int | `1000` | | +| configmap.admin.event.rate | int | `500` | | +| configmap.admin.event.type | string | `"admin"` | | +| configmap.ae_spark_service_account | string | `"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: spark\n namespace: {{ namespace }}\n"` | Template for spark SA | +| configmap.af_spark_role_binding | string | `"apiVersion: rbac.authorization.k8s.io/v1beta1\nkind: RoleBinding\nmetadata:\n name: spark-role-binding\n namespace: {{ namespace }} \nroleRef:\n apiGroup: rbac.authorization.k8s.io\n kind: Role\n name: spark-role\nsubjects:\n- kind: ServiceAccount\n name: spark\n namespace: {{ namespace }}\n"` | Template for spark role binding | +| configmap.catalog.catalog-cache.endpoint | string | `"datacatalog:89"` | | +| configmap.catalog.catalog-cache.insecure | bool | `true` | | +| configmap.catalog.catalog-cache.type | 
string | `"datacatalog"` | | +| configmap.catalog_cache.plugins.catalogCache.reader.maxItems | int | `10000` | | +| configmap.catalog_cache.plugins.catalogCache.writer.maxItems | int | `10000` | | +| configmap.cluster_resources.cluster_resources.customData.development[0].projectQuotaCpu.value | string | `"4"` | | +| configmap.cluster_resources.cluster_resources.customData.development[1].projectQuotaMemory.value | string | `"3000Mi"` | | +| configmap.cluster_resources.cluster_resources.customData.production[0].projectQuotaCpu.value | string | `"5"` | | +| configmap.cluster_resources.cluster_resources.customData.production[1].projectQuotaMemory.value | string | `"4000Mi"` | | +| configmap.cluster_resources.cluster_resources.customData.staging[0].projectQuotaCpu.value | string | `"2"` | | +| configmap.cluster_resources.cluster_resources.customData.staging[1].projectQuotaMemory.value | string | `"3000Mi"` | | +| configmap.cluster_resources.cluster_resources.refresh | string | `"5m"` | | +| configmap.cluster_resources.cluster_resources.templatePath | string | `"/etc/flyte/clusterresource/templates"` | | +| configmap.console.BASE_URL | string | `"/console"` | | +| configmap.console.CONFIG_DIR | string | `"/etc/flyte/config"` | | +| configmap.copilot.plugins.k8s.co-pilot.image | string | `"docker.io/lyft/flytecopilot:v0.3.35"` | | +| configmap.copilot.plugins.k8s.co-pilot.name | string | `"flyte-copilot-"` | | +| configmap.copilot.plugins.k8s.co-pilot.start-timeout | string | `"30s"` | | +| configmap.core.propeller.downstream-eval-duration | string | `"30s"` | | +| configmap.core.propeller.enable-admin-launcher | bool | `true` | | +| configmap.core.propeller.leader-election.enabled | bool | `true` | | +| configmap.core.propeller.leader-election.lease-duration | string | `"15s"` | | +| configmap.core.propeller.leader-election.lock-config-map.name | string | `"propeller-leader"` | | +| configmap.core.propeller.leader-election.lock-config-map.namespace | string | `"flyte"` | 
| +| configmap.core.propeller.leader-election.renew-deadline | string | `"10s"` | | +| configmap.core.propeller.leader-election.retry-period | string | `"2s"` | | +| configmap.core.propeller.limit-namespace | string | `"all"` | | +| configmap.core.propeller.max-workflow-retries | int | `30` | | +| configmap.core.propeller.metadata-prefix | string | `"metadata/propeller"` | | +| configmap.core.propeller.metrics-prefix | string | `"flyte"` | | +| configmap.core.propeller.prof-port | int | `10254` | | +| configmap.core.propeller.queue.batch-size | int | `-1` | | +| configmap.core.propeller.queue.batching-interval | string | `"2s"` | | +| configmap.core.propeller.queue.queue.capacity | int | `100` | | +| configmap.core.propeller.queue.queue.rate | int | `10` | | +| configmap.core.propeller.queue.queue.type | string | `"bucket"` | | +| configmap.core.propeller.queue.sub-queue.capacity | int | `100` | | +| configmap.core.propeller.queue.sub-queue.rate | int | `10` | | +| configmap.core.propeller.queue.sub-queue.type | string | `"bucket"` | | +| configmap.core.propeller.queue.type | string | `"batch"` | | +| configmap.core.propeller.rawoutput-prefix | string | `"s3://my-s3-bucket/"` | | +| configmap.core.propeller.workers | int | `4` | | +| configmap.core.propeller.workflow-reeval-duration | string | `"30s"` | | +| configmap.datacatalogServer.datacatalog.metrics-scope | string | `"datacatalog"` | | +| configmap.datacatalogServer.datacatalog.profiler-port | int | `10254` | | +| configmap.datacatalogServer.datacatalog.storage-prefix | string | `"metadata/datacatalog"` | | +| configmap.db.database | object | `{}` | | +| configmap.domain.domains[0].id | string | `"development"` | | +| configmap.domain.domains[0].name | string | `"development"` | | +| configmap.domain.domains[1].id | string | `"staging"` | | +| configmap.domain.domains[1].name | string | `"staging"` | | +| configmap.domain.domains[2].id | string | `"production"` | | +| configmap.domain.domains[2].name | string 
| `"production"` | | +| configmap.enabled_plugins.tasks.max-plugin-phase-versions | int | `1000000` | | +| configmap.enabled_plugins.tasks.task-plugins.enabled-plugins | list | `[]` | | +| configmap.k8s.plugins.k8s.default-cpus | string | `"100m"` | | +| configmap.k8s.plugins.k8s.default-env-vars | list | `[]` | | +| configmap.k8s.plugins.k8s.default-memory | string | `"100Mi"` | | +| configmap.logger.logger.level | int | `4` | | +| configmap.logger.logger.show-source | bool | `true` | | +| configmap.qubole.plugins.qubole.quboleTokenKey | string | `"FLYTE_QUBOLE_CLIENT_TOKEN"` | | +| configmap.remote_data.remoteData.region | string | `"us-east-1"` | | +| configmap.remote_data.remoteData.scheme | string | `"local"` | | +| configmap.remote_data.remoteData.signedUrls.durationMinutes | int | `3` | | +| configmap.resource_manager.propeller.resourcemanager.redis.hostKey | string | `"mypassword"` | | +| configmap.resource_manager.propeller.resourcemanager.redis.hostPath | string | `"redis-resource-manager:6379"` | | +| configmap.resource_manager.propeller.resourcemanager.resourceMaxQuota | int | `10000` | | +| configmap.resource_manager.propeller.resourcemanager.type | string | `"redis"` | | +| configmap.server.flyteadmin.metadataStoragePrefix[0] | string | `"metadata"` | | +| configmap.server.flyteadmin.metadataStoragePrefix[1] | string | `"admin"` | | +| configmap.server.flyteadmin.metricsScope | string | `"flyte:"` | | +| configmap.server.flyteadmin.profilerPort | int | `10254` | | +| configmap.server.flyteadmin.roleNameKey | string | `"iam.amazonaws.com/role"` | | +| configmap.server.flyteadmin.testing.host | string | `"http://flyteadmin"` | | +| configmap.server.server.grpcPort | int | `8089` | | +| configmap.server.server.httpPort | int | `8088` | | +| configmap.server.server.security.allowCors | bool | `true` | | +| configmap.server.server.security.allowedHeaders[0] | string | `"Content-Type"` | | +| configmap.server.server.security.allowedOrigins[0] | string | 
`"*"` | | +| configmap.server.server.security.secure | bool | `false` | | +| configmap.server.server.security.useAuth | bool | `false` | | +| configmap.spark.plugins.spark.spark-config-default | list | `[{"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version":"2"},{"spark.kubernetes.allocation.batch.size":"50"},{"spark.hadoop.fs.s3a.acl.default":"BucketOwnerFullControl"},{"spark.hadoop.fs.s3n.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3n.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.multipart.threshold":"536870912"},{"spark.blacklist.enabled":"true"},{"spark.blacklist.timeout":"5m"},{"spark.task.maxfailures":"8"}]` | Spark default configuration | +| configmap.storage.storage.connection.auth-type | string | `"accesskey"` | | +| configmap.storage.storage.connection.region | string | `"us-east-1"` | | +| configmap.storage.storage.container | string | `"my-s3-bucket"` | | +| configmap.storage.storage.type | string | `"minio"` | | +| configmap.task_logs.plugins.logs.kubernetes-enabled | bool | `false` | | +| configmap.task_resource_defaults.task_resources.defaults.cpu | string | `"100m"` | | +| configmap.task_resource_defaults.task_resources.defaults.memory | string | `"1000Mi"` | | +| configmap.task_resource_defaults.task_resources.defaults.storage | string | `"5Mi"` | | +| configmap.task_resource_defaults.task_resources.limits.cpu | string | `"15000m"` | | +| configmap.task_resource_defaults.task_resources.limits.memory | string | `"100Gi"` | | +| configmap.task_resource_defaults.task_resources.limits.storage | string | `"400Gi"` | | +| contour.affinity | object | `{}` | affinity for Contour deployment | +| 
contour.contour.image.pullPolicy | string | `"IfNotPresent"` | | +| contour.contour.image.repository | string | `"gcr.io/heptio-images/contour"` | Docker image for Contour | +| contour.contour.image.tag | string | `"v0.6.1"` | | +| contour.contour.resources | object | `{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"10m","memory":"50Mi"}}` | Default resources requests and limits for Contour | +| contour.enabled | bool | `true` | | +| contour.envoy.image.pullPolicy | string | `"IfNotPresent"` | | +| contour.envoy.image.repository | string | `"docker.io/envoyproxy/envoy-alpine"` | Docker image for Envoy | +| contour.envoy.image.tag | string | `"v1.6.0"` | | +| contour.envoy.resources | object | `{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"10m","memory":"50Mi"}}` | Default resources requests and limits for Envoy | +| contour.nodeSelector | object | `{}` | nodeSelector for Contour deployment | +| contour.podAnnotations | object | `{}` | Annotations for Contour pods | +| contour.replicaCount | int | `1` | Replicas count for Contour deployment | +| contour.service | object | `{"annotations":{},"ports":[{"nodePort":30081,"port":80,"protocol":"TCP"}],"type":"NodePort"}` | Service settings for Contour | +| contour.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Contour pods | +| contour.tolerations | list | `[]` | tolerations for Contour deployment | +| datacatalog.affinity | object | `{}` | affinity for Datacatalog deployment | +| datacatalog.configPath | string | `"/etc/datacatalog/config/*.yaml"` | Default regex string for searching configuration files | +| datacatalog.image.pullPolicy | string | `"IfNotPresent"` | | +| datacatalog.image.repository | string | `"docker.io/lyft/datacatalog"` | Docker image for Datacatalog deployment | +| datacatalog.image.tag | string | `"v0.2.2"` | | +| datacatalog.nodeSelector | object | `{}` | nodeSelector for Datacatalog deployment | +| datacatalog.podAnnotations | 
object | `{}` | Annotations for Datacatalog pods | +| datacatalog.replicaCount | int | `1` | Replicas count for Datacatalog deployment | +| datacatalog.resources | object | `{"limits":{"cpu":"500m","ephemeral-storage":"100Mi","memory":"500Mi"},"requests":{"cpu":"10m","ephemeral-storage":"50Mi","memory":"50Mi"}}` | Default resources requests and limits for Datacatalog deployment | +| datacatalog.service | object | `{"annotations":{"contour.heptio.com/upstream-protocol.h2c":"grpc"},"type":"NodePort"}` | Service settings for Datacatalog | +| datacatalog.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Datacatalog pods | +| datacatalog.tolerations | list | `[]` | tolerations for Datacatalog deployment | +| flyteadmin.affinity | object | `{}` | affinity for Flyteadmin deployment | +| flyteadmin.configPath | string | `"/etc/flyte/config/*.yaml"` | Default regex string for searching configuration files | +| flyteadmin.image.pullPolicy | string | `"IfNotPresent"` | | +| flyteadmin.image.repository | string | `"docker.io/lyft/flyteadmin"` | Docker image for Flyteadmin deployment | +| flyteadmin.image.tag | string | `"v0.3.7"` | | +| flyteadmin.nodeSelector | object | `{}` | nodeSelector for Flyteadmin deployment | +| flyteadmin.podAnnotations | object | `{}` | Annotations for Flyteadmin pods | +| flyteadmin.replicaCount | int | `1` | Replicas count for Flyteadmin deployment | +| flyteadmin.resources | object | `{"limits":{"cpu":"250m","ephemeral-storage":"100Mi","memory":"500Mi"},"requests":{"cpu":"10m","ephemeral-storage":"50Mi","memory":"50Mi"}}` | Default resources requests and limits for Flyteadmin deployment | +| flyteadmin.service | object | `{"annotations":{"contour.heptio.com/upstream-protocol.h2c":"grpc"},"type":"ClusterIP"}` | Service settings for Flyteadmin | +| flyteadmin.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Flyteadmin pods | +| flyteadmin.tolerations | list | `[]` | 
tolerations for Flyteadmin deployment | +| flyteconsole.affinity | object | `{}` | affinity for Flyteconsole deployment | +| flyteconsole.image.pullPolicy | string | `"IfNotPresent"` | | +| flyteconsole.image.repository | string | `"docker.io/lyft/flyteconsole"` | Docker image for Flyteconsole deployment | +| flyteconsole.image.tag | string | `"v0.12.1"` | | +| flyteconsole.nodeSelector | object | `{}` | nodeSelector for Flyteconsole deployment | +| flyteconsole.podAnnotations | object | `{}` | Annotations for Flyteconsole pods | +| flyteconsole.replicaCount | int | `1` | Replicas count for Flyteconsole deployment | +| flyteconsole.resources | object | `{"limits":{"cpu":"500m","memory":"250Mi"},"requests":{"cpu":"10m","memory":"50Mi"}}` | Default resources requests and limits for Flyteconsole deployment | +| flyteconsole.service | object | `{"annotations":{},"type":"ClusterIP"}` | Service settings for Flyteconsole | +| flyteconsole.tolerations | list | `[]` | tolerations for Flyteconsole deployment | +| flytepropeller.affinity | object | `{}` | affinity for Flytepropeller deployment | +| flytepropeller.configPath | string | `"/etc/flyte/config/*.yaml"` | Default regex string for searching configuration files | +| flytepropeller.image.pullPolicy | string | `"IfNotPresent"` | | +| flytepropeller.image.repository | string | `"docker.io/lyft/flytepropeller"` | Docker image for Flytepropeller deployment | +| flytepropeller.image.tag | string | `"v0.4.2"` | | +| flytepropeller.nodeSelector | object | `{}` | nodeSelector for Flytepropeller deployment | +| flytepropeller.podAnnotations | object | `{}` | Annotations for Flytepropeller pods | +| flytepropeller.replicaCount | int | `1` | Replicas count for Flytepropeller deployment | +| flytepropeller.resources | object | `{"limits":{"cpu":"200m","ephemeral-storage":"100Mi","memory":"200Mi"},"requests":{"cpu":"10m","ephemeral-storage":"50Mi","memory":"50Mi"}}` | Default resources requests and limits for Flytepropeller 
deployment | +| flytepropeller.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Flytepropeller pods | +| flytepropeller.tolerations | list | `[]` | tolerations for Flytepropeller deployment | +| minio.affinity | object | `{}` | affinity for Minio deployment | +| minio.enabled | bool | `true` | | +| minio.image.pullPolicy | string | `"IfNotPresent"` | | +| minio.image.repository | string | `"minio/minio"` | Docker image for Minio deployment | +| minio.image.tag | string | `"RELEASE.2019-06-04T01-15-58Z"` | | +| minio.nodeSelector | object | `{}` | nodeSelector for Minio deployment | +| minio.podAnnotations | object | `{}` | Annotations for Minio pods | +| minio.replicaCount | int | `1` | Replicas count for Minio deployment | +| minio.resources | object | `{"limits":{"cpu":"200m","memory":"512Mi"},"requests":{"cpu":"10m","memory":"128Mi"}}` | Default resources requests and limits for Minio deployment | +| minio.service | object | `{"annotations":{},"type":"ClusterIP"}` | Service settings for Minio | +| minio.tolerations | list | `[]` | tolerations for Minio deployment | +| postgres.affinity | object | `{}` | affinity for Postgres deployment | +| postgres.enabled | bool | `true` | | +| postgres.image.pullPolicy | string | `"IfNotPresent"` | | +| postgres.image.repository | string | `"postgres"` | Docker image for Postgres deployment | +| postgres.image.tag | string | `"10.1"` | | +| postgres.nodeSelector | object | `{}` | nodeSelector for Postgres deployment | +| postgres.podAnnotations | object | `{}` | Annotations for Postgres pods | +| postgres.replicaCount | int | `1` | Replicas count for Postgres deployment | +| postgres.resources | object | `{"limits":{"cpu":"1000m","memory":"512Mi"},"requests":{"cpu":"10m","memory":"128Mi"}}` | Default resources requests and limits for Postgres deployment | +| postgres.service | object | `{"annotations":{},"type":"ClusterIP"}` | Service settings for Postgres | +| postgres.tolerations | list 
| `[]` | tolerations for Postgres deployment | +| pytorchoperator.affinity | object | `{}` | affinity for Pytorchoperator deployment | +| pytorchoperator.enabled | bool | `true` | | +| pytorchoperator.image.pullPolicy | string | `"IfNotPresent"` | | +| pytorchoperator.image.repository | string | `"gcr.io/kubeflow-images-public/pytorch-operator"` | Docker image for Pytorchoperator | +| pytorchoperator.image.tag | string | `"v1.0.0-g047cf0f"` | | +| pytorchoperator.nodeSelector | object | `{}` | nodeSelector for Pytorchoperator deployment | +| pytorchoperator.podAnnotations | object | `{}` | Annotations for Pytorchoperator pods | +| pytorchoperator.replicaCount | int | `1` | Replicas count for Pytorchoperator deployment | +| pytorchoperator.resources | object | `{"limits":{"cpu":"500m","memory":"1000M"},"requests":{"cpu":"10m","memory":"50M"}}` | Default resources requests and limits for Pytorchoperator | +| pytorchoperator.service | object | `{"annotations":{},"type":"ClusterIP"}` | Service settings for Pytorchoperator | +| pytorchoperator.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Pytorchoperator pods | +| pytorchoperator.tolerations | list | `[]` | tolerations for Pytorchoperator deployment | +| redis.affinity | object | `{}` | affinity for Redis Statefulset | +| redis.enabled | bool | `true` | | +| redis.image.pullPolicy | string | `"IfNotPresent"` | | +| redis.image.repository | string | `"docker.io/bitnami/redis"` | Docker image for Redis Statefulset | +| redis.image.tag | string | `"4.0.2-r1"` | | +| redis.nodeSelector | object | `{}` | nodeSelector for Redis Statefulset | +| redis.podAnnotations | object | `{}` | Annotations for Redis pods | +| redis.replicaCount | int | `1` | Replicas count for Redis Statefulset | +| redis.resources | object | `{"limits":{"cpu":"1000m","memory":"1Gi"},"requests":{"cpu":"10m","memory":"50Mi"}}` | Default resources requests and limits for Redis Statefulset | +| redis.service | object 
| `{"annotations":{},"type":"ClusterIP"}` | Service settings for Redis | +| redis.tolerations | list | `[]` | tolerations for Redis Statefulset | +| sparkoperator.affinity | object | `{}` | affinity for Sparkoperator deployment | +| sparkoperator.enabled | bool | `true` | | +| sparkoperator.image.pullPolicy | string | `"IfNotPresent"` | | +| sparkoperator.image.repository | string | `"gcr.io/spark-operator/spark-operator"` | Docker image for Sparkoperator | +| sparkoperator.image.tag | string | `"v2.4.0-v1beta1-0.9.0"` | | +| sparkoperator.nodeSelector | object | `{}` | nodeSelector for Sparkoperator deployment | +| sparkoperator.podAnnotations | object | `{}` | Annotations for Sparkoperator pods | +| sparkoperator.replicaCount | int | `1` | Replicas count for Sparkoperator deployment | +| sparkoperator.resources | object | `{"limits":{"cpu":"1000m","memory":"500M"},"requests":{"cpu":"10m","memory":"50M"}}` | Default resources requests and limits for Sparkoperator | +| sparkoperator.service | object | `{"annotations":{},"type":"ClusterIP"}` | Service settings for Sparkoperator | +| sparkoperator.serviceAccountAnnotations | object | `{}` | Annotations for ServiceAccount attached to Sparkoperator pods | +| sparkoperator.tolerations | list | `[]` | tolerations for Sparkoperator deployment | diff --git a/helm/README.md.gotmpl b/helm/README.md.gotmpl new file mode 100644 index 0000000000..eb4f8c63d5 --- /dev/null +++ b/helm/README.md.gotmpl @@ -0,0 +1,28 @@ +{{ template "chart.header" . }} +{{ template "chart.deprecationWarning" . }} + +{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }} + +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . }} + +{{ template "chart.maintainersSection" . }} + +{{ template "chart.sourcesSection" . }} + +{{ template "chart.requirementsSection" . 
}} + +### SANDBOX INSTALLATION: +- Install the `helm 3` release version +- Generate the manifest: `helm template --name-template=flyte-sandbox . -n flyte -f values-sandbox.yaml > flyte_generated_sandbox.yaml` +- Deploy the manifest `kubectl apply -f flyte_generated_sandbox.yaml` +- When all pods are running - run end2end tests: `kubectl apply -f ../end2end/tests/endtoend.yaml` +- Get flyte host `minikube service contour -n heptio-contour --url`. And then visit `http://<HOST>/console` + +### CONFIGURATION NOTES: +- The docker images, their tags and other default parameters are configured in the `values.yaml` file. +- Each Flyte installation type should have a separate `values-*.yaml` file: for sandbox, EKS, etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. +- The configuration in `values-sandbox.yaml` is ready for installation in minikube. But `values-eks.yaml` should be edited before installation: s3 bucket, RDS hosts, iam roles, secrets, etc. need to be modified. + +{{ template "chart.valuesSection" . }} diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl new file mode 100644 index 0000000000..244619bbad --- /dev/null +++ b/helm/templates/_helpers.tpl @@ -0,0 +1,161 @@ +{{/* vim: set filetype=mustache: */}} + +{{- define "flyte.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "flyte.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "flyte.namespace" -}} +{{- default .Release.Namespace .Values.forceNamespace | trunc 63 | trimSuffix "-" -}} +{{- end -}} + + +{{- define "flyteadmin.name" -}} +flyteadmin +{{- end -}} + +{{- define "flyteadmin.selectorLabels" -}} +app.kubernetes.io/name: {{ template "flyteadmin.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "flyteadmin.labels" -}} +{{ include "flyteadmin.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "datacatalog.name" -}} +datacatalog +{{- end -}} + +{{- define "datacatalog.selectorLabels" -}} +app.kubernetes.io/name: {{ template "datacatalog.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "datacatalog.labels" -}} +{{ include "datacatalog.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "flytepropeller.name" -}} +flytepropeller +{{- end -}} + +{{- define "flytepropeller.selectorLabels" -}} +app.kubernetes.io/name: {{ template "flytepropeller.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "flytepropeller.labels" -}} +{{ include "flytepropeller.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "flyteconsole.name" -}} +flyteconsole +{{- end -}} + +{{- define "flyteconsole.selectorLabels" -}} +app.kubernetes.io/name: {{ template "flyteconsole.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "flyteconsole.labels" -}} +{{ include "flyteconsole.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "redis.name" -}} +redis +{{- end -}} + +{{- define "redis.selectorLabels" -}} +app.kubernetes.io/name: {{ template "redis.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "redis.labels" -}} +{{ include "redis.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . 
}} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "postgres.name" -}} +postgres +{{- end -}} + +{{- define "postgres.selectorLabels" -}} +app.kubernetes.io/name: {{ template "postgres.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "postgres.labels" -}} +{{ include "postgres.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + + +{{- define "minio.name" -}} +minio +{{- end -}} + +{{- define "minio.selectorLabels" -}} +app.kubernetes.io/name: {{ template "minio.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "minio.labels" -}} +{{ include "minio.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{- define "pytorch-operator.name" -}} +pytorch-operator +{{- end -}} + +{{- define "pytorch-operator.namespace" -}} +pytorch-operator +{{- end -}} + +{{- define "pytorch-operator.selectorLabels" -}} +app.kubernetes.io/name: {{ template "pytorch-operator.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "pytorch-operator.labels" -}} +{{ include "pytorch-operator.selectorLabels" . }} +helm.sh/chart: {{ include "flyte.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +# Optional blocks for secret mount + +{{- define "databaseSecret.volume" -}} +{{- with .Values.common.databaseSecret.name -}} +- name: {{ . }} + secret: + secretName: {{ . }} +{{- end }} +{{- end }} + +{{- define "databaseSecret.volumeMount" -}} +{{- with .Values.common.databaseSecret.name -}} +- mountPath: /etc/db + name: {{ . 
}} +{{- end }} +{{- end }} diff --git a/helm/templates/admin/configmap.yaml b/helm/templates/admin/configmap.yaml new file mode 100644 index 0000000000..a061058caa --- /dev/null +++ b/helm/templates/admin/configmap.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: flyte-admin-config + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +data: +{{- with .Values.configmap.cluster_resources }} + cluster_resources.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.db }} + db.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.domain }} + domain.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.logger }} + logger.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.remote_data }} + remote_data.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.server }} + server.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.storage }} + storage.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.task_resource_defaults }} + task_resource_defaults.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} diff --git a/helm/templates/admin/cronjob.yaml b/helm/templates/admin/cronjob.yaml new file mode 100644 index 0000000000..28fe50d395 --- /dev/null +++ b/helm/templates/admin/cronjob.yaml @@ -0,0 +1,40 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: syncresources + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +spec: + schedule: '*/1 * * * *' + jobTemplate: + spec: + template: + {{- with .Values.flyteadmin.podAnnotations }} + metadata: + annotations: {{ toYaml . 
| nindent 12 }} + {{- end }} + spec: + containers: + - command: + - flyteadmin + - --config + - {{ .Values.flyteadmin.configPath }} + - clusterresource + - sync + image: "{{ .Values.flyteadmin.image.repository }}:{{ .Values.flyteadmin.image.tag }}" + imagePullPolicy: "{{ .Values.flyteadmin.image.pullPolicy }}" + name: sync-cluster-resources + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 12 }} + - mountPath: /etc/flyte/clusterresource/templates + name: resource-templates + - mountPath: /etc/flyte/config + name: config-volume + restartPolicy: OnFailure + serviceAccountName: flyteadmin + volumes: {{- include "databaseSecret.volume" . | nindent 10 }} + - configMap: + name: clusterresource-template + name: resource-templates + - configMap: + name: flyte-admin-config + name: config-volume diff --git a/helm/templates/admin/deployment.yaml b/helm/templates/admin/deployment.yaml new file mode 100644 index 0000000000..1f7003c88d --- /dev/null +++ b/helm/templates/admin/deployment.yaml @@ -0,0 +1,129 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "flyteadmin.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.flyteadmin.replicaCount }} + selector: + matchLabels: {{ include "flyteadmin.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + configChecksum: {{ include (print .Template.BasePath "/admin/configmap.yaml") . | sha256sum | trunc 63 | quote }} + {{- with .Values.flyteadmin.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "flyteadmin.labels" . 
| nindent 8 }} + spec: + initContainers: + {{- if .Values.postgres.enabled }} + - name: check-db-ready + image: postgres:10.16-alpine + command: + - sh + - -c + - until pg_isready -h postgres -p 5432; do echo waiting for database; sleep 2; done; + {{- end }} + - command: + - flyteadmin + - --config + - {{ .Values.flyteadmin.configPath }} + - migrate + - run + image: "{{ .Values.flyteadmin.image.repository }}:{{ .Values.flyteadmin.image.tag }}" + imagePullPolicy: "{{ .Values.flyteadmin.image.pullPolicy }}" + name: run-migrations + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /etc/flyte/config + name: config-volume + - command: + - flyteadmin + - --config + - {{ .Values.flyteadmin.configPath }} + - migrate + - seed-projects + - flytesnacks + - flytetester + - flyteexamples + image: "{{ .Values.flyteadmin.image.repository }}:{{ .Values.flyteadmin.image.tag }}" + imagePullPolicy: "{{ .Values.flyteadmin.image.pullPolicy }}" + name: seed-projects + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /etc/flyte/config + name: config-volume + - command: + - flyteadmin + - --config + - {{ .Values.flyteadmin.configPath }} + - clusterresource + - sync + image: "{{ .Values.flyteadmin.image.repository }}:{{ .Values.flyteadmin.image.tag }}" + imagePullPolicy: "{{ .Values.flyteadmin.image.pullPolicy }}" + name: sync-cluster-resources + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /etc/flyte/clusterresource/templates + name: resource-templates + - mountPath: /etc/flyte/config + name: config-volume + containers: + - command: + - flyteadmin + - --config + - {{ .Values.flyteadmin.configPath }} + {{- with .Values.flyteadmin.extraArgs }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + - serve + image: "{{ .Values.flyteadmin.image.repository }}:{{ .Values.flyteadmin.image.tag }}" + imagePullPolicy: "{{ .Values.flyteadmin.image.pullPolicy }}" + name: flyteadmin + ports: + - containerPort: 8088 + - containerPort: 8089 + resources: {{ toYaml .Values.flyteadmin.resources | nindent 10 }} + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /srv/flyte + name: shared-data + - mountPath: /etc/flyte/config + name: config-volume + - command: + - sh + - -c + - ln -s /usr/share/nginx/html /usr/share/nginx/html/openapi && sh /usr/local/bin/docker-run.sh + env: + - name: PAGE_TITLE + value: Flyte Admin OpenAPI + - name: SPEC_URL + value: /api/v1/openapi + - name: PORT + value: "8087" + image: docker.io/redocly/redoc + imagePullPolicy: IfNotPresent + name: redoc + ports: + - containerPort: 8087 + resources: + limits: + cpu: "0.1" + memory: 200Mi + serviceAccountName: {{ template "flyteadmin.name" . }} + volumes: {{- include "databaseSecret.volume" . | nindent 6 }} + - emptyDir: {} + name: shared-data + - configMap: + name: flyte-admin-config + name: config-volume + - configMap: + name: clusterresource-template + name: resource-templates + {{- with .Values.flyteadmin.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flyteadmin.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flyteadmin.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} diff --git a/helm/templates/admin/rbac.yaml b/helm/templates/admin/rbac.yaml new file mode 100644 index 0000000000..9696437360 --- /dev/null +++ b/helm/templates/admin/rbac.yaml @@ -0,0 +1,52 @@ + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "flyteadmin.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . 
| nindent 4 }} + {{- with .Values.flyteadmin.serviceAccountAnnotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end}} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "flyteadmin.name" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + - flyte.lyft.com + - rbac.authorization.k8s.io + resources: + - configmaps + - flyteworkflows + - namespaces + - pods + - resourcequotas + - roles + - rolebindings + - secrets + - services + - serviceaccounts + - spark-role + verbs: + - '*' + +--- +apiVersion: rbac.authorization.k8s.io/v1 # same API version as the ClusterRole above; rbac v1beta1 is not served on Kubernetes 1.22+ +kind: ClusterRoleBinding +metadata: + name: {{ template "flyteadmin.name" . }}-binding + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "flyteadmin.name" . }} +subjects: +- kind: ServiceAccount + name: {{ template "flyteadmin.name" . }} + namespace: {{ template "flyte.namespace" . }} diff --git a/helm/templates/admin/service.yaml b/helm/templates/admin/service.yaml new file mode 100644 index 0000000000..78b5acba81 --- /dev/null +++ b/helm/templates/admin/service.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "flyteadmin.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} + {{- with .Values.flyteadmin.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.flyteadmin.service.type}} + type: {{ . }} + {{- end }} + {{- with .Values.flyteadmin.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: # emitted via toYaml so a multi-entry list stays a YAML sequence instead of Go slice syntax + {{- toYaml . | nindent 4 }} + {{- end }} + ports: + - name: redoc + port: 87 + protocol: TCP + targetPort: 8087 + - name: http + port: 80 + protocol: TCP + targetPort: 8088 + - name: grpc + port: 81 + protocol: TCP + targetPort: 8089 + selector: {{ include "flyteadmin.selectorLabels" . 
| nindent 4 }} diff --git a/helm/templates/common/configmap.yaml b/helm/templates/common/configmap.yaml new file mode 100644 index 0000000000..4291ec676f --- /dev/null +++ b/helm/templates/common/configmap.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clusterresource-template + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteadmin.labels" . | nindent 4 }} +data: +{{- with .Values.configmap.aa_namespace }} + aa_namespace.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.ab_project_resource_quota }} + ab_project-resource-quota.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.ac_project_copilot_dataconfig }} + ac_project-copilot-dataconfig.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- if .Values.sparkoperator.enabled }} +{{- with .Values.configmap.ad_spark_role }} + ad_spark-role.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.ae_spark_service_account }} + ae_spark-service-account.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.af_spark_role_binding }} + af_spark-role-binding.yaml: | {{ . | nindent 4 }} +{{- end }} +{{- end }} diff --git a/helm/templates/common/ingress.yaml b/helm/templates/common/ingress.yaml new file mode 100644 index 0000000000..f5087bf9f3 --- /dev/null +++ b/helm/templates/common/ingress.yaml @@ -0,0 +1,185 @@ +{{- if .Values.common.ingress.enabled }} +apiVersion: networking.k8s.io/v1beta1 +kind: Ingress +metadata: + name: {{ template "flyte.name" . }} + namespace: {{ template "flyte.namespace" . }} + {{- with .Values.common.ingress.annotations }} + annotations: {{ toYaml . 
| nindent 4}} + {{- end }} +spec: + rules: + - http: + paths: + {{- if .Values.common.ingress.albSSLRedirect }} + - path: /* + pathType: ImplementationSpecific + backend: + serviceName: ssl-redirect + servicePort: use-annotation + {{- end }} + # This is useful only for frontend development + {{- if .Values.common.ingress.webpackHMR }} + - path: /__webpack_hmr + pathType: ImplementationSpecific + backend: + serviceName: flyteconsole + servicePort: 80 + {{- end }} + # Port 87 in FlyteAdmin maps to the redoc container. + - path: /openapi + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 87 + # NOTE: If you change this, you must update the BASE_URL value in flyteconsole.yaml + - path: /console + pathType: ImplementationSpecific + backend: + serviceName: flyteconsole + servicePort: 80 + - path: /console/* + pathType: ImplementationSpecific + backend: + serviceName: flyteconsole + servicePort: 80 + - path: /api + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /api/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /healthcheck + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /v1/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + # Port 87 in FlyteAdmin maps to the redoc container. 
+ - path: /openapi/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /.well-known/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /login + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /login/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /logout + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /logout/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /callback + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /callback/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /me + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /config + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /config/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 80 + {{- if not .Values.common.ingress.separateGrpcIngress }} + # NOTE: Port 81 in flyteadmin is the GRPC server port for FlyteAdmin. + - path: /flyteidl.service.AdminService + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 81 + - path: /flyteidl.service.AdminService/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 81 + {{- end }} + {{- with .Values.common.ingress.host }} + host: {{ . 
}} + {{- end }} + {{- if .Values.common.ingress.tls.enabled }} + tls: + - secretName: {{ .Release.Name }}-tls + hosts: + - {{ .Values.common.ingress.host }} + {{ end }} +{{- if .Values.common.ingress.separateGrpcIngress }} +# Certain ingress controllers like nginx cannot serve HTTP 1 and GRPC with a single ingress because GRPC can only be +# enabled on the ingress object, not on backend services (GRPC annotation is set on the ingress, not on the services). +--- +apiVersion: networking.k8s.io/v1beta1 +kind: Ingress +metadata: + name: {{ template "flyte.name" . }}-grpc + namespace: {{ template "flyte.namespace" . }} + annotations: + {{- with .Values.common.ingress.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.common.ingress.separateGrpcIngressAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} +spec: + rules: + - http: + paths: + # NOTE: Port 81 in flyteadmin is the GRPC server port for FlyteAdmin. + - path: /flyteidl.service.AdminService + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 81 + - path: /flyteidl.service.AdminService/* + pathType: ImplementationSpecific + backend: + serviceName: flyteadmin + servicePort: 81 + {{- with .Values.common.ingress.host }} + host: {{ . }} + {{- end }} + {{- if .Values.common.ingress.tls.enabled }} + tls: + - secretName: {{ .Release.Name }}-tls + hosts: + - {{ .Values.common.ingress.host }} + {{ end }} +{{- end }} +{{- end }} diff --git a/helm/templates/common/namespace.yaml b/helm/templates/common/namespace.yaml new file mode 100644 index 0000000000..7e4f26d461 --- /dev/null +++ b/helm/templates/common/namespace.yaml @@ -0,0 +1,9 @@ +{{- if .Values.common.flyteNamespaceTemplate.enabled }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ template "flyte.namespace" . 
}} +spec: + finalizers: + - kubernetes +{{- end }} diff --git a/helm/templates/common/secret.yaml b/helm/templates/common/secret.yaml new file mode 100644 index 0000000000..0282eda01d --- /dev/null +++ b/helm/templates/common/secret.yaml @@ -0,0 +1,3 @@ +{{- with .Values.common.databaseSecret.secretManifest }} +{{ toYaml . }} +{{- end }} diff --git a/helm/templates/console/configmap.yaml b/helm/templates/console/configmap.yaml new file mode 100644 index 0000000000..084091b781 --- /dev/null +++ b/helm/templates/console/configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: flyte-console-config + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteconsole.labels" . | nindent 4 }} +data: {{ toYaml .Values.configmap.console | nindent 2 }} diff --git a/helm/templates/console/deployment.yaml b/helm/templates/console/deployment.yaml new file mode 100644 index 0000000000..298fb2661e --- /dev/null +++ b/helm/templates/console/deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "flyteconsole.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteconsole.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.flyteconsole.replicaCount }} + selector: + matchLabels: {{ include "flyteconsole.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + configChecksum: {{ include (print .Template.BasePath "/console/configmap.yaml") . | sha256sum | trunc 63 | quote }} + {{- with .Values.flyteconsole.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "flyteconsole.labels" . 
| nindent 8 }} + spec: + containers: + - image: "{{ .Values.flyteconsole.image.repository }}:{{ .Values.flyteconsole.image.tag }}" + imagePullPolicy: "{{ .Values.flyteconsole.image.pullPolicy }}" + name: flyteconsole + envFrom: + - configMapRef: + name: flyte-console-config + ports: + - containerPort: 8080 + resources: {{ toYaml .Values.flyteconsole.resources | nindent 10 }} + volumeMounts: + - mountPath: /srv/flyte + name: shared-data + volumes: + - emptyDir: {} + name: shared-data + {{- with .Values.flyteconsole.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flyteconsole.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flyteconsole.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} \ No newline at end of file diff --git a/helm/templates/console/service.yaml b/helm/templates/console/service.yaml new file mode 100644 index 0000000000..c007243455 --- /dev/null +++ b/helm/templates/console/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "flyteconsole.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flyteconsole.labels" . | nindent 4 }} + {{- with .Values.flyteconsole.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.flyteconsole.service.type}} + type: {{ . }} + {{- end }} + ports: + - port: 80 + protocol: TCP + targetPort: 8080 + selector: {{ include "flyteconsole.selectorLabels" . | nindent 4 }} diff --git a/helm/templates/datacatalog/configmap.yaml b/helm/templates/datacatalog/configmap.yaml new file mode 100644 index 0000000000..dce73e47e4 --- /dev/null +++ b/helm/templates/datacatalog/configmap.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: datacatalog-config + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "datacatalog.labels" . 
| nindent 4 }} +data: +{{- with .Values.configmap.db }} + db.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.logger }} + logger.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.datacatalogServer }} + server.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.storage }} + storage.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} diff --git a/helm/templates/datacatalog/deployment.yaml b/helm/templates/datacatalog/deployment.yaml new file mode 100644 index 0000000000..1e3e2b6c6a --- /dev/null +++ b/helm/templates/datacatalog/deployment.yaml @@ -0,0 +1,67 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "datacatalog.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "datacatalog.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.datacatalog.replicaCount }} + selector: + matchLabels: {{ include "datacatalog.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + configChecksum: {{ include (print .Template.BasePath "/datacatalog/configmap.yaml") . | sha256sum | trunc 63 | quote }} + {{- with .Values.datacatalog.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "datacatalog.labels" . | nindent 8 }} + spec: + initContainers: + - command: + - datacatalog + - --config + - {{ .Values.datacatalog.configPath }} + - migrate + - run + image: "{{ .Values.datacatalog.image.repository }}:{{ .Values.datacatalog.image.tag }}" + imagePullPolicy: "{{ .Values.datacatalog.image.pullPolicy }}" + name: run-migrations + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /etc/datacatalog/config + name: config-volume + containers: + - command: + - datacatalog + - --config + - {{ .Values.datacatalog.configPath }} + {{- with .Values.datacatalog.extraArgs }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + - serve + image: "{{ .Values.datacatalog.image.repository }}:{{ .Values.datacatalog.image.tag }}" + imagePullPolicy: "{{ .Values.datacatalog.image.pullPolicy }}" + name: datacatalog + ports: + - containerPort: 8088 + - containerPort: 8089 + resources: {{ toYaml .Values.datacatalog.resources | nindent 10 }} + volumeMounts: {{- include "databaseSecret.volumeMount" . | nindent 8 }} + - mountPath: /etc/datacatalog/config + name: config-volume + serviceAccountName: {{ template "datacatalog.name" . }} + volumes: {{- include "databaseSecret.volume" . | nindent 6 }} + - emptyDir: {} + name: shared-data + - configMap: + name: datacatalog-config + name: config-volume + {{- with .Values.datacatalog.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.datacatalog.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.datacatalog.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} diff --git a/helm/templates/datacatalog/rbac.yaml b/helm/templates/datacatalog/rbac.yaml new file mode 100644 index 0000000000..ca1ab89afd --- /dev/null +++ b/helm/templates/datacatalog/rbac.yaml @@ -0,0 +1,11 @@ + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "datacatalog.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "datacatalog.labels" . | nindent 4 }} + {{- with .Values.datacatalog.serviceAccountAnnotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} diff --git a/helm/templates/datacatalog/service.yaml b/helm/templates/datacatalog/service.yaml new file mode 100644 index 0000000000..07958d2f34 --- /dev/null +++ b/helm/templates/datacatalog/service.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "datacatalog.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "datacatalog.labels" . 
| nindent 4 }} + {{- with .Values.datacatalog.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.datacatalog.service.type}} + type: {{ . }} + {{- end }} + ports: + - name: grpc-2 + port: 8089 + protocol: TCP + targetPort: 8089 + - name: http + port: 88 + protocol: TCP + targetPort: 8088 + - name: grpc + port: 89 + protocol: TCP + targetPort: 8089 + selector: {{ include "datacatalog.selectorLabels" . | nindent 4 }} diff --git a/helm/templates/minio/deployment.yaml b/helm/templates/minio/deployment.yaml new file mode 100644 index 0000000000..63b57d293c --- /dev/null +++ b/helm/templates/minio/deployment.yaml @@ -0,0 +1,50 @@ +{{- if .Values.minio.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "minio.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "minio.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.minio.replicaCount }} + selector: + matchLabels: {{ include "minio.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.minio.podAnnotations }} + annotations: {{ toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "minio.labels" . | nindent 8 }} + spec: + containers: + - image: "{{ .Values.minio.image.repository }}:{{ .Values.minio.image.tag }}" + imagePullPolicy: "{{ .Values.minio.image.pullPolicy }}" + name: minio + args: + - server + - /data + env: + - name: MINIO_ACCESS_KEY + value: minio + - name: MINIO_SECRET_KEY + value: miniostorage + ports: + - containerPort: 9000 + name: minio + resources: {{ toYaml .Values.minio.resources | nindent 10 }} + volumeMounts: + - name: minio-storage + mountPath: /data # must match the directory passed to "server" in args, otherwise the volume is never used + volumes: + - name: minio-storage + emptyDir: {} + {{- with .Values.minio.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.minio.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.minio.tolerations }} + tolerations: {{ toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/templates/minio/service.yaml b/helm/templates/minio/service.yaml new file mode 100644 index 0000000000..b9b56cc1bc --- /dev/null +++ b/helm/templates/minio/service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.minio.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "minio.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "minio.labels" . | nindent 4 }} + {{- with .Values.minio.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.minio.service.type}} + type: {{ . }} + {{- end }} + ports: + - name: minio + port: 9000 + protocol: TCP + targetPort: minio + selector: {{ include "minio.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/helm/templates/postgres/deployment.yaml b/helm/templates/postgres/deployment.yaml new file mode 100644 index 0000000000..ff678d36ad --- /dev/null +++ b/helm/templates/postgres/deployment.yaml @@ -0,0 +1,45 @@ +{{- if .Values.postgres.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "postgres.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "postgres.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.postgres.replicaCount }} + selector: + matchLabels: {{ include "postgres.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.postgres.podAnnotations }} + annotations: {{ toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "postgres.labels" . 
| nindent 8 }} + spec: + containers: + - image: "{{ .Values.postgres.image.repository }}:{{ .Values.postgres.image.tag }}" + imagePullPolicy: "{{ .Values.postgres.image.pullPolicy }}" + name: postgres + env: + - name: POSTGRES_HOST_AUTH_METHOD + value: trust + ports: + - containerPort: 5432 + name: postgres + resources: {{ toYaml .Values.postgres.resources | nindent 10 }} + volumeMounts: + - name: postgres-storage + mountPath: /var/lib/postgresql/data + volumes: + - name: postgres-storage + emptyDir: {} + {{- with .Values.postgres.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.postgres.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.postgres.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/templates/postgres/service.yaml b/helm/templates/postgres/service.yaml new file mode 100644 index 0000000000..4309df215e --- /dev/null +++ b/helm/templates/postgres/service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.postgres.enabled }}{{/* gated on the same switch as postgres/deployment.yaml, not on minio */}} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "postgres.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "postgres.labels" . | nindent 4 }} + {{- with .Values.postgres.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.postgres.service.type}} + type: {{ . }} + {{- end }} + ports: + - name: postgres + port: 5432 + protocol: TCP + targetPort: postgres + selector: {{ include "postgres.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/helm/templates/propeller/configmap.yaml b/helm/templates/propeller/configmap.yaml new file mode 100644 index 0000000000..70b4d58a0e --- /dev/null +++ b/helm/templates/propeller/configmap.yaml @@ -0,0 +1,48 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: flyte-propeller-config + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flytepropeller.labels" . 
| nindent 4 }} +data: +{{- with .Values.configmap.admin }} + admin.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.catalog }} + catalog.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.catalog_cache }} + catalog_cache.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.copilot }} + copilot.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.core }} + core.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.enabled_plugins }} + enabled_plugins.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.k8s }} + k8s.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.logger }} + logger.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.qubole }} + qubole.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.resource_manager }} + resource_manager.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- if .Values.sparkoperator.enabled }} +{{- with .Values.configmap.spark }} + spark.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- end }} +{{- with .Values.configmap.storage }} + storage.yaml: | {{ toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.configmap.task_logs }} + task_logs.yaml: | {{ toYaml . 
| nindent 4 }} +{{- end }} diff --git a/helm/templates/propeller/crds/flyteworkflow.yaml b/helm/templates/propeller/crds/flyteworkflow.yaml new file mode 100644 index 0000000000..1095feca6b --- /dev/null +++ b/helm/templates/propeller/crds/flyteworkflow.yaml @@ -0,0 +1,14 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: flyteworkflows.flyte.lyft.com +spec: + group: flyte.lyft.com + names: + kind: FlyteWorkflow + plural: flyteworkflows + shortNames: + - fly + singular: flyteworkflow + scope: Namespaced + version: v1alpha1 diff --git a/helm/templates/propeller/deployment.yaml b/helm/templates/propeller/deployment.yaml new file mode 100644 index 0000000000..fc11b1eb3d --- /dev/null +++ b/helm/templates/propeller/deployment.yaml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "flytepropeller.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flytepropeller.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.flytepropeller.replicaCount }} + selector: + matchLabels: {{ include "flytepropeller.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + configChecksum: {{ include (print .Template.BasePath "/propeller/configmap.yaml") . | sha256sum | trunc 63 | quote }} + {{- with .Values.flytepropeller.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "flytepropeller.labels" . | nindent 8 }} + spec: + containers: + - command: + - flytepropeller + - --config + - {{ .Values.flytepropeller.configPath }} + {{- with .Values.flytepropeller.extraArgs }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + image: "{{ .Values.flytepropeller.image.repository }}:{{ .Values.flytepropeller.image.tag }}" + imagePullPolicy: "{{ .Values.flytepropeller.image.pullPolicy }}" + name: flytepropeller + ports: + - containerPort: 10254 + resources: {{ toYaml .Values.flytepropeller.resources | nindent 10 }} + volumeMounts: + - mountPath: /etc/flyte/config + name: config-volume + serviceAccountName: {{ template "flytepropeller.name" . }} + volumes: + - configMap: + name: flyte-propeller-config + name: config-volume + {{- with .Values.flytepropeller.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flytepropeller.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.flytepropeller.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} diff --git a/helm/templates/propeller/rbac.yaml b/helm/templates/propeller/rbac.yaml new file mode 100644 index 0000000000..dd0cee2788 --- /dev/null +++ b/helm/templates/propeller/rbac.yaml @@ -0,0 +1,88 @@ + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "flytepropeller.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "flytepropeller.labels" . | nindent 4 }} + {{- with .Values.flytepropeller.serviceAccountAnnotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end}} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "flytepropeller.name" . }} + labels: {{ include "flytepropeller.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - update + - delete + - patch +- apiGroups: + - '*' + resources: + - '*' + verbs: + - get + - list + - watch + - create + - update + - delete + - patch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch + - create + - delete + - update +- apiGroups: + - flyte.lyft.com + resources: + - flyteworkflows + verbs: + - get + - list + - watch + - create + - update + - delete + - patch + - post + - deletecollection + +--- +apiVersion: rbac.authorization.k8s.io/v1 # same API version as the ClusterRole in this file; rbac v1beta1 is not served on Kubernetes 1.22+ +kind: ClusterRoleBinding +metadata: + name: {{ template "flytepropeller.name" . }} + labels: {{ include "flytepropeller.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "flytepropeller.name" . }} +subjects: +- kind: ServiceAccount + name: {{ template "flytepropeller.name" . }} + namespace: {{ template "flyte.namespace" . 
}} diff --git a/helm/templates/pytorch-operator/crds/pytorchjobs.yaml b/helm/templates/pytorch-operator/crds/pytorchjobs.yaml new file mode 100644 index 0000000000..ae2b7b7e78 --- /dev/null +++ b/helm/templates/pytorch-operator/crds/pytorchjobs.yaml @@ -0,0 +1,44 @@ +{{- if .Values.pytorchoperator.enabled }} +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: pytorchjobs.kubeflow.org +spec: + additionalPrinterColumns: + - JSONPath: .status.conditions[-1:].type + name: State + type: string + - JSONPath: .metadata.creationTimestamp + name: Age + type: date + group: kubeflow.org + names: + kind: PyTorchJob + plural: pytorchjobs + singular: pytorchjob + scope: Namespaced + subresources: + status: {} + validation: + openAPIV3Schema: + properties: + spec: + properties: + pytorchReplicaSpecs: + properties: + Master: + properties: + replicas: + maximum: 1 + minimum: 1 + type: integer + Worker: + properties: + replicas: + minimum: 1 + type: integer + versions: + - name: v1 + served: true + storage: true +{{- end }} diff --git a/helm/templates/pytorch-operator/deployment.yaml b/helm/templates/pytorch-operator/deployment.yaml new file mode 100644 index 0000000000..02dc98fb01 --- /dev/null +++ b/helm/templates/pytorch-operator/deployment.yaml @@ -0,0 +1,48 @@ +{{- if .Values.pytorchoperator.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pytorch-operator.name" . }} + namespace: {{ template "pytorch-operator.namespace" . }} + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.pytorchoperator.replicaCount }} + selector: + matchLabels: {{ include "pytorch-operator.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.pytorchoperator.podAnnotations }} + annotations: {{ toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "pytorch-operator.labels" . 
| nindent 8 }} + spec: + containers: + - image: "{{ .Values.pytorchoperator.image.repository }}:{{ .Values.pytorchoperator.image.tag }}" + imagePullPolicy: "{{ .Values.pytorchoperator.image.pullPolicy }}" + name: pytorch-operator + command: + - /pytorch-operator.v1 + - --alsologtostderr + - -v=1 + - --monitoring-port=8443 + env: + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + resources: {{ toYaml .Values.pytorchoperator.resources | nindent 10 }} + serviceAccountName: {{ template "pytorch-operator.name" . }} + {{- with .Values.pytorchoperator.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.pytorchoperator.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.pytorchoperator.tolerations }} + tolerations: {{ toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/templates/pytorch-operator/namespace.yaml b/helm/templates/pytorch-operator/namespace.yaml new file mode 100644 index 0000000000..6e15ce3dc0 --- /dev/null +++ b/helm/templates/pytorch-operator/namespace.yaml @@ -0,0 +1,6 @@ +{{- if .Values.pytorchoperator.enabled }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ template "pytorch-operator.namespace" . }} +{{- end }} diff --git a/helm/templates/pytorch-operator/rbac.yaml b/helm/templates/pytorch-operator/rbac.yaml new file mode 100644 index 0000000000..fe980a9b87 --- /dev/null +++ b/helm/templates/pytorch-operator/rbac.yaml @@ -0,0 +1,108 @@ +{{- if .Values.pytorchoperator.enabled }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pytorch-operator.name" . }} + namespace: {{ template "pytorch-operator.namespace" . }} + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} + {{- with .Values.pytorchoperator.serviceAccountAnnotations }} + annotations: {{ toYaml . 
| nindent 4 }} + {{- end }} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pytorch-operator.name" . }}-admin + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +rules: [] +aggregationRule: + clusterRoleSelectors: + - matchLabels: + rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true" + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pytorch-operator.name" . }}-edit + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - get + - list + - watch + - create + - delete + - deletecollection + - patch + - update + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pytorch-operator.name" . }}-view + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - get + - list + - watch + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pytorch-operator.name" . }} + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - '*' +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - '*' +- apiGroups: + - "" + resources: + - pods + - services + - endpoints + - events + verbs: + - '*' + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pytorch-operator.name" . }} + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pytorch-operator.name" . }} +subjects: +- kind: ServiceAccount + name: {{ template "pytorch-operator.name" . 
}} + namespace: {{ template "pytorch-operator.namespace" . }} +{{- end }} diff --git a/helm/templates/pytorch-operator/service.yaml b/helm/templates/pytorch-operator/service.yaml new file mode 100644 index 0000000000..3deea516e7 --- /dev/null +++ b/helm/templates/pytorch-operator/service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.pytorchoperator.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pytorch-operator.name" . }} + namespace: {{ template "pytorch-operator.namespace" . }} + labels: {{ include "pytorch-operator.labels" . | nindent 4 }} + {{- with .Values.pytorchoperator.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.pytorchoperator.service.type}} + type: {{ . }} + {{- end }} + ports: + - name: monitoring-port + port: 8443 + targetPort: 8443 + selector: {{ include "pytorch-operator.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/helm/templates/redis/service.yaml b/helm/templates/redis/service.yaml new file mode 100644 index 0000000000..46afc98df5 --- /dev/null +++ b/helm/templates/redis/service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.redis.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "redis.name" . }}-resource-manager + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "redis.labels" . | nindent 4 }} + {{- with .Values.redis.service.annotations }} + annotations: {{ toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.redis.service.type}} + type: {{ . }} + {{- end }} + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: redis + selector: {{ include "redis.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/helm/templates/redis/statefulset.yaml b/helm/templates/redis/statefulset.yaml new file mode 100644 index 0000000000..93062efb22 --- /dev/null +++ b/helm/templates/redis/statefulset.yaml @@ -0,0 +1,70 @@ +{{- if .Values.redis.enabled }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "redis.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: {{ include "redis.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.redis.replicaCount }} + selector: + matchLabels: {{ include "redis.selectorLabels" . | nindent 6 }} + serviceName: {{ template "redis.name" . }}-resource-manager + template: + metadata: + {{- with .Values.redis.podAnnotations }} + annotations: {{ toYaml . | nindent 8 }} + {{- end }} + labels: {{ include "redis.labels" . | nindent 8 }} + spec: + containers: + - image: "{{ .Values.redis.image.repository }}:{{ .Values.redis.image.tag }}" + imagePullPolicy: "{{ .Values.redis.image.pullPolicy }}" + name: redis + env: + - name: REDIS_PASSWORD + value: mypassword + livenessProbe: + exec: + command: + - redis-cli + - ping + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + exec: + command: + - redis-cli + - ping + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + ports: + - containerPort: 6379 + name: redis + protocol: TCP + resources: {{ toYaml .Values.redis.resources | nindent 10 }} + volumeMounts: + - mountPath: /bitnami + name: redis-data + + dnsPolicy: ClusterFirst + restartPolicy: Always + volumes: + - emptyDir: {} + name: redis-data + {{- with .Values.redis.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.redis.affinity }} + affinity: {{ toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.redis.tolerations }} + tolerations: {{ toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/values-eks.yaml b/helm/values-eks.yaml new file mode 100644 index 0000000000..a74a897c52 --- /dev/null +++ b/helm/values-eks.yaml @@ -0,0 +1,596 @@ + +# +# FLYTEADMIN +# + +flyteadmin: + replicaCount: 2 + + # IAM role for SA: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html + serviceAccountAnnotations: + eks.amazonaws.com/role-arn: arn:aws:iam:::role/iam-role-flyte + + resources: + limits: + cpu: 250m + ephemeral-storage: 200Mi + memory: 500Mi + requests: + cpu: 50m + ephemeral-storage: 200Mi + memory: 200Mi + + service: + annotations: + service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "600" + # TODO add security groups + service.beta.kubernetes.io/aws-load-balancer-extra-security-groups: "sg-...,sg-...,sg-..." + # TODO alter domain + external-dns.alpha.kubernetes.io/hostname: "flyteadmin.subdomain.mydomain.com" + type: LoadBalancer + loadBalancerSourceRanges: + # TODO restrict the source IP ranges (CIDR notation) if desired + - 0.0.0.0/0 + + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: flyteadmin + topologyKey: kubernetes.io/hostname + +# +# DATACATALOG +# + +datacatalog: + replicaCount: 2 + + # IAM role for SA: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html + serviceAccountAnnotations: + eks.amazonaws.com/role-arn: arn:aws:iam:::role/iam-role-flyte + + resources: + limits: + cpu: 500m + ephemeral-storage: 200Mi + memory: 500Mi + requests: + cpu: 50m + ephemeral-storage: 200Mi + memory: 200Mi + + configPath: /etc/datacatalog/config/*.yaml + + service: + annotations: + service.beta.kubernetes.io/aws-load-balancer-internal: "true" + type: LoadBalancer + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: datacatalog + topologyKey: kubernetes.io/hostname + +# +# FLYTEPROPELLER +# 
+ +flytepropeller: + replicaCount: 2 + + # IAM role for SA: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html + serviceAccountAnnotations: + eks.amazonaws.com/role-arn: arn:aws:iam:::role/iam-role-flyte + + resources: + limits: + cpu: 500m + ephemeral-storage: 200Mi + memory: 500Mi + requests: + cpu: 50m + ephemeral-storage: 200Mi + memory: 200Mi + configPath: /etc/flyte/config*/*.yaml + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: flytepropeller + topologyKey: kubernetes.io/hostname + +# +# FLYTECONSOLE +# + +flyteconsole: + replicaCount: 2 + resources: + limits: + cpu: 250m + memory: 250Mi + requests: + cpu: 10m + memory: 50Mi + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: flyteconsole + topologyKey: kubernetes.io/hostname + +# +# REDIS +# + +redis: + enabled: true + replicaCount: 1 + resources: + requests: + cpu: 100m + memory: 250Mi + +# +# POSTGRES +# + +postgres: + enabled: false + +# +# MINIO +# + +minio: + enabled: false + +# +# CONTOUR +# + +contour: + enabled: false + +# +# SPARKOPERATOR +# + +sparkoperator: + enabled: true + replicaCount: 1 + image: + repository: gcr.io/spark-operator/spark-operator + tag: v2.4.0-v1beta1-0.9.0 + pullPolicy: IfNotPresent + resources: + limits: + cpu: 1000m + memory: 1000Mi + requests: + cpu: 50m + memory: 250Mi + +# +# PYTORCHOPERATOR +# + +pytorchoperator: + enabled: true + replicaCount: 1 + image: + repository: gcr.io/kubeflow-images-public/pytorch-operator + tag: v1.0.0-g047cf0f + pullPolicy: IfNotPresent + resources: + limits: + cpu: 1000m + memory: 1000Mi + requests: + cpu: 50m + memory: 250Mi + +# +# COMMON +# + +common: + ingress: + host: flyte.example.com + annotations: + # aws-load-balancer-controller v2.1 or higher is required - 
https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.1/ + kubernetes.io/ingress.class: alb + alb.ingress.kubernetes.io/tags: service_instance=production + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' + # Instruct ALB Controller to not create multiple load balancers (and hence maintain a single endpoint for both GRPC and Http) + alb.ingress.kubernetes.io/group.name: flytesystem + # Replace certificate Arn with one deployed to your EKS cluster. Follow instructions in README.md + alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:us-east-2:111111111111:certificate/e92fefd8-6197-4249-a524-431d611c9af6 + alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]' + databaseSecret: + name: db-pass + secretManifest: + # # Leave it empty if your secret already exists + + # # Else you can create your own secret object: + # # Necessary dependencies: + # # - https://github.com/hashicorp/vault + # # - https://github.com/godaddy/kubernetes-external-secrets + + # apiVersion: kubernetes-client.io/v1 + # kind: ExternalSecret + # metadata: + # name: db-pass + # namespace: flyte + # spec: + # backendType: vault + # vaultMountPoint: + # vaultRole: + # data: + # - name: pass.txt + # key: k8s/data/path/to/secret + # property: password + +# +# CONFIGMAPS +# + +configmap: + + console: + BASE_URL: /console + CONFIG_DIR: /etc/flyte/config + + cluster_resources: + cluster_resources: + templatePath: "/etc/flyte/clusterresource/templates" + customData: + production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + staging: + - projectQuotaCpu: + value: "2" + - projectQuotaMemory: + value: "3000Mi" + development: + - projectQuotaCpu: + value: "4" + - projectQuotaMemory: + value: "3000Mi" + refresh: 5m + + db: + database: + port: 5432 + username: postgres + host: + dbname: 
flyte-development + passwordPath: /etc/db/pass.txt + + domain: + domains: + - id: development + name: development + - id: staging + name: staging + - id: production + name: production + + remote_data: + remoteData: + region: us-east-1 + scheme: aws + signedUrls: + durationMinutes: 3 + + server: + server: + httpPort: 8088 + grpcPort: 8089 + security: + secure: false + useAuth: false + allowCors: true + allowedOrigins: + # Accepting all domains for Sandbox installation + - "*" + allowedHeaders: + - "Content-Type" + flyteadmin: + roleNameKey: "iam.amazonaws.com/role" + profilerPort: 10254 + metricsScope: "flyte:" + metadataStoragePrefix: + - "metadata" + - "admin" + eventVersion: 1 + testing: + host: http://flyteadmin + scheduler: + eventScheduler: + scheme: aws + region: us-east-1 + scheduleRole: arn:aws:iam:::role/flyte_cron_scheduler_role + targetName: arn:aws:sqs:us-east-1::flyte-cron-scheduler-queue + scheduleNamePrefix: flyte + workflowExecutor: + scheme: aws + region: us-east-1 + scheduleQueueName: flyte-cron-scheduler-queue + accountId: + reconnectAttempts: 10 + reconnectDelaySeconds: 30 + notifications: + type: aws + region: us-east-1 + publisher: + topicName: arn:aws:sns:us-east-1::flyte-notifications-topic + processor: + queueName: flyte-notifications-queue + accountId: + emailer: + subject: "Flyte: {{ project }}/{{ domain }}/{{ launch_plan.name }} has '{{ phase }}'" + sender: "flyte@example.com" + body: | + Execution {{ workflow.project }}/{{ workflow.domain }}/{{ workflow.name }}/{{ name }} has {{ phase }}. + Details: https://flyte.example.com/console/projects/{{ project }}/domains/{{ domain }}/executions/{{ name }}. 
+ {{ error }} + task_type_whitelist: + spark: + - project: flytetester + - project: spark-workflows + + + datacatalogServer: + datacatalog: + storage-prefix: metadata/datacatalog + metrics-scope: datacatalog + profiler-port: 10254 + + storage: + storage: + type: s3 + container: s3-bucket-for-flyte + connection: + auth-type: iam + region: us-east-1 + + task_resource_defaults: + task_resources: + defaults: + cpu: 1000m + memory: 1000Mi + storage: 1000Mi + limits: + cpu: 2 + memory: 8Gi + storage: 2000Mi + gpu: 1 + + admin: + event: + type: admin + rate: 500 + capacity: 1000 + admin: + endpoint: flyteadmin:81 + insecure: true + + catalog: + catalog-cache: + endpoint: datacatalog:89 + type: datacatalog + insecure: true + + catalog_cache: + plugins: + catalogCache: + reader: + maxItems: 10000 + writer: + maxItems: 10000 + + core: + propeller: + rawoutput-prefix: s3://s3-bucket-for-flyte/ + metadata-prefix: metadata/propeller + workers: 40 + gc-interval: 12h + max-workflow-retries: 50 + workflow-reeval-duration: 30s + downstream-eval-duration: 30s + limit-namespace: "all" + prof-port: 10254 + metrics-prefix: flyte + enable-admin-launcher: true + leader-election: + lock-config-map: + name: propeller-leader + namespace: flyte + enabled: true + lease-duration: 15s + renew-deadline: 10s + retry-period: 2s + kube-client-config: + qps: 100 + burst: 25 + timeout: 30s + queue: + type: batch + batching-interval: 2s + batch-size: -1 + queue: + type: maxof + rate: 100 + capacity: 1000 + base-delay: 5s + max-delay: 120s + sub-queue: + type: bucket + rate: 100 + capacity: 1000 + workflowStore: + policy: "ResourceVersionCache" + + enabled_plugins: + tasks: + max-plugin-phase-versions: 1000000 + task-plugins: + enabled-plugins: + - container + - sidecar + - spark + - k8s-array + - pytorch + default-for-task-types: + container: container + sidecar: sidecar + spark: spark + container_array: k8s-array + pytorch: pytorch + + k8s: + plugins: + k8s: + default-cpus: 100m + default-memory: 
100Mi + + logger: + logger: + show-source: true + level: 5 + + qubole: + plugins: + qubole: + quboleTokenKey: "FLYTE_QUBOLE_CLIENT_TOKEN" + + resource_manager: + propeller: + resourcemanager: + type: redis + resourceMaxQuota: 10000 + redis: + hostPath: redis-resource-manager:6379 + hostKey: mypassword + + spark: + plugins: + spark: + spark-config-default: + # We override the default credentials chain provider for Hadoop so that + # it can use the serviceAccount based IAM role or ec2 metadata based. + # This is more in line with how AWS works + - spark.hadoop.fs.s3a.aws.credentials.provider: "com.amazonaws.auth.DefaultAWSCredentialsProviderChain" + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.blacklist.enabled: "true" + - spark.blacklist.timeout: "5m" + - spark.task.maxFailures: "8" + + task_logs: + plugins: + logs: + kubernetes-enabled: false + + aa_namespace: | + apiVersion: v1 + kind: Namespace + metadata: + name: {{ namespace }} + spec: + finalizers: + - kubernetes + + ab_project_resource_quota: | + apiVersion: v1 + kind: ResourceQuota + metadata: + name: project-quota + namespace: {{ namespace }} + spec: + hard: + limits.cpu: {{ projectQuotaCpu }} + limits.memory: {{ projectQuotaMemory }} + + ac_project_copilot_dataconfig: | + kind: ConfigMap + apiVersion: v1 + metadata: + name: flyte-data-config + namespace: {{ 
namespace }} + data: + config.yaml: | + storage: + connection: + auth-type: iam + region: us-east-1 + type: s3 + container: s3-bucket-for-flyte + enable-multicontainer: true + + ad_spark_role: | + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: spark-role + namespace: {{ namespace }} + rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - '*' + - apiGroups: + - "" + resources: + - services + verbs: + - '*' + - apiGroups: + - "" + resources: + - configmaps + verbs: + - '*' + + ae_spark_service_account: | + apiVersion: v1 + kind: ServiceAccount + metadata: + name: spark + namespace: {{ namespace }} + + af_spark_role_binding: | + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: spark-role-binding + namespace: {{ namespace }} + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-role + subjects: + - kind: ServiceAccount + name: spark + namespace: {{ namespace }} diff --git a/helm/values-gcp.yaml b/helm/values-gcp.yaml new file mode 100644 index 0000000000..cbe96ceee7 --- /dev/null +++ b/helm/values-gcp.yaml @@ -0,0 +1,28 @@ +# +# COMMON +# + +common: + databaseSecret: {} + ingress: + +# +# CONFIGMAPS +# + +configmap: + tasks: + max-plugin-phase-versions: 1000000 + task-plugins: + enabled-plugins: + - container + - sidecar + - spark + - k8s-array + - pytorch + default-for-task-types: + container: container + sidecar: sidecar + spark: spark + container_array: k8s-array + pytorch: pytorch \ No newline at end of file diff --git a/helm/values-sandbox.yaml b/helm/values-sandbox.yaml new file mode 100644 index 0000000000..dff617bb7b --- /dev/null +++ b/helm/values-sandbox.yaml @@ -0,0 +1,205 @@ + +# +# FLYTEADMIN +# + +flyteadmin: {} # use default config + +# +# DATACATALOG +# + +datacatalog: {} + +# +# FLYTEPROPELLER +# + +flytepropeller: {} + +# +# FLYTECONSOLE +# + +flyteconsole: {} + +# +# REDIS +# + +redis: + enabled: false + +# +# POSTGRES +# + +postgres: + image: + # Override 
postgres image to use alpine based (rather smaller) docker image + tag: "10.16-alpine" + +# +# MINIO +# + +minio: {} + +# +# CONTOUR +# + +contour: + contour: {} + envoy: + service: + type: NodePort + ports: + http: 80 + nodePorts: + http: 30081 + +# +# SPARKOPERATOR +# + +sparkoperator: + enabled: false + +# +# PYTORCHOPERATOR +# + +pytorchoperator: + enabled: false + +# +# KUBERNETES DASHBOARD +# +kubernetes-dashboard: + enabled: true + extraArgs: + - --enable-skip-login + - --enable-insecure-login + - --disable-settings-authorizer + protocolHttp: true + service: + type: NodePort + externalPort: 30082 + +# +# COMMON +# + +common: + databaseSecret: {} + ingress: + # enable HMR route to flyteconsole for frontend development. + webpackHMR: true + tls: + enabled: false + flyteNamespaceTemplate: {} + +# +# CONFIGMAPS +# + +configmap: + + db: + database: + port: 5432 + username: postgres + host: postgres + dbname: flyte_development + + storage: + storage: + type: minio + container: my-s3-bucket + connection: + access-key: minio + auth-type: accesskey + secret-key: miniostorage + disable-ssl: true + endpoint: http://minio.flyte.svc.cluster.local:9000 + region: us-east-1 + + # Task default resources configuration + task_resource_defaults: + task_resources: + defaults: + memory: 200Mi + + enabled_plugins: + tasks: + task-plugins: + enabled-plugins: + - container + - sidecar + - k8s-array + default-for-task-types: + container: container + sidecar: sidecar + container_array: k8s-array + + k8s: + plugins: + k8s: + default-env-vars: + - FLYTE_AWS_ENDPOINT: "http://minio.flyte:9000" + - FLYTE_AWS_ACCESS_KEY_ID: minio + - FLYTE_AWS_SECRET_ACCESS_KEY: miniostorage + default-memory: 200Mi + + task_logs: + plugins: + logs: + kubernetes-enabled: true + kubernetes-template-uri: "http://localhost:30082/#/log/{{ .namespace }}/{{ .podName }}/pod?namespace={{ .namespace }}" + + logger: + logger: + show-source: true + level: 5 + + # Disable Qubole + qubole: null + + # Use noop 
resource-manager + resource_manager: + propeller: + resourcemanager: + type: noop + redis: null + + ab_project_resource_quota: | + apiVersion: v1 + kind: ResourceQuota + metadata: + name: project-quota + namespace: {{ namespace }} + spec: + hard: + limits.cpu: {{ projectQuotaCpu }} + limits.memory: {{ projectQuotaMemory }} + + ac_project_copilot_dataconfig: | + kind: ConfigMap + apiVersion: v1 + metadata: + name: flyte-data-config + namespace: {{ namespace }} + data: + config.yaml: | + storage: + connection: + access-key: minio + auth-type: accesskey + disable-ssl: true + endpoint: http://minio.flyte.svc.cluster.local:9000 + region: us-east-1 + secret-key: miniostorage + type: minio + container: my-s3-bucket + enable-multicontainer: true diff --git a/helm/values.yaml b/helm/values.yaml new file mode 100644 index 0000000000..5f45bde061 --- /dev/null +++ b/helm/values.yaml @@ -0,0 +1,704 @@ + +# +# FLYTEADMIN SETTINGS +# + +flyteadmin: + # -- Replicas count for Flyteadmin deployment + replicaCount: 1 + image: + # -- Docker image for Flyteadmin deployment + repository: ghcr.io/flyteorg/flyteadmin + tag: v0.4.0 + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Flyteadmin deployment + resources: + limits: + cpu: 250m + ephemeral-storage: 100Mi + memory: 500Mi + requests: + cpu: 10m + ephemeral-storage: 50Mi + memory: 50Mi + # -- Default regex string for searching configuration files + configPath: /etc/flyte/config/*.yaml + # -- Service settings for Flyteadmin + service: + annotations: + projectcontour.io/upstream-protocol.h2c: grpc + alb.ingress.kubernetes.io/backend-protocol-version: GRPC + type: ClusterIP + loadBalancerSourceRanges: [] + # -- Annotations for ServiceAccount attached to Flyteadmin pods + serviceAccountAnnotations: {} + # -- Annotations for Flyteadmin pods + podAnnotations: {} + # -- nodeSelector for Flyteadmin deployment + nodeSelector: {} + # -- tolerations for Flyteadmin deployment + tolerations: [] + # -- affinity for 
Flyteadmin deployment + affinity: {} + +# +# DATACATALOG SETTINGS +# + +datacatalog: + # -- Replicas count for Datacatalog deployment + replicaCount: 1 + image: + # -- Docker image for Datacatalog deployment + repository: ghcr.io/flyteorg/datacatalog + tag: v0.3.0 + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Datacatalog deployment + resources: + limits: + cpu: 500m + ephemeral-storage: 100Mi + memory: 500Mi + requests: + cpu: 10m + ephemeral-storage: 50Mi + memory: 50Mi + # -- Default regex string for searching configuration files + configPath: /etc/datacatalog/config/*.yaml + # -- Service settings for Datacatalog + service: + annotations: + projectcontour.io/upstream-protocol.h2c: grpc + alb.ingress.kubernetes.io/backend-protocol-version: GRPC + type: NodePort + # -- Annotations for ServiceAccount attached to Datacatalog pods + serviceAccountAnnotations: {} + # -- Annotations for Datacatalog pods + podAnnotations: {} + # -- nodeSelector for Datacatalog deployment + nodeSelector: {} + # -- tolerations for Datacatalog deployment + tolerations: [] + # -- affinity for Datacatalog deployment + affinity: {} + +# +# FLYTEPROPELLER SETTINGS +# + +flytepropeller: + # -- Replicas count for Flytepropeller deployment + replicaCount: 1 + image: + # -- Docker image for Flytepropeller deployment + repository: ghcr.io/flyteorg/flytepropeller + tag: v0.7.8 + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Flytepropeller deployment + resources: + limits: + cpu: 200m + ephemeral-storage: 100Mi + memory: 200Mi + requests: + cpu: 10m + ephemeral-storage: 50Mi + memory: 50Mi + # -- Default regex string for searching configuration files + configPath: /etc/flyte/config/*.yaml + # -- Annotations for ServiceAccount attached to Flytepropeller pods + serviceAccountAnnotations: {} + # -- Annotations for Flytepropeller pods + podAnnotations: {} + # -- nodeSelector for Flytepropeller deployment + nodeSelector: {} + # -- tolerations for 
Flytepropeller deployment + tolerations: [] + # -- affinity for Flytepropeller deployment + affinity: {} + +# +# FLYTECONSOLE SETTINGS +# + +flyteconsole: + # -- Replicas count for Flyteconsole deployment + replicaCount: 1 + image: + # -- Docker image for Flyteconsole deployment + repository: ghcr.io/flyteorg/flyteconsole + tag: v0.19.6 + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Flyteconsole deployment + resources: + limits: + cpu: 500m + memory: 250Mi + requests: + cpu: 10m + memory: 50Mi + # -- Service settings for Flyteconsole + service: + annotations: {} + type: ClusterIP + # -- Annotations for Flyteconsole pods + podAnnotations: {} + # -- nodeSelector for Flyteconsole deployment + nodeSelector: {} + # -- tolerations for Flyteconsole deployment + tolerations: [] + # -- affinity for Flyteconsole deployment + affinity: {} + +# +# REDIS SETTINGS +# + +redis: + # --- enable or disable Redis Statefulset installation + enabled: true + # -- Replicas count for Redis Statefulset + replicaCount: 1 + image: + # -- Docker image for Redis Statefulset + repository: docker.io/bitnami/redis + tag: 4.0.2-r1 + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Redis Statefulset + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + cpu: 1000m + memory: 1Gi + # -- Service settings for Redis + service: + annotations: {} + type: ClusterIP + # -- Annotations for Redis pods + podAnnotations: {} + # -- nodeSelector for Redis Statefulset + nodeSelector: {} + # -- tolerations for Redis Statefulset + tolerations: [] + # -- affinity for Redis Statefulset + affinity: {} + +# +# POSTGRES SETTINGS +# + +postgres: + # --- enable or disable Postgres deployment installation + enabled: true + # -- Replicas count for Postgres deployment + replicaCount: 1 + image: + # -- Docker image for Postgres deployment + repository: postgres + tag: "10.16" + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Postgres 
deployment + resources: + requests: + cpu: 10m + memory: 128Mi + limits: + cpu: 1000m + memory: 512Mi + # -- Service settings for Postgres + service: + annotations: {} + type: ClusterIP + # -- Annotations for Postgres pods + podAnnotations: {} + # -- nodeSelector for Postgres deployment + nodeSelector: {} + # -- tolerations for Postgres deployment + tolerations: [] + # -- affinity for Postgres deployment + affinity: {} + +# +# MINIO SETTINGS +# + +minio: + # --- enable or disable Minio deployment installation + enabled: true + # -- Replicas count for Minio deployment + replicaCount: 1 + image: + # -- Docker image for Minio deployment + repository: minio/minio + tag: RELEASE.2020-12-16T05-05-17Z + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Minio deployment + resources: + requests: + cpu: 10m + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi + # -- Service settings for Minio + service: + annotations: {} + type: ClusterIP + # -- Annotations for Minio pods + podAnnotations: {} + # -- nodeSelector for Minio deployment + nodeSelector: {} + # -- tolerations for Minio deployment + tolerations: [] + # -- affinity for Minio deployment + affinity: {} + +# +# CONTOUR SETTINGS +# + +contour: + # --- enable or disable Contour deployment installation + enabled: true + # -- Replicas count for Contour deployment + replicaCount: 1 + contour: + # -- Default resources requests and limits for Contour + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + cpu: 100m + memory: 100Mi + envoy: + # -- Default resources requests and limits for Envoy + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + cpu: 100m + memory: 100Mi + # -- Annotations for ServiceAccount attached to Contour pods + serviceAccountAnnotations: {} + # -- Annotations for Contour pods + podAnnotations: {} + # -- nodeSelector for Contour deployment + nodeSelector: {} + # -- tolerations for Contour deployment + tolerations: [] + # -- affinity for Contour deployment + 
affinity: {} + +# +# SPARKOPERATOR SETTINGS +# + +sparkoperator: + # -- enable or disable Sparkoperator deployment installation + enabled: true + # -- Replicas count for Sparkoperator deployment + replicaCount: 1 + image: + # -- Docker image for Sparkoperator + tag: v1beta2-1.2.0-3.0.0 # Set to v1beta2-1.1.2-2.4.5 for Spark 2 + # -- Default resources requests and limits for Sparkoperator + resources: + limits: + cpu: 1000m + memory: 500M + requests: + cpu: 10m + memory: 50M + +# +# PYTORCHOPERATOR SETTINGS +# + +pytorchoperator: + # -- enable or disable Pytorchoperator deployment installation + enabled: true # Set false to disable + # -- Replicas count for Pytorchoperator deployment + replicaCount: 1 + image: + # -- Docker image for Pytorchoperator + repository: gcr.io/kubeflow-images-public/pytorch-operator + tag: v1.0.0-g047cf0f + pullPolicy: IfNotPresent + # -- Default resources requests and limits for Pytorchoperator + resources: + limits: + cpu: 500m + memory: 1000M + requests: + cpu: 10m + memory: 50M + # -- Service settings for Pytorchoperator + service: + annotations: {} + type: ClusterIP + # -- Annotations for ServiceAccount attached to Pytorchoperator pods + serviceAccountAnnotations: {} + # -- Annotations for Pytorchoperator pods + podAnnotations: {} + # -- nodeSelector for Pytorchoperator deployment + nodeSelector: {} + # -- tolerations for Pytorchoperator deployment + tolerations: [] + # -- affinity for Pytorchoperator deployment + affinity: {} + +# +# KUBERNETES DASHBOARD +# + +kubernetes-dashboard: + enabled: false + +# +# COMMON SETTINGS +# + +common: + databaseSecret: + # -- Specify name of K8s Secret which contains Database password. Leave it empty if you don't need this Secret + name: "" + # -- Specify your Secret (with sensitive data) or pseudo-manifest (without sensitive data). See https://github.com/godaddy/kubernetes-external-secrets + secretManifest: {} + ingress: + # -- Enable or disable creating Ingress for Flyte. 
Relevant to disable when using e.g. Istio as ingress controller. + enabled: true + # -- Enable or disable HMR route to flyteconsole. This is useful only for frontend development. + webpackHMR: false + # -- separateGrpcIngress puts GRPC routes into a separate ingress if true. Required for certain ingress controllers like nginx. + separateGrpcIngress: false + # -- Extra Ingress annotations applied only to the GRPC ingress. Only makes sense if `separateGrpcIngress` is enabled. + separateGrpcIngressAnnotations: + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + # -- Ingress annotations applied to both HTTP and GRPC ingresses. + annotations: {} + # -- albSSLRedirect adds a special route for ssl redirect. Only useful in combination with the AWS LoadBalancer Controller. + albSSLRedirect: false + # -- Ingress hostname + # host: + tls: + enabled: true + flyteNamespaceTemplate: + # -- Enable or disable creating Flyte namespace in template. Enable when using helm as template-engine only. Disable when using `helm install ...`. 
+ enabled: false + +# +# CONFIGMAPS SETTINGS +# + +configmap: + + # Configmap for Flyte console UI + console: + BASE_URL: /console + CONFIG_DIR: /etc/flyte/config + + # Configmap for ClusterResource parameters + cluster_resources: + # -- ClusterResource parameters + cluster_resources: + templatePath: "/etc/flyte/clusterresource/templates" + customData: + production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + staging: + - projectQuotaCpu: + value: "2" + - projectQuotaMemory: + value: "3000Mi" + development: + - projectQuotaCpu: + value: "4" + - projectQuotaMemory: + value: "3000Mi" + refresh: 5m + + # Database configuration + db: + database: {} + # port: 5432 + # username: postgres + # host: postgres + # dbname: flyte + # passwordPath: /etc/db/pass.txt + + # Domains configuration for Flyte projects + domain: + domains: + - id: development + name: development + - id: staging + name: staging + - id: production + name: production + + # Remote data configuration + remote_data: + remoteData: + region: us-east-1 + scheme: local + signedUrls: + durationMinutes: 3 + + # -- Flyteadmin server config + server: + server: + httpPort: 8088 + grpcPort: 8089 + security: + secure: false + useAuth: false + allowCors: true + flyteadmin: + roleNameKey: "iam.amazonaws.com/role" + profilerPort: 10254 + metricsScope: "flyte:" + metadataStoragePrefix: + - "metadata" + - "admin" + eventVersion: 1 + testing: + host: http://flyteadmin + + # Datacatalog server config + datacatalogServer: + datacatalog: + storage-prefix: metadata/datacatalog + metrics-scope: datacatalog + profiler-port: 10254 + application: + grpcPort: 8089 + httpPort: 8080 + grpcServerReflection: true + + # Storage configuration + storage: + storage: + type: minio + container: my-s3-bucket + connection: + auth-type: accesskey + region: us-east-1 + + # Task default resources configuration + task_resource_defaults: + # -- Task default resources parameters + task_resources: + defaults: + cpu: 
100m + memory: 100Mi + storage: 5Mi + limits: + cpu: 2 + memory: 8Gi + storage: 20Mi + gpu: 1 + + # Admin configuration + admin: + event: + type: admin + rate: 500 + capacity: 1000 + admin: + endpoint: flyteadmin:81 + insecure: true + + # Catalog configuration + catalog: + catalog-cache: + endpoint: datacatalog:89 + type: datacatalog + insecure: true + + # Catalog cache configuration + catalog_cache: + plugins: + catalogCache: + reader: + maxItems: 10000 + writer: + maxItems: 10000 + + # Copilot configuration + copilot: + plugins: + k8s: + co-pilot: + name: flyte-copilot- + image: ghcr.io/lyft/flyteplugins/flytecopilot:dc4bdbd61cac88a39a5ff43e40f026bdbc2c78a2 + start-timeout: 30s + + # Core propeller configuration + core: + propeller: + rawoutput-prefix: s3://my-s3-bucket/ + metadata-prefix: metadata/propeller + workers: 4 + max-workflow-retries: 30 + workflow-reeval-duration: 30s + downstream-eval-duration: 30s + limit-namespace: "all" + prof-port: 10254 + metrics-prefix: flyte + enable-admin-launcher: true + leader-election: + lock-config-map: + name: propeller-leader + namespace: flyte + enabled: true + lease-duration: 15s + renew-deadline: 10s + retry-period: 2s + queue: + type: batch + batching-interval: 2s + batch-size: -1 + queue: + type: maxof + rate: 100 + capacity: 1000 + base-delay: 5s + max-delay: 120s + sub-queue: + type: bucket + rate: 10 + capacity: 100 + + # Plugins configuration + enabled_plugins: + tasks: + max-plugin-phase-versions: 1000000 + task-plugins: + enabled-plugins: [] + + # Kubernetes plugin configuration + k8s: + plugins: + k8s: + default-env-vars: [] + # - DEFAULT_ENV_VAR: VALUE + + default-cpus: 100m + default-memory: 100Mi + + # Logger configuration + logger: + logger: + show-source: true + level: 4 + + # Qubole plugin configuration + qubole: + plugins: + qubole: + quboleTokenKey: "FLYTE_QUBOLE_CLIENT_TOKEN" + + # Resource manager configuration + resource_manager: + # -- resource manager configuration + propeller: + resourcemanager: 
+ type: redis + resourceMaxQuota: 10000 + redis: + hostPath: redis-resource-manager:6379 + hostKey: mypassword + + # Spark plugin configuration + spark: + plugins: + spark: + # -- Spark default configuration + spark-config-default: + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.blacklist.enabled: "true" + - spark.blacklist.timeout: "5m" + - spark.task.maxfailures: "8" + + # Tasks logs plugin configuration + task_logs: + plugins: + logs: + kubernetes-enabled: false + + # -- Template for namespaces resources + aa_namespace: | + apiVersion: v1 + kind: Namespace + metadata: + name: {{ namespace }} + spec: + finalizers: + - kubernetes + + # -- Template for spark role + ad_spark_role: | + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: Role + metadata: + name: spark-role + namespace: {{ namespace }} + rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - '*' + - apiGroups: + - "" + resources: + - services + verbs: + - '*' + - apiGroups: + - "" + resources: + - configmaps + verbs: + - '*' + + # -- Template for spark SA + ae_spark_service_account: | + apiVersion: v1 + kind: ServiceAccount + metadata: + name: spark + namespace: {{ namespace }} + + # -- Template for spark role binding + af_spark_role_binding: | + apiVersion: rbac.authorization.k8s.io/v1beta1 + kind: RoleBinding + metadata: + name: spark-role-binding + 
namespace: {{ namespace }} + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-role + subjects: + - kind: ServiceAccount + name: spark + namespace: {{ namespace }}