diff --git a/.github/workflows/check-release.yaml b/.github/workflows/check-release.yaml
new file mode 100644
index 000000000..8a18e4975
--- /dev/null
+++ b/.github/workflows/check-release.yaml
@@ -0,0 +1,64 @@
+name: Check Release
+
+on:
+  pull_request:
+    branches:
+      - release-*
+    paths:
+      - VERSION
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  SEMVER_PATTERN: '^v([0-9]+)\.([0-9]+)\.([0-9]+)(-rc\.([0-9]+))?$'
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Check whether version matches semver pattern
+        run: |
+          VERSION=$(cat VERSION)
+          if [[ ${VERSION} =~ ${{ env.SEMVER_PATTERN }} ]]; then
+            echo "Version '${VERSION}' matches semver pattern."
+          else
+            echo "Version '${VERSION}' does not match semver pattern."
+            exit 1
+          fi
+          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+
+      - name: Check whether chart version and appVersion match version
+        run: |
+          VERSION=${VERSION#v}
+          CHART_VERSION=$(cat charts/spark-operator-chart/Chart.yaml | grep '^version:' | awk '{print $2}')
+          CHART_APP_VERSION=$(cat charts/spark-operator-chart/Chart.yaml | grep '^appVersion:' | awk '{print $2}')
+          if [[ ${CHART_VERSION} == ${VERSION} ]]; then
+            echo "Chart version '${CHART_VERSION}' matches version '${VERSION}'."
+          else
+            echo "Chart version '${CHART_VERSION}' does not match version '${VERSION}'."
+            exit 1
+          fi
+          if [[ ${CHART_APP_VERSION} == ${VERSION} ]]; then
+            echo "Chart appVersion '${CHART_APP_VERSION}' matches version '${VERSION}'."
+          else
+            echo "Chart appVersion '${CHART_APP_VERSION}' does not match version '${VERSION}'."
+            exit 1
+          fi
+
+      - name: Check if tag exists
+        run: |
+          git fetch --tags
+          if git tag -l | grep -q "^${VERSION}$"; then
+            echo "Tag '${VERSION}' already exists."
+            exit 1
+          else
+            echo "Tag '${VERSION}' does not exist."
+          fi
diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml
index 9380dfb2d..663404025 100644
--- a/.github/workflows/integration.yaml
+++ b/.github/workflows/integration.yaml
@@ -72,26 +72,11 @@ jobs:
       - name: Run unit tests
         run: make unit-test
 
-      - name: Build Spark-Operator Docker Image
-        run: make docker-build IMAGE_TAG=latest
-
-      - name: Check changes in resources used in docker file
-        run: |
-          DOCKERFILE_RESOURCES=$(cat Dockerfile | grep -P -o "COPY [a-zA-Z0-9].*? " | cut -c6-)
-          for resource in $DOCKERFILE_RESOURCES; do
-            # If the resource is different
-            if ! git diff --quiet origin/master -- $resource; then
-              ## And the appVersion hasn't been updated
-              if ! git diff origin/master -- charts/spark-operator-chart/Chart.yaml | grep +appVersion; then
-                echo "resource used in docker.io/kubeflow/spark-operator has changed in $resource, need to update the appVersion in charts/spark-operator-chart/Chart.yaml"
-                git diff origin/master -- $resource;
-                echo "failing the build... 
" && false - fi - fi - done + - name: Build Spark operator + run: make build-operator build-helm-chart: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Determine branch name id: get_branch @@ -131,7 +116,7 @@ jobs: - name: Run chart-testing (lint) if: steps.list-changed.outputs.changed == 'true' env: - BRANCH: ${{ steps.get_branch.outputs.BRANCH }} + BRANCH: ${{ steps.get_branch.outputs.BRANCH }} run: ct lint --check-version-increment=false --target-branch $BRANCH - name: Detect CRDs drift between chart and manifest @@ -163,37 +148,25 @@ jobs: minikube image load docker.io/kubeflow/spark-operator:local ct install - integration-test: - runs-on: ubuntu-22.04 + e2e-test: + runs-on: ubuntu-latest steps: - name: Checkout source code uses: actions/checkout@v4 with: - fetch-depth: "0" + fetch-depth: 0 - name: Set up Go uses: actions/setup-go@v5 with: - go-version-file: "go.mod" + go-version-file: go.mod - - name: setup minikube - uses: manusa/actions-setup-minikube@v2.11.0 - with: - minikube version: v1.33.0 - kubernetes version: v1.30.0 - start args: --memory 6g --cpus=2 --addons ingress - github token: ${{ inputs.github-token }} + - name: Create a Kind cluster + run: make kind-create-cluster - - name: Build local spark-operator docker image for minikube testing + - name: Build and load image to Kind cluster run: | - docker build -t docker.io/kubeflow/spark-operator:local . - minikube image load docker.io/kubeflow/spark-operator:local - - # The integration tests are currently broken see: https://github.com/kubeflow/spark-operator/issues/1416 - # - name: Run chart-testing (integration test) - # run: make integration-test + make kind-load-image IMAGE_TAG=local - - name: Setup tmate session - if: failure() - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 15 + - name: Run e2e tests + run: make e2e-test diff --git a/.github/workflows/push-tag.yaml b/.github/workflows/push-tag.yaml deleted file mode 100644 index f9329f080..000000000 --- a/.github/workflows/push-tag.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: Push Tag on VERSION change - -on: - push: - branches: - - master - - release-* - paths: - - VERSION - -jobs: - push_tag: - runs-on: ubuntu-latest - - steps: - - name: Checkout source code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Git - run: | - git config user.name "$GITHUB_ACTOR" - git config user.email "$GITHUB_ACTOR@users.noreply.github.com" - - - name: Read version from VERSION file - run: | - VERSION=$(cat VERSION) - echo "VERSION=$VERSION" >> $GITHUB_ENV - - - name: Check if tag exists - run: | - git fetch --tags - if git tag -l | grep -q "^${VERSION}$"; then - echo "TAG_EXISTS=true" >> $GITHUB_ENV - else - echo "TAG_EXISTS=false" >> $GITHUB_ENV - fi - - - name: Create and push tag - if: env.TAG_EXISTS == 'false' - run: | - git tag -a "$VERSION" -m "Release $VERSION" - git push origin "$VERSION" diff --git a/.github/workflows/release-docker.yaml b/.github/workflows/release-docker.yaml deleted file mode 100644 index 849a0e109..000000000 --- a/.github/workflows/release-docker.yaml +++ /dev/null @@ -1,120 +0,0 @@ -name: Release Docker images - -on: - push: - tags: - - v*.*.* - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - IMAGE_REGISTRY: docker.io - IMAGE_REPOSITORY: kubeflow/spark-operator - -# Ref: https://docs.docker.com/build/ci/github-actions/multi-platform/#distribute-build-across-multiple-runners. 
-jobs: - build: - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - platform: - - linux/amd64 - - linux/arm64 - - steps: - - name: Prepare - run: | - platform=${{ matrix.platform }} - echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV - - - name: Checkout source code - uses: actions/checkout@v4 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }} - tags: | - type=ref,event=branch - type=semver,pattern={{version}} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.IMAGE_REGISTRY }} - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - platforms: ${{ matrix.platform }} - labels: ${{ steps.meta.outputs.labels }} - outputs: type=image,name=${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }},push-by-digest=true,name-canonical=true,push=true - - - name: Export digest - run: | - mkdir -p /tmp/digests - digest="${{ steps.build.outputs.digest }}" - touch "/tmp/digests/${digest#sha256:}" - - - name: Upload digest - uses: actions/upload-artifact@v4 - with: - name: digests-${{ env.PLATFORM_PAIR }} - path: /tmp/digests/* - if-no-files-found: error - retention-days: 1 - - merge: - runs-on: ubuntu-latest - needs: - - build - steps: - - name: Download digests - uses: actions/download-artifact@v4 - with: - path: /tmp/digests - pattern: digests-* - merge-multiple: true - - - name: Set up Docker buildx - uses: docker/setup-buildx-action@v3 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }} - tags: | - type=ref,event=branch - type=semver,pattern={{version}} - - - name: Login to container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.IMAGE_REGISTRY }} - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Create manifest list and push - working-directory: /tmp/digests - run: | - docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ - $(printf '${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}@sha256:%s ' *) - - - name: Inspect image - run: | - docker buildx imagetools inspect ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}:${{ steps.meta.outputs.version }} diff --git a/.github/workflows/release-charts.yaml b/.github/workflows/release-helm-charts.yaml similarity index 55% rename from .github/workflows/release-charts.yaml rename to .github/workflows/release-helm-charts.yaml index 874696f09..f69884887 100644 --- a/.github/workflows/release-charts.yaml +++ b/.github/workflows/release-helm-charts.yaml @@ -2,17 +2,25 @@ name: Release Helm charts on: release: - types: [published] + types: + - published concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +env: + HELM_REGISTRY: ghcr.io + HELM_REPOSITORY: ${{ github.repository_owner }}/helm-charts + jobs: - build: + release_helm_charts: permissions: contents: write + packages: write + runs-on: ubuntu-latest + steps: - name: Checkout source code uses: actions/checkout@v4 @@ -27,10 +35,28 @@ jobs: with: version: v3.14.4 + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.HELM_REGISTRY }} + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Read version from VERSION file + run: | + VERSION=$(cat VERSION) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + - name: Package Helm charts run: | for chart in $(ls charts); do - helm package charts/$chart + helm package charts/${chart} + done + + - name: Upload charts to GHCR + run: | + for pkg in $(ls *.tgz); do + helm push ${pkg} oci://${{ env.HELM_REGISTRY }}/${{ env.HELM_REPOSITORY }} done - name: Save packaged charts to temp directory @@ -44,7 +70,7 @@ jobs: ref: gh-pages fetch-depth: 0 - - name: Copy packages charts + - name: Copy packaged charts run: | cp /tmp/charts/*.tgz . @@ -52,7 +78,7 @@ jobs: env: CHART_URL: https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }} run: | - helm repo index --merge index.yaml --url $CHART_URL . + helm repo index --merge index.yaml --url ${CHART_URL} . git add index.yaml - git commit -s -m "Update index.yaml" || exit 0 + git commit -s -m "Add index for Spark operator chart ${VERSION}" || exit 0 git push diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ebd0e62a5..cd9f09a5b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,28 +1,244 @@ -name: Create draft release +name: Release on: push: - tags: - - v*.*.* + branches: + - release-* + paths: + - VERSION concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +env: + SEMVER_PATTERN: '^v([0-9]+)\.([0-9]+)\.([0-9]+)(-rc\.([0-9]+))?$' + IMAGE_REGISTRY: docker.io + IMAGE_REPOSITORY: kubeflow/spark-operator + jobs: - release: + check-release: + runs-on: ubuntu-latest + + steps: + - name: Checkout source code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check whether version matches semver pattern + run: | + VERSION=$(cat VERSION) + if [[ ${VERSION} =~ ${{ env.SEMVER_PATTERN }} ]]; then + echo "Version '${VERSION}' matches semver pattern." + else + echo "Version '${VERSION}' does not match semver pattern." 
+            exit 1
+          fi
+          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+
+      - name: Check whether chart version and appVersion match version
+        run: |
+          VERSION=${VERSION#v}
+          CHART_VERSION=$(cat charts/spark-operator-chart/Chart.yaml | grep '^version:' | awk '{print $2}')
+          CHART_APP_VERSION=$(cat charts/spark-operator-chart/Chart.yaml | grep '^appVersion:' | awk '{print $2}')
+          if [[ ${CHART_VERSION} == ${VERSION} ]]; then
+            echo "Chart version '${CHART_VERSION}' matches version '${VERSION}'."
+          else
+            echo "Chart version '${CHART_VERSION}' does not match version '${VERSION}'."
+            exit 1
+          fi
+          if [[ ${CHART_APP_VERSION} == ${VERSION} ]]; then
+            echo "Chart appVersion '${CHART_APP_VERSION}' matches version '${VERSION}'."
+          else
+            echo "Chart appVersion '${CHART_APP_VERSION}' does not match version '${VERSION}'."
+            exit 1
+          fi
+
+      - name: Check if tag exists
+        run: |
+          git fetch --tags
+          if git tag -l | grep -q "^${VERSION}$"; then
+            echo "Tag '${VERSION}' already exists."
+            exit 1
+          else
+            echo "Tag '${VERSION}' does not exist."
+          fi
+
+  build_images:
+    needs:
+      - check-release
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Checkout source code
+        uses: actions/checkout@v4
+
+      - name: Read version from VERSION file
+        run: |
+          VERSION=$(cat VERSION)
+          if [[ ! ${VERSION} =~ ${{ env.SEMVER_PATTERN }} ]]; then
+            echo "Version '${VERSION}' does not match semver pattern."
+            exit 1
+          fi
+          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}
+          tags: |
+            type=semver,pattern={{version}},value=${{ env.VERSION }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.IMAGE_REGISTRY }}
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }},push-by-digest=true,name-canonical=true,push=true
+
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  release_images:
+    needs:
+      - build_images
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+
+      - name: Read version from VERSION file
+        run: |
+          VERSION=$(cat VERSION)
+          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}
+          tags: |
+            type=semver,pattern={{version}},value=${{ env.VERSION }}
+
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: digests-*
+          merge-multiple: true
+
+      - name: Set up Docker buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to container registry
+        uses: 
docker/login-action@v3 + with: + registry: ${{ env.IMAGE_REGISTRY }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}:${{ steps.meta.outputs.version }} + + push_tag: + needs: + - release_images + + runs-on: ubuntu-latest + + steps: + - name: Checkout source code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Configure Git + run: | + git config user.name "$GITHUB_ACTOR" + git config user.email "$GITHUB_ACTOR@users.noreply.github.com" + + - name: Read version from VERSION file + run: | + VERSION=$(cat VERSION) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + + - name: Create and push tag + run: | + git tag -a "${VERSION}" -m "Spark Operator Official Release ${VERSION}" + git push origin "${VERSION}" + + draft_release: + needs: + - push_tag + permissions: contents: write + runs-on: ubuntu-latest + steps: - name: Checkout uses: actions/checkout@v4 - + - name: Configure Git run: | git config user.name "$GITHUB_ACTOR" git config user.email "$GITHUB_ACTOR@users.noreply.github.com" + - name: Read version from VERSION file + run: | + VERSION=$(cat VERSION) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + - name: Set up Helm uses: azure/setup-helm@v4.2.0 with: @@ -31,17 +247,18 @@ jobs: - name: Package Helm charts run: | for chart in $(ls charts); do - helm package charts/$chart + helm package charts/${chart} done - + - name: Release id: release uses: softprops/action-gh-release@v2 with: token: ${{ secrets.GITHUB_TOKEN }} - draft: true - prerelease: ${{ contains(github.ref, 'rc') }} + name: "Spark Operator ${{ env.VERSION }}" + tag_name: ${{ env.VERSION }} + prerelease: ${{ contains(env.VERSION, 'rc') }} target_commitish: ${{ github.sha }} + draft: true files: | *.tgz - diff --git a/Dockerfile b/Dockerfile index 61815e195..5cd34b6c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,9 @@ # limitations under the License. # -ARG SPARK_IMAGE=spark:3.5.0 +ARG SPARK_IMAGE=spark:3.5.2 -FROM golang:1.22.5 AS builder +FROM golang:1.23.1 AS builder WORKDIR /workspace diff --git a/Makefile b/Makefile index 30ba67c7c..d1e824944 100644 --- a/Makefile +++ b/Makefile @@ -13,20 +13,20 @@ SHELL = /usr/bin/env bash -o pipefail .SHELLFLAGS = -ec # Version information. 
-VERSION=$(shell cat VERSION | sed "s/^v//")
-BUILD_DATE = $(shell date -u +"%Y-%m-%dT%H:%M:%S%:z")
-GIT_COMMIT = $(shell git rev-parse HEAD)
-GIT_TAG = $(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi)
-GIT_TREE_STATE = $(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi)
-GIT_SHA = $(shell git rev-parse --short HEAD || echo "HEAD")
-GIT_VERSION = ${VERSION}-${GIT_SHA}
-
-REPO=github.com/kubeflow/spark-operator
-SPARK_OPERATOR_GOPATH=/go/src/github.com/kubeflow/spark-operator
-SPARK_OPERATOR_CHART_PATH=charts/spark-operator-chart
-DEP_VERSION:=`grep DEP_VERSION= Dockerfile | awk -F\" '{print $$2}'`
-BUILDER=`grep "FROM golang:" Dockerfile | awk '{print $$2}'`
-UNAME:=`uname | tr '[:upper:]' '[:lower:]'`
+VERSION ?= $(shell cat VERSION | sed "s/^v//")
+BUILD_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%S%:z")
+GIT_COMMIT := $(shell git rev-parse HEAD)
+GIT_TAG := $(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi)
+GIT_TREE_STATE := $(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi)
+GIT_SHA := $(shell git rev-parse --short HEAD || echo "HEAD")
+GIT_VERSION := ${VERSION}+${GIT_SHA}
+
+REPO := github.com/kubeflow/spark-operator
+SPARK_OPERATOR_GOPATH := /go/src/github.com/kubeflow/spark-operator
+SPARK_OPERATOR_CHART_PATH := charts/spark-operator-chart
+DEP_VERSION := `grep DEP_VERSION= Dockerfile | awk -F\" '{print $$2}'`
+BUILDER := `grep "FROM golang:" Dockerfile | awk '{print $$2}'`
+UNAME := `uname | tr '[:upper:]' '[:lower:]'`
 
 # CONTAINER_TOOL defines the container tool to be used for building images.
 # Be aware that the target commands are only tested with Docker which is
@@ -45,8 +45,34 @@ KIND_CLUSTER_NAME ?= spark-operator
 KIND_CONFIG_FILE ?= charts/spark-operator-chart/ci/kind-config.yaml
 KIND_KUBE_CONFIG ?= $(HOME)/.kube/config
 
+## Location to install binaries
+LOCALBIN ?= $(shell pwd)/bin
+
+## Versions
+KUSTOMIZE_VERSION ?= v5.4.1
+CONTROLLER_TOOLS_VERSION ?= v0.15.0
+KIND_VERSION ?= v0.23.0
+ENVTEST_VERSION ?= release-0.18
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
-ENVTEST_K8S_VERSION = 1.29.3
+ENVTEST_K8S_VERSION ?= 1.29.3
+GOLANGCI_LINT_VERSION ?= v1.57.2
+GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0
+HELM_VERSION ?= v3.15.3
+HELM_UNITTEST_VERSION ?= 0.5.1
+HELM_DOCS_VERSION ?= v1.14.2
+
+## Binaries
+SPARK_OPERATOR ?= $(LOCALBIN)/spark-operator
+SPARKCTL ?= $(LOCALBIN)/sparkctl
+KUBECTL ?= kubectl
+KUSTOMIZE ?= $(LOCALBIN)/kustomize-$(KUSTOMIZE_VERSION)
+CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen-$(CONTROLLER_TOOLS_VERSION)
+KIND ?= $(LOCALBIN)/kind-$(KIND_VERSION)
+ENVTEST ?= $(LOCALBIN)/setup-envtest-$(ENVTEST_VERSION)
+GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint-$(GOLANGCI_LINT_VERSION)
+GEN_CRD_API_REFERENCE_DOCS ?= $(LOCALBIN)/gen-crd-api-reference-docs-$(GEN_CRD_API_REFERENCE_DOCS_VERSION)
+HELM ?= $(LOCALBIN)/helm-$(HELM_VERSION)
+HELM_DOCS ?= $(LOCALBIN)/helm-docs-$(HELM_DOCS_VERSION)
 
 ##@ General
 
@@ -68,6 +94,12 @@ help: ## Display this help.
 
 .PHONY: version
 version: ## Print version information.
 	@echo "Version: ${VERSION}"
+	@echo "Build Date: ${BUILD_DATE}"
+	@echo "Git Commit: ${GIT_COMMIT}"
+	@echo "Git Tag: ${GIT_TAG}"
+	@echo "Git Tree State: ${GIT_TREE_STATE}"
+	@echo "Git SHA: ${GIT_SHA}"
+	@echo "Git Version: ${GIT_VERSION}"
 
 ##@ Development
 
@@ -83,8 +115,8 @@ generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and
 update-crd: manifests ## Update CRD files in the Helm chart.
 	cp config/crd/bases/* charts/spark-operator-chart/crds/
 
-.PHONY: clean
-clean: ## Clean up caches and output.
+.PHONY: go-clean
+go-clean: ## Clean up caches and output.
 	@echo "cleaning up caches and output"
 	go clean -cache -testcache -r -x 2>&1 >/dev/null
 	-rm -rf _output
 
@@ -128,43 +160,31 @@ e2e-test: envtest ## Run the e2e tests against a Kind k8s instance that is spun
 ##@ Build
 
 override LDFLAGS += \
-	-X ${REPO}.version=v${VERSION} \
+	-X ${REPO}.version=${GIT_VERSION} \
 	-X ${REPO}.buildDate=${BUILD_DATE} \
 	-X ${REPO}.gitCommit=${GIT_COMMIT} \
 	-X ${REPO}.gitTreeState=${GIT_TREE_STATE} \
 	-extldflags "-static"
 
 .PHONY: build-operator
-build-operator: ## Build Spark operator
-	go build -o bin/spark-operator -ldflags '${LDFLAGS}' cmd/main.go
+build-operator: ## Build Spark operator.
+	echo "Building spark-operator binary..."
+	go build -o $(SPARK_OPERATOR) -ldflags '${LDFLAGS}' cmd/main.go
 
 .PHONY: build-sparkctl
 build-sparkctl: ## Build sparkctl binary.
-	[ ! -f "sparkctl/sparkctl-darwin-amd64" ] || [ ! -f "sparkctl/sparkctl-linux-amd64" ] && \
-	echo building using $(BUILDER) && \
-	docker run -w $(SPARK_OPERATOR_GOPATH) \
-	-v $$(pwd):$(SPARK_OPERATOR_GOPATH) $(BUILDER) sh -c \
-	"apk add --no-cache bash git && \
-	cd sparkctl && \
-	bash build.sh" || true
+	echo "Building sparkctl binary..."
+	CGO_ENABLED=0 go build -o $(SPARKCTL) -buildvcs=false sparkctl/main.go
 
 .PHONY: install-sparkctl
-install-sparkctl: | sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64 ## Install sparkctl binary.
-	@if [ "$(UNAME)" = "linux" ]; then \
-		echo "installing linux binary to /usr/local/bin/sparkctl"; \
-		sudo cp sparkctl/sparkctl-linux-amd64 /usr/local/bin/sparkctl; \
-		sudo chmod +x /usr/local/bin/sparkctl; \
-	elif [ "$(UNAME)" = "darwin" ]; then \
-		echo "installing macOS binary to /usr/local/bin/sparkctl"; \
-		cp sparkctl/sparkctl-darwin-amd64 /usr/local/bin/sparkctl; \
-		chmod +x /usr/local/bin/sparkctl; \
-	else \
-		echo "$(UNAME) not supported"; \
-	fi
+install-sparkctl: build-sparkctl ## Install sparkctl binary.
+	echo "Installing sparkctl binary to /usr/local/bin..."; \
+	sudo cp $(SPARKCTL) /usr/local/bin
 
-.PHONY: clean-sparkctl
-clean-sparkctl: ## Clean sparkctl binary.
-	rm -f sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64
+.PHONY: clean
+clean: ## Clean spark-operator and sparkctl binaries.
+	rm -f $(SPARK_OPERATOR)
+	rm -f $(SPARKCTL)
 
 .PHONY: build-api-docs
 build-api-docs: gen-crd-api-reference-docs ## Build api documentaion.
@@ -202,12 +222,12 @@ docker-buildx: ## Build and push docker image for the operator for cross-platfor
 ##@ Helm
 
 .PHONY: detect-crds-drift
-detect-crds-drift:
-	diff -q charts/spark-operator-chart/crds config/crd/bases
+detect-crds-drift: manifests ## Detect CRD drift.
+	diff -q $(SPARK_OPERATOR_CHART_PATH)/crds config/crd/bases
 
 .PHONY: helm-unittest
 helm-unittest: helm-unittest-plugin ## Run Helm chart unittests.
- helm unittest charts/spark-operator-chart --strict --file "tests/**/*_test.yaml" + $(HELM) unittest $(SPARK_OPERATOR_CHART_PATH) --strict --file "tests/**/*_test.yaml" .PHONY: helm-lint helm-lint: ## Run Helm chart lint test. @@ -226,17 +246,16 @@ endif .PHONY: kind-create-cluster kind-create-cluster: kind ## Create a kind cluster for integration tests. if ! $(KIND) get clusters 2>/dev/null | grep -q "^$(KIND_CLUSTER_NAME)$$"; then \ - kind create cluster --name $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG_FILE) --kubeconfig $(KIND_KUBE_CONFIG); \ + kind create cluster --name $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG_FILE) --kubeconfig $(KIND_KUBE_CONFIG) --wait=1m; \ fi .PHONY: kind-load-image kind-load-image: kind-create-cluster docker-build ## Load the image into the kind cluster. - kind load docker-image --name $(KIND_CLUSTER_NAME) $(IMAGE) + $(KIND) load docker-image --name $(KIND_CLUSTER_NAME) $(IMAGE) .PHONY: kind-delete-custer kind-delete-custer: kind ## Delete the created kind cluster. - $(KIND) delete cluster --name $(KIND_CLUSTER_NAME) && \ - rm -f $(KIND_KUBE_CONFIG) + $(KIND) delete cluster --name $(KIND_CLUSTER_NAME) --kubeconfig $(KIND_KUBE_CONFIG) .PHONY: install install-crd: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. @@ -257,33 +276,9 @@ undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/. ##@ Dependencies -## Location to install dependencies to -LOCALBIN ?= $(shell pwd)/bin $(LOCALBIN): mkdir -p $(LOCALBIN) -## Tool Binaries -KUBECTL ?= kubectl -KUSTOMIZE ?= $(LOCALBIN)/kustomize-$(KUSTOMIZE_VERSION) -CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen-$(CONTROLLER_TOOLS_VERSION) -KIND ?= $(LOCALBIN)/kind-$(KIND_VERSION) -ENVTEST ?= $(LOCALBIN)/setup-envtest-$(ENVTEST_VERSION) -GOLANGCI_LINT = $(LOCALBIN)/golangci-lint-$(GOLANGCI_LINT_VERSION) -GEN_CRD_API_REFERENCE_DOCS ?= $(LOCALBIN)/gen-crd-api-reference-docs-$(GEN_CRD_API_REFERENCE_DOCS_VERSION) -HELM ?= helm -HELM_UNITTEST ?= unittest -HELM_DOCS ?= $(LOCALBIN)/helm-docs-$(HELM_DOCS_VERSION) - -## Tool Versions -KUSTOMIZE_VERSION ?= v5.4.1 -CONTROLLER_TOOLS_VERSION ?= v0.15.0 -KIND_VERSION ?= v0.23.0 -ENVTEST_VERSION ?= release-0.18 -GOLANGCI_LINT_VERSION ?= v1.57.2 -GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0 -HELM_UNITTEST_VERSION ?= 0.5.1 -HELM_DOCS_VERSION ?= v1.14.2 - .PHONY: kustomize kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. $(KUSTOMIZE): $(LOCALBIN) @@ -314,15 +309,21 @@ gen-crd-api-reference-docs: $(GEN_CRD_API_REFERENCE_DOCS) ## Download gen-crd-ap $(GEN_CRD_API_REFERENCE_DOCS): $(LOCALBIN) $(call go-install-tool,$(GEN_CRD_API_REFERENCE_DOCS),github.com/ahmetb/gen-crd-api-reference-docs,$(GEN_CRD_API_REFERENCE_DOCS_VERSION)) +.PHONY: helm +helm: $(HELM) ## Download helm locally if necessary. +$(HELM): $(LOCALBIN) + $(call go-install-tool,$(HELM),helm.sh/helm/v3/cmd/helm,$(HELM_VERSION)) + .PHONY: helm-unittest-plugin -helm-unittest-plugin: ## Download helm unittest plugin locally if necessary. - if [ -z "$(shell helm plugin list | grep unittest)" ]; then \ +helm-unittest-plugin: helm ## Download helm unittest plugin locally if necessary. 
+ if [ -z "$(shell $(HELM) plugin list | grep unittest)" ]; then \ echo "Installing helm unittest plugin"; \ - helm plugin install https://github.com/helm-unittest/helm-unittest.git --version $(HELM_UNITTEST_VERSION); \ + $(HELM) plugin install https://github.com/helm-unittest/helm-unittest.git --version $(HELM_UNITTEST_VERSION); \ fi .PHONY: helm-docs-plugin -helm-docs-plugin: ## Download helm-docs plugin locally if necessary. +helm-docs-plugin: $(HELM_DOCS) ## Download helm-docs plugin locally if necessary. +$(HELM_DOCS): $(LOCALBIN) $(call go-install-tool,$(HELM_DOCS),github.com/norwoodj/helm-docs/cmd/helm-docs,$(HELM_DOCS_VERSION)) # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist diff --git a/api/v1beta2/sparkapplication_types.go b/api/v1beta2/sparkapplication_types.go index 4a9e13efb..c66868a35 100644 --- a/api/v1beta2/sparkapplication_types.go +++ b/api/v1beta2/sparkapplication_types.go @@ -536,6 +536,9 @@ type DriverSpec struct { // Ports settings for the pods, following the Kubernetes specifications. // +optional Ports []Port `json:"ports,omitempty"` + // PriorityClassName is the name of the PriorityClass for the driver pod. + // +optional + PriorityClassName *string `json:"priorityClassName,omitempty"` } // ExecutorSpec is specification of the executor. @@ -563,6 +566,9 @@ type ExecutorSpec struct { // Ports settings for the pods, following the Kubernetes specifications. // +optional Ports []Port `json:"ports,omitempty"` + // PriorityClassName is the name of the PriorityClass for the executor pod. + // +optional + PriorityClassName *string `json:"priorityClassName,omitempty"` } // NamePath is a pair of a name and a path to which the named objects should be mounted to. diff --git a/api/v1beta2/zz_generated.deepcopy.go b/api/v1beta2/zz_generated.deepcopy.go index c369db9e0..19ef00b0c 100644 --- a/api/v1beta2/zz_generated.deepcopy.go +++ b/api/v1beta2/zz_generated.deepcopy.go @@ -239,6 +239,11 @@ func (in *DriverSpec) DeepCopyInto(out *DriverSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } + if in.PriorityClassName != nil { + in, out := &in.PriorityClassName, &out.PriorityClassName + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverSpec. @@ -320,6 +325,11 @@ func (in *ExecutorSpec) DeepCopyInto(out *ExecutorSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } + if in.PriorityClassName != nil { + in, out := &in.PriorityClassName, &out.PriorityClassName + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExecutorSpec. diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index d07b315b3..cf0420edf 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -90,6 +90,8 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.uiIngress.enable | bool | `false` | Specifies whether to create ingress for Spark web UI. `controller.uiService.enable` must be `true` to enable ingress. | | controller.uiIngress.urlFormat | string | `""` | Ingress URL format. Required if `controller.uiIngress.enable` is true. | | controller.batchScheduler.enable | bool | `false` | Specifies whether to enable batch scheduler for spark jobs scheduling. If enabled, users can specify batch scheduler name in spark application. 
| +| controller.batchScheduler.kubeSchedulerNames | list | `[]` | Specifies a list of kube-scheduler names for scheduling Spark pods. | +| controller.batchScheduler.default | string | `""` | Default batch scheduler to be used if not specified by the user. If specified, this value must be either "volcano" or "yunikorn". Specifying any other value will cause the controller to error on startup. | | controller.serviceAccount.create | bool | `true` | Specifies whether to create a service account for the controller. | | controller.serviceAccount.name | string | `""` | Optional name for the controller service account. | | controller.serviceAccount.annotations | object | `{}` | Extra annotations for the controller service account. | @@ -112,6 +114,10 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.sidecars | list | `[]` | Sidecar containers for controller pods. | | controller.podDisruptionBudget.enable | bool | `false` | Specifies whether to create pod disruption budget for controller. Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) | | controller.podDisruptionBudget.minAvailable | int | `1` | The number of pods that must be available. Require `controller.replicas` to be greater than 1 | +| controller.pprof.enable | bool | `false` | Specifies whether to enable pprof. | +| controller.pprof.port | int | `6060` | Specifies pprof port. | +| controller.pprof.portName | string | `"pprof"` | Specifies pprof service port name. | +| webhook.enable | bool | `true` | Specifies whether to enable webhook. | | webhook.replicas | int | `1` | Number of replicas of webhook server. | | webhook.logLevel | string | `"info"` | Configure the verbosity of logging, can be one of `debug`, `info`, `error`. | | webhook.port | int | `9443` | Specifies webhook port. | diff --git a/charts/spark-operator-chart/ci/kind-config.yaml b/charts/spark-operator-chart/ci/kind-config.yaml new file mode 100644 index 000000000..4e8cae8d9 --- /dev/null +++ b/charts/spark-operator-chart/ci/kind-config.yaml @@ -0,0 +1,7 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + image: kindest/node:v1.29.2 + - role: worker + image: kindest/node:v1.29.2 diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml index 7f77e1bb9..f5318f036 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -3179,6 +3179,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the driver pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling @@ -7946,6 +7950,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the executor pod. 
+ type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml index afc07c253..c56effc2f 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml @@ -3127,6 +3127,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the driver pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling @@ -7864,6 +7868,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the executor pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling diff --git a/charts/spark-operator-chart/templates/controller/_helpers.tpl b/charts/spark-operator-chart/templates/controller/_helpers.tpl index e5b9457b2..f545f432f 100644 --- a/charts/spark-operator-chart/templates/controller/_helpers.tpl +++ b/charts/spark-operator-chart/templates/controller/_helpers.tpl @@ -48,6 +48,34 @@ Create the name of the service account to be used by the controller {{- end -}} {{- end -}} +{{/* +Create the name of the cluster role to be used by the controller +*/}} +{{- define "spark-operator.controller.clusterRoleName" -}} +{{ include "spark-operator.controller.name" . }} +{{- end }} + +{{/* +Create the name of the cluster role binding to be used by the controller +*/}} +{{- define "spark-operator.controller.clusterRoleBindingName" -}} +{{ include "spark-operator.controller.clusterRoleName" . }} +{{- end }} + +{{/* +Create the name of the role to be used by the controller +*/}} +{{- define "spark-operator.controller.roleName" -}} +{{ include "spark-operator.controller.name" . }} +{{- end }} + +{{/* +Create the name of the role binding to be used by the controller +*/}} +{{- define "spark-operator.controller.roleBindingName" -}} +{{ include "spark-operator.controller.roleName" . }} +{{- end }} + {{/* Create the name of the deployment to be used by controller */}} @@ -68,3 +96,95 @@ Create the name of the pod disruption budget to be used by controller {{- define "spark-operator.controller.podDisruptionBudgetName" -}} {{ include "spark-operator.controller.name" . }}-pdb {{- end -}} + +{{/* +Create the name of the service used by controller +*/}} +{{- define "spark-operator.controller.serviceName" -}} +{{ include "spark-operator.controller.name" . 
}}-svc +{{- end -}} + +{{/* +Create the role policy rules for the controller in every Spark job namespace +*/}} +{{- define "spark-operator.controller.policyRules" -}} +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - deletecollection +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - extensions + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - create + - delete +- apiGroups: + - sparkoperator.k8s.io + resources: + - sparkapplications + - scheduledsparkapplications + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - sparkoperator.k8s.io + resources: + - sparkapplications/status + - sparkapplications/finalizers + - scheduledsparkapplications/status + - scheduledsparkapplications/finalizers + verbs: + - get + - update + - patch +{{- if .Values.controller.batchScheduler.enable }} +{{/* required for the `volcano` batch scheduler */}} +- apiGroups: + - scheduling.incubator.k8s.io + - scheduling.sigs.dev + - scheduling.volcano.sh + resources: + - podgroups + verbs: + - "*" +{{- end }} +{{- end -}} diff --git a/charts/spark-operator-chart/templates/controller/deployment.yaml b/charts/spark-operator-chart/templates/controller/deployment.yaml index 02f9c2c90..4f9251f5a 100644 --- a/charts/spark-operator-chart/templates/controller/deployment.yaml +++ b/charts/spark-operator-chart/templates/controller/deployment.yaml @@ -21,9 +21,7 @@ metadata: labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} spec: - {{- with .Values.controller.replicas }} - replicas: {{ . }} - {{- end }} + replicas: {{ .Values.controller.replicas }} selector: matchLabels: {{- include "spark-operator.controller.selectorLabels" . | nindent 6 }} @@ -59,8 +57,12 @@ spec: - --zap-log-level={{ . }} {{- end }} {{- with .Values.spark.jobNamespaces }} + {{- if has "" . }} + - --namespaces="" + {{- else }} - --namespaces={{ . | join "," }} {{- end }} + {{- end }} - --controller-threads={{ .Values.controller.workers }} {{- with .Values.controller.uiService.enable }} - --enable-ui-service=true @@ -70,8 +72,14 @@ spec: - --ingress-url-format={{ . }} {{- end }} {{- end }} - {{- with .Values.controller.batchScheduler.enable }} + {{- if .Values.controller.batchScheduler.enable }} - --enable-batch-scheduler=true + {{- with .Values.controller.batchScheduler.kubeSchedulerNames }} + - --kube-scheduler-names={{ . | join "," }} + {{- end }} + {{- with .Values.controller.batchScheduler.default }} + - --default-batch-scheduler={{ . }} + {{- end }} {{- end }} {{- if .Values.prometheus.metrics.enable }} - --enable-metrics=true @@ -83,11 +91,20 @@ spec: - --leader-election=true - --leader-election-lock-name={{ include "spark-operator.controller.leaderElectionName" . 
}} - --leader-election-lock-namespace={{ .Release.Namespace }} - {{- if .Values.prometheus.metrics.enable }} + {{- if .Values.controller.pprof.enable }} + - --pprof-bind-address=:{{ .Values.controller.pprof.port }} + {{- end }} + {{- if or .Values.prometheus.metrics.enable .Values.controller.pprof.enable }} ports: + {{- if .Values.controller.pprof.enable }} + - name: {{ .Values.controller.pprof.portName | quote }} + containerPort: {{ .Values.controller.pprof.port }} + {{- end }} + {{- if .Values.prometheus.metrics.enable }} - name: {{ .Values.prometheus.metrics.portName | quote }} containerPort: {{ .Values.prometheus.metrics.port }} {{- end }} + {{- end }} {{- with .Values.controller.env }} env: {{- toYaml . | nindent 8 }} diff --git a/charts/spark-operator-chart/templates/controller/rbac.yaml b/charts/spark-operator-chart/templates/controller/rbac.yaml index 472d0fcc7..5745af32a 100644 --- a/charts/spark-operator-chart/templates/controller/rbac.yaml +++ b/charts/spark-operator-chart/templates/controller/rbac.yaml @@ -18,7 +18,8 @@ limitations under the License. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "spark-operator.controller.name" . }} + name: {{ include "spark-operator.controller.clusterRoleName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} {{- with .Values.controller.rbac.annotations }} @@ -26,50 +27,6 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch - - create - - update - - patch - - delete - - deletecollection -- apiGroups: - - "" - resources: - - configmaps - verbs: - - get - - create - - update - - patch - - delete -- apiGroups: - - "" - resources: - - services - verbs: - - get - - list - - watch - - create - - update - - patch - - delete -- apiGroups: - - extensions - - networking.k8s.io - resources: - - ingresses - verbs: - - get - - create - - delete - apiGroups: - "" resources: @@ -90,52 +47,16 @@ rules: - customresourcedefinitions verbs: - get -- apiGroups: - - sparkoperator.k8s.io - resources: - - sparkapplications - - scheduledsparkapplications - verbs: - - get - - list - - watch - - create - - update - - patch - - delete -- apiGroups: - - sparkoperator.k8s.io - resources: - - sparkapplications/status - - scheduledsparkapplications/status - verbs: - - get - - update - - patch -- apiGroups: - - sparkoperator.k8s.io - resources: - - sparkapplications/finalizers - - scheduledsparkapplications/finalizers - verbs: - - update -{{- if .Values.controller.batchScheduler.enable }} -{{/* required for the `volcano` batch scheduler */}} -- apiGroups: - - scheduling.incubator.k8s.io - - scheduling.sigs.dev - - scheduling.volcano.sh - resources: - - podgroups - verbs: - - "*" +{{- if not .Values.spark.jobNamespaces | or (has "" .Values.spark.jobNamespaces) }} +{{ include "spark-operator.controller.policyRules" . }} {{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "spark-operator.controller.name" . }} + name: {{ include "spark-operator.controller.clusterRoleBindingName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} {{- with .Values.controller.rbac.annotations }} @@ -149,13 +70,14 @@ subjects: roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: {{ include "spark-operator.controller.name" . 
}} - + name: {{ include "spark-operator.controller.clusterRoleName" . }} --- + apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: {{ include "spark-operator.controller.name" . }} + name: {{ include "spark-operator.controller.roleName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} {{- with .Values.controller.rbac.annotations }} @@ -178,12 +100,16 @@ rules: verbs: - get - update - +{{- if has .Release.Namespace .Values.spark.jobNamespaces }} +{{ include "spark-operator.controller.policyRules" . }} +{{- end }} --- + apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: {{ include "spark-operator.controller.name" . }} + name: {{ include "spark-operator.controller.roleBindingName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} {{- with .Values.controller.rbac.annotations }} @@ -197,5 +123,48 @@ subjects: roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: {{ include "spark-operator.controller.name" . }} + name: {{ include "spark-operator.controller.roleName" . }} + +{{- if and .Values.spark.jobNamespaces (not (has "" .Values.spark.jobNamespaces)) }} +{{- range $jobNamespace := .Values.spark.jobNamespaces }} +{{- if ne $jobNamespace $.Release.Namespace }} +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "spark-operator.controller.roleName" $ }} + namespace: {{ $jobNamespace }} + labels: + {{- include "spark-operator.controller.labels" $ | nindent 4 }} + {{- with $.Values.controller.rbac.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +rules: +{{ include "spark-operator.controller.policyRules" $ }} +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "spark-operator.controller.roleBindingName" $ }} + namespace: {{ $jobNamespace }} + labels: + {{- include "spark-operator.controller.labels" $ | nindent 4 }} + {{- with $.Values.controller.rbac.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +subjects: +- kind: ServiceAccount + name: {{ include "spark-operator.controller.serviceAccountName" $ }} + namespace: {{ $.Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "spark-operator.controller.roleName" $ }} +{{- end }} +{{- end }} +{{- end }} {{- end }} diff --git a/charts/spark-operator-chart/templates/controller/service.yaml b/charts/spark-operator-chart/templates/controller/service.yaml new file mode 100644 index 000000000..f779d2ad7 --- /dev/null +++ b/charts/spark-operator-chart/templates/controller/service.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2024 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.controller.pprof.enable }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "spark-operator.controller.serviceName" . }} + labels: + {{- include "spark-operator.controller.labels" . 
| nindent 4 }} +spec: + selector: + {{- include "spark-operator.controller.selectorLabels" . | nindent 4 }} + ports: + - port: {{ .Values.controller.pprof.port }} + targetPort: {{ .Values.controller.pprof.portName | quote }} + name: {{ .Values.controller.pprof.portName }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/controller/serviceaccount.yaml b/charts/spark-operator-chart/templates/controller/serviceaccount.yaml index 126e4245c..49c235869 100644 --- a/charts/spark-operator-chart/templates/controller/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/controller/serviceaccount.yaml @@ -19,6 +19,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "spark-operator.controller.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.controller.labels" . | nindent 4 }} {{- with .Values.controller.serviceAccount.annotations }} diff --git a/charts/spark-operator-chart/templates/spark/_helpers.tpl b/charts/spark-operator-chart/templates/spark/_helpers.tpl index 150ae966f..c5fa346fe 100644 --- a/charts/spark-operator-chart/templates/spark/_helpers.tpl +++ b/charts/spark-operator-chart/templates/spark/_helpers.tpl @@ -36,12 +36,12 @@ Create the name of the service account to be used by spark applications Create the name of the role to be used by spark service account */}} {{- define "spark-operator.spark.roleName" -}} -{{- include "spark-operator.spark.name" . }} +{{- include "spark-operator.spark.serviceAccountName" . }} {{- end -}} {{/* Create the name of the role binding to be used by spark service account */}} {{- define "spark-operator.spark.roleBindingName" -}} -{{- include "spark-operator.spark.name" . }} +{{- include "spark-operator.spark.serviceAccountName" . }} {{- end -}} diff --git a/charts/spark-operator-chart/templates/spark/rbac.yaml b/charts/spark-operator-chart/templates/spark/rbac.yaml index e850b1e50..9e15d6dbb 100644 --- a/charts/spark-operator-chart/templates/spark/rbac.yaml +++ b/charts/spark-operator-chart/templates/spark/rbac.yaml @@ -16,7 +16,7 @@ limitations under the License. {{- if .Values.spark.rbac.create -}} {{- range $jobNamespace := .Values.spark.jobNamespaces | default list }} -{{- if $jobNamespace }} +{{- if ne $jobNamespace "" }} --- apiVersion: rbac.authorization.k8s.io/v1 @@ -67,7 +67,7 @@ subjects: roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: {{ include "spark-operator.spark.serviceAccountName" $ }} + name: {{ include "spark-operator.spark.roleName" $ }} {{- end }} {{- end }} {{- end }} diff --git a/charts/spark-operator-chart/templates/spark/serviceaccount.yaml b/charts/spark-operator-chart/templates/spark/serviceaccount.yaml index f05d8fae3..de24d801e 100644 --- a/charts/spark-operator-chart/templates/spark/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/spark/serviceaccount.yaml @@ -15,16 +15,19 @@ limitations under the License. */}} {{- if .Values.spark.serviceAccount.create }} -{{- range $sparkJobNamespace := .Values.spark.jobNamespaces | default list }} +{{- range $jobNamespace := .Values.spark.jobNamespaces | default list }} +{{- if ne $jobNamespace "" }} + --- apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "spark-operator.spark.serviceAccountName" $ }} - namespace: {{ $sparkJobNamespace }} + namespace: {{ $jobNamespace }} labels: {{ include "spark-operator.labels" $ | nindent 4 }} {{- with $.Values.spark.serviceAccount.annotations }} annotations: {{ toYaml . 
| nindent 4 }} {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/_helpers.tpl b/charts/spark-operator-chart/templates/webhook/_helpers.tpl index 71588123b..00ad4fcca 100644 --- a/charts/spark-operator-chart/templates/webhook/_helpers.tpl +++ b/charts/spark-operator-chart/templates/webhook/_helpers.tpl @@ -49,18 +49,32 @@ Create the name of service account to be used by webhook {{- end -}} {{/* -Create the name of the role to be used by webhook +Create the name of the cluster role to be used by the webhook +*/}} +{{- define "spark-operator.webhook.clusterRoleName" -}} +{{ include "spark-operator.webhook.name" . }} +{{- end }} + +{{/* +Create the name of the cluster role binding to be used by the webhook +*/}} +{{- define "spark-operator.webhook.clusterRoleBindingName" -}} +{{ include "spark-operator.webhook.clusterRoleName" . }} +{{- end }} + +{{/* +Create the name of the role to be used by the webhook */}} {{- define "spark-operator.webhook.roleName" -}} -{{- include "spark-operator.webhook.name" . }} -{{- end -}} +{{ include "spark-operator.webhook.name" . }} +{{- end }} {{/* -Create the name of the role binding to be used by webhook +Create the name of the role binding to be used by the webhook */}} {{- define "spark-operator.webhook.roleBindingName" -}} -{{- include "spark-operator.webhook.name" . }} -{{- end -}} +{{ include "spark-operator.webhook.roleName" . }} +{{- end }} {{/* Create the name of the secret to be used by webhook @@ -111,3 +125,42 @@ Create the name of the pod disruption budget to be used by webhook {{- define "spark-operator.webhook.podDisruptionBudgetName" -}} {{ include "spark-operator.webhook.name" . }}-pdb {{- end -}} + +{{/* +Create the role policy rules for the webhook in every Spark job namespace +*/}} +{{- define "spark-operator.webhook.policyRules" -}} +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - resourcequotas + verbs: + - get + - list + - watch +- apiGroups: + - sparkoperator.k8s.io + resources: + - sparkapplications + - sparkapplications/status + - sparkapplications/finalizers + - scheduledsparkapplications + - scheduledsparkapplications/status + - scheduledsparkapplications/finalizers + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +{{- end -}} \ No newline at end of file diff --git a/charts/spark-operator-chart/templates/webhook/deployment.yaml b/charts/spark-operator-chart/templates/webhook/deployment.yaml index 89b07e3df..ae5167a6e 100644 --- a/charts/spark-operator-chart/templates/webhook/deployment.yaml +++ b/charts/spark-operator-chart/templates/webhook/deployment.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} apiVersion: apps/v1 kind: Deployment metadata: @@ -21,9 +22,7 @@ metadata: labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} spec: - {{- with .Values.webhook.replicas }} - replicas: {{ . }} - {{- end }} + replicas: {{ .Values.webhook.replicas }} selector: matchLabels: {{- include "spark-operator.webhook.selectorLabels" . | nindent 6 }} @@ -52,8 +51,12 @@ spec: - --zap-log-level={{ . }} {{- end }} {{- with .Values.spark.jobNamespaces }} + {{- if has "" . }} + - --namespaces="" + {{- else }} - --namespaces={{ . | join "," }} {{- end }} + {{- end }} - --webhook-secret-name={{ include "spark-operator.webhook.secretName" . 
}} - --webhook-secret-namespace={{ .Release.Namespace }} - --webhook-svc-name={{ include "spark-operator.webhook.serviceName" . }} @@ -153,3 +156,4 @@ spec: - {{ mergeOverwrite . $labelSelectorDict | toYaml | nindent 8 | trim }} {{- end }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/mutatingwebhookconfiguration.yaml b/charts/spark-operator-chart/templates/webhook/mutatingwebhookconfiguration.yaml index f48a04320..2d6a10a7b 100644 --- a/charts/spark-operator-chart/templates/webhook/mutatingwebhookconfiguration.yaml +++ b/charts/spark-operator-chart/templates/webhook/mutatingwebhookconfiguration.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: @@ -33,16 +34,18 @@ webhooks: {{- with .Values.webhook.failurePolicy }} failurePolicy: {{ . }} {{- end }} - {{- if .Values.spark.jobNamespaces }} + {{- with .Values.spark.jobNamespaces }} + {{- if not (has "" .) }} namespaceSelector: matchExpressions: - key: kubernetes.io/metadata.name operator: In values: - {{- range .Values.spark.jobNamespaces }} - - {{ . }} + {{- range $jobNamespace := . }} + - {{ $jobNamespace }} {{- end }} {{- end }} + {{- end }} objectSelector: matchLabels: sparkoperator.k8s.io/launched-by-spark-operator: "true" @@ -66,16 +69,18 @@ webhooks: {{- with .Values.webhook.failurePolicy }} failurePolicy: {{ . }} {{- end }} - {{- if .Values.spark.jobNamespaces }} + {{- with .Values.spark.jobNamespaces }} + {{- if not (has "" .) }} namespaceSelector: matchExpressions: - key: kubernetes.io/metadata.name operator: In values: - {{- range .Values.spark.jobNamespaces }} - - {{ . }} + {{- range $jobNamespace := . }} + - {{ $jobNamespace }} {{- end }} {{- end }} + {{- end }} rules: - apiGroups: ["sparkoperator.k8s.io"] apiVersions: ["v1beta2"] @@ -96,16 +101,18 @@ webhooks: {{- with .Values.webhook.failurePolicy }} failurePolicy: {{ . }} {{- end }} - {{- if .Values.spark.jobNamespaces }} + {{- with .Values.spark.jobNamespaces }} + {{- if not (has "" .) }} namespaceSelector: matchExpressions: - key: kubernetes.io/metadata.name operator: In values: - {{- range .Values.spark.jobNamespaces }} - - {{ . }} + {{- range $jobNamespace := . }} + - {{ $jobNamespace }} {{- end }} {{- end }} + {{- end }} rules: - apiGroups: ["sparkoperator.k8s.io"] apiVersions: ["v1beta2"] @@ -114,3 +121,4 @@ webhooks: {{- with .Values.webhook.timeoutSeconds }} timeoutSeconds: {{ . }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/poddisruptionbudget.yaml b/charts/spark-operator-chart/templates/webhook/poddisruptionbudget.yaml index 6de7e6ef5..5a6d91d8c 100644 --- a/charts/spark-operator-chart/templates/webhook/poddisruptionbudget.yaml +++ b/charts/spark-operator-chart/templates/webhook/poddisruptionbudget.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} {{- if .Values.webhook.podDisruptionBudget.enable }} {{- if le (int .Values.webhook.replicas) 1 }} {{- fail "webhook.replicas must be greater than 1 to enable pod disruption budget for webhook" }} @@ -32,3 +33,4 @@ spec: minAvailable: {{ . 
}} {{- end }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/rbac.yaml b/charts/spark-operator-chart/templates/webhook/rbac.yaml index b1c5d426f..1e48dae50 100644 --- a/charts/spark-operator-chart/templates/webhook/rbac.yaml +++ b/charts/spark-operator-chart/templates/webhook/rbac.yaml @@ -14,11 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} {{- if .Values.webhook.rbac.create }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "spark-operator.webhook.name" . }} + name: {{ include "spark-operator.webhook.clusterRoleName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} {{- with .Values.webhook.rbac.annotations }} @@ -26,22 +28,6 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - resourcequotas - verbs: - - get - - list - - watch - apiGroups: - "" resources: @@ -68,29 +54,16 @@ rules: verbs: - get - update -- apiGroups: - - sparkoperator.k8s.io - resources: - - sparkapplications - - sparkapplications/status - - sparkapplications/finalizers - - scheduledsparkapplications - - scheduledsparkapplications/status - - scheduledsparkapplications/finalizers - verbs: - - get - - list - - watch - - create - - update - - patch - - delete +{{- if not .Values.spark.jobNamespaces | or (has "" .Values.spark.jobNamespaces) }} +{{ include "spark-operator.webhook.policyRules" . }} +{{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "spark-operator.webhook.name" . }} + name: {{ include "spark-operator.webhook.clusterRoleBindingName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} {{- with .Values.webhook.rbac.annotations }} @@ -104,13 +77,14 @@ subjects: roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: {{ include "spark-operator.webhook.name" . }} - + name: {{ include "spark-operator.webhook.clusterRoleName" . }} --- + apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: {{ include "spark-operator.webhook.name" . }} + name: {{ include "spark-operator.webhook.roleName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} {{- with .Values.webhook.rbac.annotations }} @@ -148,12 +122,16 @@ rules: verbs: - get - update +{{- if has .Release.Namespace .Values.spark.jobNamespaces }} +{{ include "spark-operator.webhook.policyRules" . }} +{{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: {{ include "spark-operator.webhook.name" . }} + name: {{ include "spark-operator.webhook.roleBindingName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} {{- with .Values.webhook.rbac.annotations }} @@ -167,5 +145,49 @@ subjects: roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: {{ include "spark-operator.webhook.name" . }} + name: {{ include "spark-operator.webhook.roleName" . 
}} + +{{- if and .Values.spark.jobNamespaces (not (has "" .Values.spark.jobNamespaces)) }} +{{- range $jobNamespace := .Values.spark.jobNamespaces }} +{{- if ne $jobNamespace $.Release.Namespace }} +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "spark-operator.webhook.roleName" $ }} + namespace: {{ $jobNamespace }} + labels: + {{- include "spark-operator.webhook.labels" $ | nindent 4 }} + {{- with $.Values.webhook.rbac.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +rules: +{{ include "spark-operator.webhook.policyRules" $ }} +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "spark-operator.webhook.roleBindingName" $ }} + namespace: {{ $jobNamespace }} + labels: + {{- include "spark-operator.webhook.labels" $ | nindent 4 }} + {{- with $.Values.webhook.rbac.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +subjects: +- kind: ServiceAccount + name: {{ include "spark-operator.webhook.serviceAccountName" $ }} + namespace: {{ $.Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "spark-operator.webhook.roleName" $ }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} {{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/service.yaml b/charts/spark-operator-chart/templates/webhook/service.yaml index 45064a807..51695b8c3 100644 --- a/charts/spark-operator-chart/templates/webhook/service.yaml +++ b/charts/spark-operator-chart/templates/webhook/service.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} apiVersion: v1 kind: Service metadata: @@ -27,3 +28,4 @@ spec: - port: {{ .Values.webhook.port }} targetPort: {{ .Values.webhook.portName | quote }} name: {{ .Values.webhook.portName }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml b/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml index 77944b83c..fea4a6bbe 100644 --- a/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml @@ -14,11 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} {{- if .Values.webhook.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "spark-operator.webhook.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} labels: {{- include "spark-operator.webhook.labels" . | nindent 4 }} {{- with .Values.webhook.serviceAccount.annotations }} @@ -26,3 +28,4 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/validatingwebhookconfiguration.yaml b/charts/spark-operator-chart/templates/webhook/validatingwebhookconfiguration.yaml index 3fbf55184..8cd3b11f4 100644 --- a/charts/spark-operator-chart/templates/webhook/validatingwebhookconfiguration.yaml +++ b/charts/spark-operator-chart/templates/webhook/validatingwebhookconfiguration.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.webhook.enable }} apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: @@ -33,16 +34,18 @@ webhooks: {{- with .Values.webhook.failurePolicy }} failurePolicy: {{ . 
}} {{- end }} - {{- if .Values.spark.jobNamespaces }} + {{- with .Values.spark.jobNamespaces }} + {{- if not (has "" .) }} namespaceSelector: matchExpressions: - key: kubernetes.io/metadata.name operator: In values: - {{- range .Values.spark.jobNamespaces }} - - {{ . }} + {{- range $jobNamespace := . }} + - {{ $jobNamespace }} {{- end }} {{- end }} + {{- end }} rules: - apiGroups: ["sparkoperator.k8s.io"] apiVersions: ["v1beta2"] @@ -63,16 +66,18 @@ webhooks: {{- with .Values.webhook.failurePolicy }} failurePolicy: {{ . }} {{- end }} - {{- if .Values.spark.jobNamespaces }} + {{- with .Values.spark.jobNamespaces }} + {{- if not (has "" .) }} namespaceSelector: matchExpressions: - key: kubernetes.io/metadata.name operator: In values: - {{- range .Values.spark.jobNamespaces }} - - {{ . }} + {{- range $jobNamespace := . }} + - {{ $jobNamespace }} {{- end }} {{- end }} + {{- end }} rules: - apiGroups: ["sparkoperator.k8s.io"] apiVersions: ["v1beta2"] @@ -81,3 +86,4 @@ webhooks: {{- with .Values.webhook.timeoutSeconds }} timeoutSeconds: {{ . }} {{- end }} +{{- end }} diff --git a/charts/spark-operator-chart/tests/controller/deployment_test.yaml b/charts/spark-operator-chart/tests/controller/deployment_test.yaml index e4b6983a7..57b557757 100644 --- a/charts/spark-operator-chart/tests/controller/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/controller/deployment_test.yaml @@ -53,6 +53,15 @@ tests: path: spec.replicas value: 10 + - it: Should set replicas if `controller.replicas` is set + set: + controller: + replicas: 0 + asserts: + - equal: + path: spec.replicas + value: 0 + - it: Should add pod labels if `controller.labels` is set set: controller: @@ -110,14 +119,26 @@ tests: - it: Should contain `--namespaces` arg if `spark.jobNamespaces` is set set: - spark.jobNamespaces: - - ns1 - - ns2 + spark: + jobNamespaces: + - ns1 + - ns2 asserts: - contains: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args content: --namespaces=ns1,ns2 + - it: Should set namespaces to all namespaces (`""`) if `spark.jobNamespaces` contains empty string + set: + spark: + jobNamespaces: + - "" + - default + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --namespaces="" + - it: Should contain `--controller-threads` arg if `controller.workers` is set set: controller: @@ -160,6 +181,17 @@ tests: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args content: --enable-batch-scheduler=true + - it: Should contain `--default-batch-scheduler` arg if `controller.batchScheduler.default` is set + set: + controller: + batchScheduler: + enable: true + default: yunikorn + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --default-batch-scheduler=yunikorn + - it: Should contain `--enable-metrics` arg if `prometheus.metrics.enable` is set to `true` set: prometheus: @@ -535,3 +567,29 @@ tests: asserts: - failedTemplate: errorMessage: "controller.replicas must be greater than 1 to enable topology spread constraints for controller pods" + + - it: Should contain `--pprof-bind-address` arg if `controller.pprof.enable` is set to `true` + set: + controller: + pprof: + enable: true + port: 12345 + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --pprof-bind-address=:12345 + + - it: Should add pprof ports if `controller.pprof.enable` is set to `true` + set: + 
controller: + pprof: + enable: true + port: 12345 + portName: pprof-test + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].ports + content: + name: pprof-test + containerPort: 12345 + count: 1 \ No newline at end of file diff --git a/charts/spark-operator-chart/tests/controller/rbac_test.yaml b/charts/spark-operator-chart/tests/controller/rbac_test.yaml index 4a910adcb..bd0609499 100644 --- a/charts/spark-operator-chart/tests/controller/rbac_test.yaml +++ b/charts/spark-operator-chart/tests/controller/rbac_test.yaml @@ -69,7 +69,6 @@ tests: annotations: key1: value1 key2: value2 - documentIndex: 0 asserts: - equal: path: metadata.annotations.key1 @@ -77,3 +76,90 @@ tests: - equal: path: metadata.annotations.key2 value: value2 + + - it: Should create role and rolebinding for controller in release namespace + documentIndex: 2 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-controller + namespace: spark-operator + + - it: Should create role and rolebinding for controller in release namespace + documentIndex: 3 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-controller + namespace: spark-operator + - contains: + path: subjects + content: + kind: ServiceAccount + name: spark-operator-controller + namespace: spark-operator + count: 1 + - equal: + path: roleRef + value: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-operator-controller + + - it: Should create roles and rolebindings for controller in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 4 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-controller + namespace: default + + - it: Should create roles and rolebindings for controller in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 5 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-controller + namespace: default + + - it: Should create roles and rolebindings for controller in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 6 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-controller + namespace: spark + + - it: Should create roles and rolebindings for controller in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 7 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-controller + namespace: spark diff --git a/charts/spark-operator-chart/tests/controller/service_test.yaml b/charts/spark-operator-chart/tests/controller/service_test.yaml new file mode 100644 index 000000000..c61062f91 --- /dev/null +++ b/charts/spark-operator-chart/tests/controller/service_test.yaml @@ -0,0 +1,44 @@ +# +# Copyright 2024 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+suite: Test controller service
+
+templates:
+  - controller/service.yaml
+
+release:
+  name: spark-operator
+  namespace: spark-operator
+
+tests:
+  - it: Should create the pprof service correctly
+    set:
+      controller:
+        pprof:
+          enable: true
+          port: 12345
+          portName: pprof-test
+    asserts:
+      - containsDocument:
+          apiVersion: v1
+          kind: Service
+          name: spark-operator-controller-svc
+      - equal:
+          path: spec.ports[0]
+          value:
+            port: 12345
+            targetPort: pprof-test
+            name: pprof-test
\ No newline at end of file
diff --git a/charts/spark-operator-chart/tests/spark/rbac_test.yaml b/charts/spark-operator-chart/tests/spark/rbac_test.yaml
index 2de678b54..0cebd06c6 100644
--- a/charts/spark-operator-chart/tests/spark/rbac_test.yaml
+++ b/charts/spark-operator-chart/tests/spark/rbac_test.yaml
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-suite: Test spark rbac
+suite: Test Spark RBAC
 
 templates:
   - spark/rbac.yaml
@@ -24,7 +24,7 @@ release:
   namespace: spark-operator
 
 tests:
-  - it: Should not create spark RBAC resources if `spark.rbac.create` is false
+  - it: Should not create RBAC resources for Spark if `spark.rbac.create` is false
     set:
       spark:
         rbac:
@@ -33,91 +33,150 @@ tests:
       - hasDocuments:
           count: 0
 
-  - it: Should create spark role by default
+  - it: Should create RBAC resources for Spark in namespace `default` by default
     documentIndex: 0
     asserts:
       - containsDocument:
           apiVersion: rbac.authorization.k8s.io/v1
           kind: Role
           name: spark-operator-spark
+          namespace: default
 
-  - it: Should create spark role binding by default
-    set:
-      rbac:
-        spark:
-          create: true
+  - it: Should create RBAC resources for Spark in namespace `default` by default
     documentIndex: 1
     asserts:
       - containsDocument:
           apiVersion: rbac.authorization.k8s.io/v1
           kind: RoleBinding
           name: spark-operator-spark
+          namespace: default
+      - contains:
+          path: subjects
+          content:
+            kind: ServiceAccount
+            name: spark-operator-spark
+            namespace: default
+      - equal:
+          path: roleRef
+          value:
+            apiGroup: rbac.authorization.k8s.io
+            kind: Role
+            name: spark-operator-spark
 
-  - it: Should create a single spark role with namespace "" by default
+  - it: Should create RBAC resources for Spark in every Spark job namespace
+    set:
+      spark:
+        jobNamespaces:
+          - ns1
+          - ns2
     documentIndex: 0
     asserts:
       - containsDocument:
           apiVersion: rbac.authorization.k8s.io/v1
           kind: Role
           name: spark-operator-spark
+          namespace: ns1
 
-  - it: Should create a single spark role binding with namespace "" by default
+  - it: Should create RBAC resources for Spark in every Spark job namespace
+    set:
+      spark:
+        jobNamespaces:
+          - ns1
+          - ns2
     documentIndex: 1
     asserts:
       - containsDocument:
           apiVersion: rbac.authorization.k8s.io/v1
           kind: RoleBinding
           name: spark-operator-spark
-          namespace: ""
+          namespace: ns1
+      - contains:
+          path: subjects
+          content:
+            kind: ServiceAccount
+            name: spark-operator-spark
+            namespace: ns1
+      - equal:
+          path: roleRef
+          value:
+            apiGroup: rbac.authorization.k8s.io
+            kind: Role
+            name: spark-operator-spark
 
-  - it: Should create multiple spark roles if `spark.jobNamespaces` is set with multiple values
+  - it: Should create RBAC resources for Spark in every
Spark job namespace set: - spark.jobNamespaces: - - ns1 - - ns2 - documentIndex: 0 + spark: + jobNamespaces: + - ns1 + - ns2 + documentIndex: 2 asserts: - containsDocument: apiVersion: rbac.authorization.k8s.io/v1 kind: Role name: spark-operator-spark - namespace: ns1 + namespace: ns2 - - it: Should create multiple spark role bindings if `spark.jobNamespaces` is set with multiple values + - it: Should create RBAC resources for Spark in every Spark job namespace set: - spark.jobNamespaces: - - ns1 - - ns2 - documentIndex: 1 + spark: + jobNamespaces: + - ns1 + - ns2 + documentIndex: 3 asserts: - containsDocument: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding name: spark-operator-spark - namespace: ns1 + namespace: ns2 + - contains: + path: subjects + content: + kind: ServiceAccount + name: spark-operator-spark + namespace: ns2 + - equal: + path: roleRef + value: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-operator-spark - - it: Should create multiple spark roles if `spark.jobNamespaces` is set with multiple values + - it: Should use the specified service account name if `spark.serviceAccount.name` is set set: - spark.jobNamespaces: - - ns1 - - ns2 - documentIndex: 2 + spark: + serviceAccount: + name: spark + documentIndex: 0 asserts: - containsDocument: apiVersion: rbac.authorization.k8s.io/v1 kind: Role - name: spark-operator-spark - namespace: ns2 + name: spark + namespace: default - - it: Should create multiple spark role bindings if `spark.jobNamespaces` is set with multiple values + - it: Should use the specified service account name if `spark.serviceAccount.name` is set set: - spark.jobNamespaces: - - ns1 - - ns2 - documentIndex: 3 + spark: + serviceAccount: + name: spark + documentIndex: 1 asserts: - containsDocument: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding - name: spark-operator-spark - namespace: ns2 + name: spark + namespace: default + - contains: + path: subjects + content: + kind: ServiceAccount + name: spark + namespace: default + - equal: + path: roleRef + value: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark diff --git a/charts/spark-operator-chart/tests/spark/serviceaccount_test.yaml b/charts/spark-operator-chart/tests/spark/serviceaccount_test.yaml index a1f1898b4..e8b764a6f 100644 --- a/charts/spark-operator-chart/tests/spark/serviceaccount_test.yaml +++ b/charts/spark-operator-chart/tests/spark/serviceaccount_test.yaml @@ -66,59 +66,36 @@ tests: path: metadata.annotations.key2 value: value2 - - it: Should create multiple service accounts if `spark.jobNamespaces` is set + - it: Should create service account for every non-empty spark job namespace if `spark.jobNamespaces` is set with multiple values set: spark: - serviceAccount: - name: spark jobNamespaces: + - "" - ns1 - ns2 - - ns3 documentIndex: 0 asserts: - hasDocuments: - count: 3 + count: 2 - containsDocument: apiVersion: v1 kind: ServiceAccount - name: spark + name: spark-operator-spark namespace: ns1 - - it: Should create multiple service accounts if `spark.jobNamespaces` is set + - it: Should create service account for every non-empty spark job namespace if `spark.jobNamespaces` is set with multiple values set: spark: - serviceAccount: - name: spark jobNamespaces: + - "" - ns1 - ns2 - - ns3 documentIndex: 1 asserts: - hasDocuments: - count: 3 + count: 2 - containsDocument: apiVersion: v1 kind: ServiceAccount - name: spark + name: spark-operator-spark namespace: ns2 - - - it: Should create multiple service accounts if `spark.jobNamespaces` is set 
- set: - spark: - serviceAccount: - name: spark - jobNamespaces: - - ns1 - - ns2 - - ns3 - documentIndex: 2 - asserts: - - hasDocuments: - count: 3 - - containsDocument: - apiVersion: v1 - kind: ServiceAccount - name: spark - namespace: ns3 diff --git a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml index 14c34f7a8..bf6bc03c8 100644 --- a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml @@ -31,6 +31,14 @@ tests: kind: Deployment name: spark-operator-webhook + - it: Should not create webhook deployment if `webhook.enable` is `false` + set: + webhook: + enable: false + asserts: + - hasDocuments: + count: 0 + - it: Should set replicas if `webhook.replicas` is set set: webhook: @@ -40,6 +48,15 @@ tests: path: spec.replicas value: 10 + - it: Should set replicas if `webhook.replicas` is set + set: + webhook: + replicas: 0 + asserts: + - equal: + path: spec.replicas + value: 0 + - it: Should add pod labels if `webhook.labels` is set set: webhook: @@ -107,6 +124,17 @@ tests: path: spec.template.spec.containers[?(@.name=="spark-operator-webhook")].args content: --namespaces=ns1,ns2 + - it: Should set namespaces to all namespaces (`""`) if `spark.jobNamespaces` contains empty string + set: + spark: + jobNamespaces: + - "" + - default + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-webhook")].args + content: --namespaces="" + - it: Should contain `--enable-metrics` arg if `prometheus.metrics.enable` is set to `true` set: prometheus: diff --git a/charts/spark-operator-chart/tests/webhook/mutatingwebhookconfiguration_test.yaml b/charts/spark-operator-chart/tests/webhook/mutatingwebhookconfiguration_test.yaml index 54273df18..d68a74d09 100644 --- a/charts/spark-operator-chart/tests/webhook/mutatingwebhookconfiguration_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/mutatingwebhookconfiguration_test.yaml @@ -31,6 +31,14 @@ tests: kind: MutatingWebhookConfiguration name: spark-operator-webhook + - it: Should not create the mutating webhook configuration if `webhook.enable` is `false` + set: + webhook: + enable: false + asserts: + - hasDocuments: + count: 0 + - it: Should use the specified webhook port set: webhook: @@ -49,7 +57,7 @@ tests: path: webhooks[*].failurePolicy value: Fail - - it: Should set namespaceSelector if sparkJobNamespaces is not empty + - it: Should set namespaceSelector if `spark.jobNamespaces` is set with non-empty strings set: spark: jobNamespaces: @@ -68,6 +76,19 @@ tests: - ns2 - ns3 + - it: Should not set namespaceSelector if `spark.jobNamespaces` contains empty string + set: + spark: + jobNamespaces: + - "" + - ns1 + - ns2 + - ns3 + asserts: + - notExists: + path: webhooks[*].namespaceSelector + + - it: Should should use the specified timeoutSeconds set: webhook: diff --git a/charts/spark-operator-chart/tests/webhook/poddisruptionbudget_test.yaml b/charts/spark-operator-chart/tests/webhook/poddisruptionbudget_test.yaml index f45350dbb..e7295ff02 100644 --- a/charts/spark-operator-chart/tests/webhook/poddisruptionbudget_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/poddisruptionbudget_test.yaml @@ -24,6 +24,14 @@ release: namespace: spark-operator tests: + - it: Should not render podDisruptionBudget if `webhook.enable` is `false` + set: + webhook: + enable: false + asserts: + - hasDocuments: + count: 0 + - it: Should not render podDisruptionBudget if 
`webhook.podDisruptionBudget.enable` is false set: webhook: @@ -40,7 +48,7 @@ tests: podDisruptionBudget: enable: true asserts: - - failedTemplate: + - failedTemplate: errorMessage: "webhook.replicas must be greater than 1 to enable pod disruption budget for webhook" - it: Should render spark operator podDisruptionBudget if `webhook.podDisruptionBudget.enable` is true diff --git a/charts/spark-operator-chart/tests/webhook/rbac_test.yaml b/charts/spark-operator-chart/tests/webhook/rbac_test.yaml new file mode 100644 index 000000000..bbcc42772 --- /dev/null +++ b/charts/spark-operator-chart/tests/webhook/rbac_test.yaml @@ -0,0 +1,165 @@ +# +# Copyright 2024 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +suite: Test webhook rbac + +templates: + - webhook/rbac.yaml + +release: + name: spark-operator + namespace: spark-operator + +tests: + - it: Should not create webhook RBAC resources if `webhook.rbac.create` is false + set: + webhook: + rbac: + create: false + asserts: + - hasDocuments: + count: 0 + + - it: Should create webhook ClusterRole by default + documentIndex: 0 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + name: spark-operator-webhook + + - it: Should create webhook ClusterRoleBinding by default + documentIndex: 1 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + name: spark-operator-webhook + - contains: + path: subjects + content: + kind: ServiceAccount + name: spark-operator-webhook + namespace: spark-operator + count: 1 + - equal: + path: roleRef + value: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: spark-operator-webhook + + - it: Should add extra annotations to webhook ClusterRole if `webhook.rbac.annotations` is set + set: + webhook: + rbac: + annotations: + key1: value1 + key2: value2 + asserts: + - equal: + path: metadata.annotations.key1 + value: value1 + - equal: + path: metadata.annotations.key2 + value: value2 + + - it: Should create role and rolebinding for webhook in release namespace + documentIndex: 2 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-webhook + namespace: spark-operator + + - it: Should create role and rolebinding for webhook in release namespace + documentIndex: 3 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-webhook + namespace: spark-operator + - contains: + path: subjects + content: + kind: ServiceAccount + name: spark-operator-webhook + namespace: spark-operator + count: 1 + - equal: + path: roleRef + value: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spark-operator-webhook + + - it: Should create roles and rolebindings for webhook in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 4 + asserts: + - containsDocument: + apiVersion: 
rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-webhook + namespace: default + + - it: Should create roles and rolebindings for webhook in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 5 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-webhook + namespace: default + + - it: Should create roles and rolebindings for webhook in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 6 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + name: spark-operator-webhook + namespace: spark + + - it: Should create roles and rolebindings for webhook in every spark job namespace if `spark.jobNamespaces` is set and does not contain empty string + set: + spark: + jobNamespaces: + - default + - spark + documentIndex: 7 + asserts: + - containsDocument: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + name: spark-operator-webhook + namespace: spark diff --git a/charts/spark-operator-chart/tests/webhook/service_test.yaml b/charts/spark-operator-chart/tests/webhook/service_test.yaml index c06631f97..6ef15726b 100644 --- a/charts/spark-operator-chart/tests/webhook/service_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/service_test.yaml @@ -24,6 +24,14 @@ release: namespace: spark-operator tests: + - it: Should not create webhook service if `webhook.enable` is `false` + set: + webhook: + enable: false + asserts: + - hasDocuments: + count: 0 + - it: Should create the webhook service correctly set: webhook: diff --git a/charts/spark-operator-chart/tests/webhook/validatingwebhookconfiguration_test.yaml b/charts/spark-operator-chart/tests/webhook/validatingwebhookconfiguration_test.yaml index 9c7fa4daa..a252d7f4a 100644 --- a/charts/spark-operator-chart/tests/webhook/validatingwebhookconfiguration_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/validatingwebhookconfiguration_test.yaml @@ -31,6 +31,14 @@ tests: kind: ValidatingWebhookConfiguration name: spark-operator-webhook + - it: Should not create the validating webhook configuration if `webhook.enable` is `false` + set: + webhook: + enable: false + asserts: + - hasDocuments: + count: 0 + - it: Should use the specified webhook port set: webhook: @@ -49,7 +57,7 @@ tests: path: webhooks[*].failurePolicy value: Fail - - it: Should set namespaceSelector if `spark.jobNamespaces` is not empty + - it: Should set namespaceSelector if `spark.jobNamespaces` is set with non-empty strings set: spark.jobNamespaces: - ns1 @@ -67,6 +75,18 @@ tests: - ns2 - ns3 + - it: Should not set namespaceSelector if `spark.jobNamespaces` contains empty string + set: + spark: + jobNamespaces: + - "" + - ns1 + - ns2 + - ns3 + asserts: + - notExists: + path: webhooks[*].namespaceSelector + - it: Should should use the specified timeoutSeconds set: webhook: diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index a5adbe477..94100dfb2 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -67,6 +67,13 @@ controller: # -- Specifies whether to enable batch scheduler for spark jobs scheduling. # If enabled, users can specify batch scheduler name in spark application. 
enable: false + # -- Specifies a list of kube-scheduler names for scheduling Spark pods. + kubeSchedulerNames: [] + # - default-scheduler + # -- Default batch scheduler to be used if not specified by the user. + # If specified, this value must be either "volcano" or "yunikorn". Specifying any other + # value will cause the controller to error on startup. + default: "" serviceAccount: # -- Specifies whether to create a service account for the controller. @@ -163,7 +170,18 @@ controller: # Require `controller.replicas` to be greater than 1 minAvailable: 1 + pprof: + # -- Specifies whether to enable pprof. + enable: false + # -- Specifies pprof port. + port: 6060 + # -- Specifies pprof service port name. + portName: pprof + webhook: + # -- Specifies whether to enable webhook. + enable: true + # -- Number of replicas of webhook server. replicas: 1 diff --git a/cmd/operator/controller/start.go b/cmd/operator/controller/start.go index 8fb54d7ea..38fee35c9 100644 --- a/cmd/operator/controller/start.go +++ b/cmd/operator/controller/start.go @@ -20,6 +20,7 @@ import ( "crypto/tls" "flag" "os" + "slices" "time" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) @@ -44,6 +45,7 @@ import ( logzap "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" ctrlwebhook "sigs.k8s.io/controller-runtime/pkg/webhook" + schedulingv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" sparkoperator "github.com/kubeflow/spark-operator" "github.com/kubeflow/spark-operator/api/v1beta1" @@ -52,7 +54,9 @@ import ( "github.com/kubeflow/spark-operator/internal/controller/sparkapplication" "github.com/kubeflow/spark-operator/internal/metrics" "github.com/kubeflow/spark-operator/internal/scheduler" + "github.com/kubeflow/spark-operator/internal/scheduler/kubescheduler" "github.com/kubeflow/spark-operator/internal/scheduler/volcano" + "github.com/kubeflow/spark-operator/internal/scheduler/yunikorn" "github.com/kubeflow/spark-operator/pkg/common" "github.com/kubeflow/spark-operator/pkg/util" // +kubebuilder:scaffold:imports @@ -71,7 +75,9 @@ var ( cacheSyncTimeout time.Duration // Batch scheduler - enableBatchScheduler bool + enableBatchScheduler bool + kubeSchedulerNames []string + defaultBatchScheduler string // Spark web UI service and ingress enableUIService bool @@ -95,6 +101,7 @@ var ( metricsJobStartLatencyBuckets []float64 healthProbeBindAddress string + pprofBindAddress string secureMetrics bool enableHTTP2 bool development bool @@ -103,6 +110,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(schedulingv1alpha1.AddToScheme(scheme)) utilruntime.Must(v1beta1.AddToScheme(scheme)) utilruntime.Must(v1beta2.AddToScheme(scheme)) @@ -123,10 +131,13 @@ func NewStartCommand() *cobra.Command { } command.Flags().IntVar(&controllerThreads, "controller-threads", 10, "Number of worker threads used by the SparkApplication controller.") - command.Flags().StringSliceVar(&namespaces, "namespaces", []string{}, "The Kubernetes namespace to manage. Will manage custom resource objects of the managed CRD types for the whole cluster if unset.") + command.Flags().StringSliceVar(&namespaces, "namespaces", []string{}, "The Kubernetes namespace to manage. 
Will manage custom resource objects of the managed CRD types for the whole cluster if unset or contains empty string.") command.Flags().DurationVar(&cacheSyncTimeout, "cache-sync-timeout", 30*time.Second, "Informer cache sync timeout.") command.Flags().BoolVar(&enableBatchScheduler, "enable-batch-scheduler", false, "Enable batch schedulers.") + command.Flags().StringSliceVar(&kubeSchedulerNames, "kube-scheduler-names", []string{}, "The kube-scheduler names for scheduling Spark applications.") + command.Flags().StringVar(&defaultBatchScheduler, "default-batch-scheduler", "", "Default batch scheduler.") + command.Flags().BoolVar(&enableUIService, "enable-ui-service", true, "Enable Spark Web UI service.") command.Flags().StringVar(&ingressClassName, "ingress-class-name", "", "Set ingressClassName for ingress resources created.") command.Flags().StringVar(&ingressURLFormat, "ingress-url-format", "", "Ingress URL format.") @@ -151,6 +162,9 @@ func NewStartCommand() *cobra.Command { command.Flags().BoolVar(&secureMetrics, "secure-metrics", false, "If set the metrics endpoint is served securely") command.Flags().BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") + command.Flags().StringVar(&pprofBindAddress, "pprof-bind-address", "0", "The address the pprof endpoint binds to. "+ + "If not set, it will be 0 in order to disable the pprof server") + flagSet := flag.NewFlagSet("controller", flag.ExitOnError) ctrl.RegisterFlags(flagSet) zapOptions.BindFlags(flagSet) @@ -183,6 +197,7 @@ func start() { TLSOpts: tlsOptions, }), HealthProbeBindAddress: healthProbeBindAddress, + PprofBindAddress: pprofBindAddress, LeaderElection: enableLeaderElection, LeaderElectionID: leaderElectionLockName, LeaderElectionNamespace: leaderElectionLockNamespace, @@ -206,9 +221,19 @@ func start() { var registry *scheduler.Registry if enableBatchScheduler { registry = scheduler.GetRegistry() + _ = registry.Register(common.VolcanoSchedulerName, volcano.Factory) + _ = registry.Register(yunikorn.SchedulerName, yunikorn.Factory) + + // Register kube-schedulers. + for _, name := range kubeSchedulerNames { + registry.Register(name, kubescheduler.Factory) + } - // Register volcano scheduler. - registry.Register(common.VolcanoSchedulerName, volcano.Factory) + schedulerNames := registry.GetRegisteredSchedulerNames() + if defaultBatchScheduler != "" && !slices.Contains(schedulerNames, defaultBatchScheduler) { + logger.Error(nil, "Failed to find default batch scheduler in registered schedulers") + os.Exit(1) + } } // Setup controller for SparkApplication. @@ -300,9 +325,7 @@ func newTLSOptions() []func(c *tls.Config) { // newCacheOptions creates and returns a cache.Options instance configured with default namespaces and object caching settings. 
func newCacheOptions() cache.Options { defaultNamespaces := make(map[string]cache.Config) - if util.ContainsString(namespaces, cache.AllNamespaces) { - defaultNamespaces[cache.AllNamespaces] = cache.Config{} - } else { + if !util.ContainsString(namespaces, cache.AllNamespaces) { for _, ns := range namespaces { defaultNamespaces[ns] = cache.Config{} } @@ -350,9 +373,13 @@ func newSparkApplicationReconcilerOptions() sparkapplication.Options { EnableUIService: enableUIService, IngressClassName: ingressClassName, IngressURLFormat: ingressURLFormat, + DefaultBatchScheduler: defaultBatchScheduler, SparkApplicationMetrics: sparkApplicationMetrics, SparkExecutorMetrics: sparkExecutorMetrics, } + if enableBatchScheduler { + options.KubeSchedulerNames = kubeSchedulerNames + } return options } diff --git a/cmd/operator/webhook/start.go b/cmd/operator/webhook/start.go index 23ef7ae48..cc3997ca1 100644 --- a/cmd/operator/webhook/start.go +++ b/cmd/operator/webhook/start.go @@ -130,7 +130,7 @@ func NewStartCommand() *cobra.Command { } command.Flags().IntVar(&controllerThreads, "controller-threads", 10, "Number of worker threads used by the SparkApplication controller.") - command.Flags().StringSliceVar(&namespaces, "namespaces", []string{"default"}, "The Kubernetes namespace to manage. Will manage custom resource objects of the managed CRD types for the whole cluster if unset.") + command.Flags().StringSliceVar(&namespaces, "namespaces", []string{}, "The Kubernetes namespace to manage. Will manage custom resource objects of the managed CRD types for the whole cluster if unset or contains empty string.") command.Flags().StringVar(&labelSelectorFilter, "label-selector-filter", "", "A comma-separated list of key=value, or key labels to filter resources during watch and list based on the specified labels.") command.Flags().DurationVar(&cacheSyncTimeout, "cache-sync-timeout", 30*time.Second, "Informer cache sync timeout.") @@ -368,9 +368,7 @@ func newTLSOptions() []func(c *tls.Config) { // newCacheOptions creates and returns a cache.Options instance configured with default namespaces and object caching settings. func newCacheOptions() cache.Options { defaultNamespaces := make(map[string]cache.Config) - if util.ContainsString(namespaces, cache.AllNamespaces) { - defaultNamespaces[cache.AllNamespaces] = cache.Config{} - } else { + if !util.ContainsString(namespaces, cache.AllNamespaces) { for _, ns := range namespaces { defaultNamespaces[ns] = cache.Config{} } diff --git a/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml index 7f77e1bb9..f5318f036 100644 --- a/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/config/crd/bases/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -3179,6 +3179,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the driver pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling @@ -7946,6 +7950,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the executor pod. 
+ type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling diff --git a/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml b/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml index afc07c253..c56effc2f 100644 --- a/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml +++ b/config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml @@ -3127,6 +3127,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the driver pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling @@ -7864,6 +7868,10 @@ spec: - protocol type: object type: array + priorityClassName: + description: PriorityClassName is the name of the PriorityClass + for the executor pod. + type: string schedulerName: description: SchedulerName specifies the scheduler that will be used for scheduling diff --git a/config/samples/v1beta1_sparkapplication.yaml b/config/samples/v1beta1_sparkapplication.yaml index d6c3e25b4..a9b084bbd 100644 --- a/config/samples/v1beta1_sparkapplication.yaml +++ b/config/samples/v1beta1_sparkapplication.yaml @@ -8,16 +8,16 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 driver: labels: - version: 3.5.0 + version: 3.5.2 serviceAccount: spark-operator-spark executor: labels: - version: 3.5.0 + version: 3.5.2 instances: 1 diff --git a/config/samples/v1beta2_scheduledsparkapplication.yaml b/config/samples/v1beta2_scheduledsparkapplication.yaml index 294430f57..e6850801c 100644 --- a/config/samples/v1beta2_scheduledsparkapplication.yaml +++ b/config/samples/v1beta2_scheduledsparkapplication.yaml @@ -11,23 +11,23 @@ spec: template: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 restartPolicy: type: Never driver: labels: - version: 3.5.0 + version: 3.5.2 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 3.5.0 + version: 3.5.2 instances: 1 cores: 1 coreLimit: 1200m diff --git a/config/samples/v1beta2_sparkapplication.yaml b/config/samples/v1beta2_sparkapplication.yaml index 70f4152b9..f534a07e7 100644 --- a/config/samples/v1beta2_sparkapplication.yaml +++ b/config/samples/v1beta2_sparkapplication.yaml @@ -8,16 +8,16 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 driver: labels: - version: 3.5.0 + version: 3.5.2 serviceAccount: spark-operator-spark executor: labels: - version: 3.5.0 + version: 3.5.2 instances: 1 diff --git a/docs/api-docs.md b/docs/api-docs.md index 
6117b6a23..fa1c5a39e 100644
--- a/docs/api-docs.md
+++ b/docs/api-docs.md
@@ -625,6 +625,18 @@ executors to connect to the driver.
 <p>Ports settings for the pods, following the Kubernetes specifications.</p>
 </td>
 </tr>
+<tr>
+<td>
+<code>priorityClassName</code><br/>
+<em>
+string
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>PriorityClassName is the name of the PriorityClass for the driver pod.</p>
+</td>
+</tr>
 </tbody>
 </table>
 <h3 id="sparkoperator.k8s.io/v1beta2.DriverState">DriverState
@@ -840,6 +852,18 @@ Maps to spark.kubernetes.executor.deleteOnTermination that is avail
 <p>Ports settings for the pods, following the Kubernetes specifications.</p>
 </td>
 </tr>
+<tr>
+<td>
+<code>priorityClassName</code><br/>
+<em>
+string
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>PriorityClassName is the name of the PriorityClass for the executor pod.</p>
+</td>
+</tr>
 </tbody>
 </table>
 <h3 id="sparkoperator.k8s.io/v1beta2.ExecutorState">ExecutorState
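The two api-docs hunks above document the new `priorityClassName` field on the driver and executor pod specs. A minimal SparkApplication sketch using the field, in the style of the updated examples; the PriorityClass name `high-priority` is hypothetical and must already exist in the cluster:

```yaml
apiVersion: sparkoperator.k8s.io/v1beta2
kind: SparkApplication
metadata:
  name: spark-pi-priority  # hypothetical example name
  namespace: default
spec:
  type: Scala
  mode: cluster
  image: spark:3.5.2
  imagePullPolicy: IfNotPresent
  mainClass: org.apache.spark.examples.SparkPi
  mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar
  sparkVersion: 3.5.2
  driver:
    cores: 1
    memory: 512m
    serviceAccount: spark-operator-spark
    priorityClassName: high-priority  # assumed PriorityClass
  executor:
    instances: 1
    cores: 1
    memory: 512m
    priorityClassName: high-priority  # assumed PriorityClass
```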

diff --git a/docs/release.md b/docs/release.md
index d3a385b60..35c313ce7 100644
--- a/docs/release.md
+++ b/docs/release.md
@@ -71,28 +71,42 @@ If you want to push changes to the `release-X.Y` release branch, you have to che
    ```bash
    # Get version and remove the leading 'v'
    VERSION=$(cat VERSION | sed "s/^v//")
+
+   # Change the version and appVersion in Chart.yaml
+   # On Linux
    sed -i "s/^version.*/version: ${VERSION}/" charts/spark-operator-chart/Chart.yaml
    sed -i "s/^appVersion.*/appVersion: ${VERSION}/" charts/spark-operator-chart/Chart.yaml
+
+   # On macOS
+   sed -i '' "s/^version.*/version: ${VERSION}/" charts/spark-operator-chart/Chart.yaml
+   sed -i '' "s/^appVersion.*/appVersion: ${VERSION}/" charts/spark-operator-chart/Chart.yaml
+   ```
+
+3. Update the Helm chart README:
+
+   ```bash
+   make helm-docs
    ```
 
-3. Commit the changes:
+4. Commit the changes:
 
    ```bash
    git add VERSION
    git add charts/spark-operator-chart/Chart.yaml
-   git commit -s -m "Release $VERSION"
-   git push
+   git add charts/spark-operator-chart/README.md
+   git commit -s -m "Spark Operator Official Release v${VERSION}"
+   git push origin release-X.Y
    ```
 
-4. Submit a PR to the release branch. After the PR is merged, a new tag will be automatically created if the `VERSION` file has changed.
+5. Submit a PR to the release branch.
 
 ### Release Spark Operator Image
 
-After a pre-release/release tag is pushed, a release workflow will be triggered to build and push Spark operator docker image to Docker Hub.
+After the `VERSION` file is modified and pushed to the release branch, a release workflow will be triggered to build and push Spark operator Docker images to Docker Hub.
 
 ### Publish release
 
-After a pre-release/release tag is pushed, a release workflow will be triggered to create a new draft release.
+After the `VERSION` file is modified and pushed to the release branch, a release workflow will be triggered to create a new draft release with the Spark operator Helm chart packaged as an artifact. After modifying the release notes, publish the release.
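As a quick sanity check for the steps above: with a hypothetical `VERSION` of `v1.2.3`, the sed commands should leave `charts/spark-operator-chart/Chart.yaml` with matching entries, e.g.:

```yaml
# Illustrative excerpt of charts/spark-operator-chart/Chart.yaml, assuming VERSION=v1.2.3
version: 1.2.3
appVersion: 1.2.3
```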
### Release Spark Operator Helm Chart diff --git a/examples/spark-pi-configmap.yaml b/examples/spark-pi-configmap.yaml index e2b4bc556..ddb812412 100644 --- a/examples/spark-pi-configmap.yaml +++ b/examples/spark-pi-configmap.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 restartPolicy: type: Never volumes: @@ -33,18 +33,13 @@ spec: configMap: name: test-configmap driver: - labels: - version: 3.5.0 cores: 1 - coreLimit: 1200m memory: 512m - serviceAccount: spark-operator-spark volumeMounts: - name: config-vol mountPath: /opt/spark/config + serviceAccount: spark-operator-spark executor: - labels: - version: 3.5.0 instances: 1 cores: 1 memory: 512m diff --git a/examples/spark-pi-custom-resource.yaml b/examples/spark-pi-custom-resource.yaml index 83df405e1..1880cacfe 100644 --- a/examples/spark-pi-custom-resource.yaml +++ b/examples/spark-pi-custom-resource.yaml @@ -21,34 +21,20 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 restartPolicy: type: Never - volumes: - - name: test-volume - hostPath: - path: /tmp - type: Directory driver: - labels: - version: 3.5.0 - cores: 1 - coreLimit: 1200m + coreRequest: "0.5" + coreLimit: 800m memory: 512m serviceAccount: spark-operator-spark - volumeMounts: - - name: test-volume - mountPath: /tmp executor: - labels: - version: 3.5.0 instances: 1 - cores: 1 + coreRequest: "1200m" + coreLimit: 1500m memory: 512m - volumeMounts: - - name: test-volume - mountPath: /tmp diff --git a/examples/spark-pi-dynamic-allocation.yaml b/examples/spark-pi-dynamic-allocation.yaml index 800313914..c2480cb5a 100644 --- a/examples/spark-pi-dynamic-allocation.yaml +++ b/examples/spark-pi-dynamic-allocation.yaml @@ -21,26 +21,18 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 - arguments: - - "50000" + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 driver: - labels: - version: 3.5.0 cores: 1 - coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: - labels: - version: 3.5.0 instances: 1 cores: 1 - coreLimit: 1200m memory: 512m dynamicAllocation: enabled: true diff --git a/examples/spark-pi-kube-scheduler.yaml b/examples/spark-pi-kube-scheduler.yaml new file mode 100644 index 000000000..c30b6734e --- /dev/null +++ b/examples/spark-pi-kube-scheduler.yaml @@ -0,0 +1,37 @@ +# +# Copyright 2024 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: sparkoperator.k8s.io/v1beta2 +kind: SparkApplication +metadata: + name: spark-pi-kube-scheduler + namespace: default +spec: + type: Scala + mode: cluster + image: spark:3.5.2 + imagePullPolicy: IfNotPresent + mainClass: org.apache.spark.examples.SparkPi + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 + driver: + cores: 1 + memory: 512m + serviceAccount: spark-operator-spark + executor: + instances: 2 + cores: 1 + memory: 512m + batchScheduler: kube-scheduler diff --git a/examples/spark-pi-prometheus.yaml b/examples/spark-pi-prometheus.yaml index 29a447061..416cf527c 100644 --- a/examples/spark-pi-prometheus.yaml +++ b/examples/spark-pi-prometheus.yaml @@ -22,28 +22,27 @@ metadata: spec: type: Scala mode: cluster - image: gcr.io/spark-operator/spark:v3.1.1-gcs-prometheus + image: {IMAGE_REGISTRY}/{IMAGE_REPOSITORY}/spark:3.5.2-gcs-prometheus imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar arguments: - "100000" - sparkVersion: 3.1.1 + sparkVersion: 3.5.2 restartPolicy: type: Never driver: cores: 1 - coreLimit: 1200m memory: 512m labels: - version: 3.1.1 + version: 3.5.2 serviceAccount: spark-operator-spark executor: cores: 1 instances: 1 memory: 512m labels: - version: 3.1.1 + version: 3.5.2 monitoring: exposeDriverMetrics: true exposeExecutorMetrics: true diff --git a/examples/spark-pi-python.yaml b/examples/spark-pi-python.yaml index 5d0a7f273..33b659eef 100644 --- a/examples/spark-pi-python.yaml +++ b/examples/spark-pi-python.yaml @@ -22,21 +22,15 @@ spec: type: Python pythonVersion: "3" mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainApplicationFile: local:///opt/spark/examples/src/main/python/pi.py - sparkVersion: 3.5.0 + sparkVersion: 3.5.2 driver: - labels: - version: 3.5.0 cores: 1 - coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: - labels: - version: 3.5.0 instances: 1 cores: 1 - coreLimit: 1200m memory: 512m diff --git a/examples/spark-pi-scheduled.yaml b/examples/spark-pi-scheduled.yaml index f74143e7c..3bcd053af 100644 --- a/examples/spark-pi-scheduled.yaml +++ b/examples/spark-pi-scheduled.yaml @@ -25,24 +25,18 @@ spec: template: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 restartPolicy: type: Never driver: - labels: - version: 3.5.0 cores: 1 - coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: - labels: - version: 3.5.0 instances: 1 cores: 1 - coreLimit: 1200m memory: 512m diff --git a/examples/spark-pi-ttl.yaml b/examples/spark-pi-ttl.yaml new file mode 100644 index 000000000..68e6dd413 --- /dev/null 
+++ b/examples/spark-pi-ttl.yaml @@ -0,0 +1,37 @@ +# +# Copyright 2024 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: sparkoperator.k8s.io/v1beta2 +kind: SparkApplication +metadata: + name: spark-pi-ttl + namespace: default +spec: + type: Scala + mode: cluster + image: spark:3.5.2 + imagePullPolicy: IfNotPresent + mainClass: org.apache.spark.examples.SparkPi + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 + timeToLiveSeconds: 30 + driver: + cores: 1 + memory: 512m + serviceAccount: spark-operator-spark + executor: + instances: 1 + cores: 1 + memory: 512m diff --git a/examples/spark-pi-volcano.yaml b/examples/spark-pi-volcano.yaml index 277ed173d..889d32c36 100644 --- a/examples/spark-pi-volcano.yaml +++ b/examples/spark-pi-volcano.yaml @@ -21,23 +21,17 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 driver: - labels: - version: 3.5.0 cores: 1 - coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: - labels: - version: 3.5.0 instances: 2 cores: 1 - coreLimit: 1200m memory: 512m batchScheduler: volcano diff --git a/examples/spark-pi-yunikorn.yaml b/examples/spark-pi-yunikorn.yaml new file mode 100644 index 000000000..0cffd6aaa --- /dev/null +++ b/examples/spark-pi-yunikorn.yaml @@ -0,0 +1,39 @@ +# +# Copyright 2024 The Kubeflow authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: sparkoperator.k8s.io/v1beta2 +kind: SparkApplication +metadata: + name: spark-pi-yunikorn + namespace: default +spec: + type: Scala + mode: cluster + image: spark:3.5.2 + imagePullPolicy: IfNotPresent + mainClass: org.apache.spark.examples.SparkPi + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + sparkVersion: 3.5.2 + driver: + cores: 1 + memory: 512m + serviceAccount: spark-operator-spark + executor: + instances: 2 + cores: 1 + memory: 512m + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default \ No newline at end of file diff --git a/examples/spark-pi.yaml b/examples/spark-pi.yaml index 6d7ae6869..034490e0c 100644 --- a/examples/spark-pi.yaml +++ b/examples/spark-pi.yaml @@ -21,22 +21,22 @@ metadata: spec: type: Scala mode: cluster - image: spark:3.5.0 + image: spark:3.5.2 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.0.jar - sparkVersion: 3.5.0 + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.2.jar + arguments: + - "5000" + sparkVersion: 3.5.2 driver: labels: - version: 3.5.0 + version: 3.5.2 cores: 1 - coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 3.5.0 + version: 3.5.2 instances: 1 cores: 1 - coreLimit: 1200m memory: 512m diff --git a/go.mod b/go.mod index 72c1d2548..ed0c37d13 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,12 @@ module github.com/kubeflow/spark-operator -go 1.22.5 +go 1.23.1 require ( cloud.google.com/go/storage v1.43.0 - github.com/aws/aws-sdk-go-v2 v1.30.3 - github.com/aws/aws-sdk-go-v2/config v1.27.26 - github.com/aws/aws-sdk-go-v2/service/s3 v1.58.2 + github.com/aws/aws-sdk-go-v2 v1.30.5 + github.com/aws/aws-sdk-go-v2/config v1.27.33 + github.com/aws/aws-sdk-go-v2/service/s3 v1.58.3 github.com/golang/glog v1.2.2 github.com/google/uuid v1.6.0 github.com/olekukonko/tablewriter v0.0.5 @@ -18,52 +18,54 @@ require ( github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 go.uber.org/zap v1.27.0 - gocloud.dev v0.37.0 - golang.org/x/net v0.27.0 - helm.sh/helm/v3 v3.15.3 - k8s.io/api v0.30.2 - k8s.io/apiextensions-apiserver v0.30.2 - k8s.io/apimachinery v0.30.2 + gocloud.dev v0.39.0 + golang.org/x/net v0.28.0 + helm.sh/helm/v3 v3.16.1 + k8s.io/api v0.31.0 + k8s.io/apiextensions-apiserver v0.31.0 + k8s.io/apimachinery v0.31.0 k8s.io/client-go v1.5.2 k8s.io/kubernetes v1.30.2 - k8s.io/utils v0.0.0-20240710235135-d4aae2beeffc + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.17.5 + sigs.k8s.io/scheduler-plugins v0.29.8 volcano.sh/apis v1.9.0 ) require ( cloud.google.com/go v0.115.0 // indirect - cloud.google.com/go/auth v0.7.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect + cloud.google.com/go/auth v0.8.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect cloud.google.com/go/compute/metadata v0.5.0 // indirect - cloud.google.com/go/iam v1.1.11 // indirect + cloud.google.com/go/iam v1.1.13 // indirect + dario.cat/mergo v1.0.1 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/BurntSushi/toml v1.4.0 // indirect github.com/MakeNowJust/heredoc v1.0.0 // indirect github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.2.1 // indirect - github.com/Masterminds/sprig/v3 v3.2.3 // 
indirect + github.com/Masterminds/semver/v3 v3.3.0 // indirect + github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Masterminds/squirrel v1.5.4 // indirect github.com/Microsoft/hcsshim v0.12.4 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect - github.com/aws/aws-sdk-go v1.54.18 // indirect + github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.26 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.7 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.15 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.15 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.32 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.13 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.15 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.17 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.15 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.22.3 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 // indirect - github.com/aws/smithy-go v1.20.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.22.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.30.7 // indirect + github.com/aws/smithy-go v1.20.4 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -72,7 +74,7 @@ require ( github.com/containerd/errdefs v0.1.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect - github.com/cyphar/filepath-securejoin v0.2.5 // indirect + github.com/cyphar/filepath-securejoin v0.3.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v27.0.3+incompatible // indirect @@ -106,11 +108,11 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0 // indirect - github.com/google/s2a-go v0.1.7 // indirect + github.com/google/s2a-go v0.1.8 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/wire v0.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.12.5 // indirect + github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/mux v1.8.1 // indirect 
github.com/gorilla/websocket v1.5.3 // indirect github.com/gosuri/uitable v0.0.4 // indirect @@ -165,7 +167,7 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect - github.com/spf13/cast v1.6.0 // indirect + github.com/spf13/cast v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect @@ -180,21 +182,21 @@ require ( go.opentelemetry.io/otel/trace v1.28.0 // indirect go.starlark.net v0.0.0-20240705175910-70002002b310 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.25.0 // indirect + golang.org/x/crypto v0.26.0 // indirect golang.org/x/exp v0.0.0-20240707233637-46b078467d37 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/term v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect - golang.org/x/time v0.5.0 // indirect + golang.org/x/oauth2 v0.22.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.24.0 // indirect + golang.org/x/term v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect + golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.23.0 // indirect - golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/api v0.188.0 // indirect - google.golang.org/genproto v0.0.0-20240709173604-40e1e62336c5 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240709173604-40e1e62336c5 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240709173604-40e1e62336c5 // indirect + google.golang.org/api v0.191.0 // indirect + google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240812133136-8ffd90a71988 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240812133136-8ffd90a71988 // indirect google.golang.org/grpc v1.65.0 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect @@ -202,12 +204,12 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiserver v0.30.2 // indirect - k8s.io/cli-runtime v0.30.2 // indirect - k8s.io/component-base v0.30.2 // indirect + k8s.io/apiserver v0.31.0 // indirect + k8s.io/cli-runtime v0.31.0 // indirect + k8s.io/component-base v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240709000822-3c01b740850f // indirect - k8s.io/kubectl v0.30.2 // indirect + k8s.io/kubectl v0.31.0 // indirect oras.land/oras-go v1.2.5 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/kustomize/api v0.17.2 // indirect @@ -227,8 +229,11 @@ replace ( k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.29.3 k8s.io/code-generator => k8s.io/code-generator v0.29.3 k8s.io/component-base => k8s.io/component-base v0.29.3 + k8s.io/controller-manager => k8s.io/controller-manager v0.29.3 k8s.io/cri-api => k8s.io/cri-api v0.29.3 k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.29.3 + k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.29.3 + k8s.io/endpointslice => k8s.io/endpointslice v0.29.3 k8s.io/kube-aggregator => 
k8s.io/kube-aggregator v0.29.3 k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.29.3 k8s.io/kube-proxy => k8s.io/kube-proxy v0.29.3 @@ -237,7 +242,9 @@ replace ( k8s.io/kubelet => k8s.io/kubelet v0.29.3 k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.29.3 k8s.io/metrics => k8s.io/metrics v0.29.3 + k8s.io/mount-utils => k8s.io/mount-utils v0.29.3 k8s.io/node-api => k8s.io/node-api v0.29.3 + k8s.io/pod-security-admission => k8s.io/pod-security-admission v0.29.3 k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.29.3 k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.29.3 k8s.io/sample-controller => k8s.io/sample-controller v0.29.3 diff --git a/go.sum b/go.sum index a5bb11dd5..36d5d72bc 100644 --- a/go.sum +++ b/go.sum @@ -1,18 +1,20 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.115.0 h1:CnFSK6Xo3lDYRoBKEcAtia6VSC837/ZkJuRduSFnr14= cloud.google.com/go v0.115.0/go.mod h1:8jIM5vVgoAEoiVxQ/O4BFTfHqulPZgs/ufEzMcFMdWU= -cloud.google.com/go/auth v0.7.0 h1:kf/x9B3WTbBUHkC+1VS8wwwli9TzhSt0vSTVBmMR8Ts= -cloud.google.com/go/auth v0.7.0/go.mod h1:D+WqdrpcjmiCgWrXmLLxOVq1GACoE36chW6KXoEvuIw= -cloud.google.com/go/auth/oauth2adapt v0.2.3 h1:MlxF+Pd3OmSudg/b1yZ5lJwoXCEaeedAguodky1PcKI= -cloud.google.com/go/auth/oauth2adapt v0.2.3/go.mod h1:tMQXOfZzFuNuUxOypHlQEXgdfX5cuhwU+ffUuXRJE8I= +cloud.google.com/go/auth v0.8.1 h1:QZW9FjC5lZzN864p13YxvAtGUlQ+KgRL+8Sg45Z6vxo= +cloud.google.com/go/auth v0.8.1/go.mod h1:qGVp/Y3kDRSDZ5gFD/XPUfYQ9xW1iI7q8RIRoCyBbJc= +cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= +cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= -cloud.google.com/go/iam v1.1.11 h1:0mQ8UKSfdHLut6pH9FM3bI55KWR46ketn0PuXleDyxw= -cloud.google.com/go/iam v1.1.11/go.mod h1:biXoiLWYIKntto2joP+62sd9uW5EpkZmKIvfNcTWlnQ= -cloud.google.com/go/longrunning v0.5.9 h1:haH9pAuXdPAMqHvzX0zlWQigXT7B0+CL4/2nXXdBo5k= -cloud.google.com/go/longrunning v0.5.9/go.mod h1:HD+0l9/OOW0za6UWdKJtXoFAX/BGg/3Wj8p10NeWF7c= +cloud.google.com/go/iam v1.1.13 h1:7zWBXG9ERbMLrzQBRhFliAV+kjcRToDTgQT3CTwYyv4= +cloud.google.com/go/iam v1.1.13/go.mod h1:K8mY0uSXwEXS30KrnVb+j54LB/ntfZu1dr+4zFMNbus= +cloud.google.com/go/longrunning v0.5.12 h1:5LqSIdERr71CqfUsFlJdBpOkBH8FBCFD7P1nTWy3TYE= +cloud.google.com/go/longrunning v0.5.12/go.mod h1:S5hMV8CDJ6r50t2ubVJSKQVv5u0rmik5//KgLO3k4lU= cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= +dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= @@ -28,11 +30,10 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= 
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= -github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= -github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= -github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= -github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= -github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= +github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= @@ -47,46 +48,46 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-sdk-go v1.54.18 h1:t8DGtN8A2wEiazoJxeDbfPsbxCKtjoRLuO7jBSgJzo4= -github.com/aws/aws-sdk-go v1.54.18/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v1.30.3 h1:jUeBtG0Ih+ZIFH0F4UkmL9w3cSpaMv9tYYDbzILP8dY= -github.com/aws/aws-sdk-go-v2 v1.30.3/go.mod h1:nIQjQVp5sfpQcTc9mPSr1B0PaWK5ByX9MOoDadSN4lc= +github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= +github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= +github.com/aws/aws-sdk-go-v2 v1.30.5 h1:mWSRTwQAb0aLE17dSzztCVJWI9+cRMgqebndjwDyK0g= +github.com/aws/aws-sdk-go-v2 v1.30.5/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 h1:tW1/Rkad38LA15X4UQtjXZXNKsCgkshC3EbmcUmghTg= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3/go.mod h1:UbnqO+zjqk3uIt9yCACHJ9IVNhyhOCnYk8yA19SAWrM= -github.com/aws/aws-sdk-go-v2/config v1.27.26 h1:T1kAefbKuNum/AbShMsZEro6eRkeOT8YILfE9wyjAYQ= -github.com/aws/aws-sdk-go-v2/config v1.27.26/go.mod h1:ivWHkAWFrw/nxty5Fku7soTIVdqZaZ7dw+tc5iGW3GA= -github.com/aws/aws-sdk-go-v2/credentials v1.17.26 h1:tsm8g/nJxi8+/7XyJJcP2dLrnK/5rkFp6+i2nhmz5fk= -github.com/aws/aws-sdk-go-v2/credentials v1.17.26/go.mod h1:3vAM49zkIa3q8WT6o9Ve5Z0vdByDMwmdScO0zvThTgI= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11 h1:KreluoV8FZDEtI6Co2xuNk/UqI9iwMrOx/87PBNIKqw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11/go.mod h1:SeSUYBLsMYFoRvHE0Tjvn7kbxaUhl75CJi1sbfhMxkU= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.7 h1:kNemAUX+bJFBSfPkGVZ8HFOKIadjLoI2Ua1ZKivhGSo= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.7/go.mod h1:71S2C1g/Zjn+ANmyoOqJ586OrPF9uC9iiHt9ZAT+MOw= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.15 h1:SoNJ4RlFEQEbtDcCEt+QG56MY4fm4W8rYirAmq+/DdU= 
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.15/go.mod h1:U9ke74k1n2bf+RIgoX1SXFed1HLs51OgUSs+Ph0KJP8= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.15 h1:C6WHdGnTDIYETAm5iErQUiVNsclNx9qbJVPIt03B6bI= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.15/go.mod h1:ZQLZqhcu+JhSrA9/NXRm8SkDvsycE+JkV3WGY41e+IM= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= +github.com/aws/aws-sdk-go-v2/config v1.27.33 h1:Nof9o/MsmH4oa0s2q9a0k7tMz5x/Yj5k06lDODWz3BU= +github.com/aws/aws-sdk-go-v2/config v1.27.33/go.mod h1:kEqdYzRb8dd8Sy2pOdEbExTTF5v7ozEXX0McgPE7xks= +github.com/aws/aws-sdk-go-v2/credentials v1.17.32 h1:7Cxhp/BnT2RcGy4VisJ9miUPecY+lyE9I8JvcZofn9I= +github.com/aws/aws-sdk-go-v2/credentials v1.17.32/go.mod h1:P5/QMF3/DCHbXGEGkdbilXHsyTBX5D3HSwcrSc9p20I= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.13 h1:pfQ2sqNpMVK6xz2RbqLEL0GH87JOwSxPV2rzm8Zsb74= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.13/go.mod h1:NG7RXPUlqfsCLLFfi0+IpKN4sCB9D9fw/qTaSB+xRoU= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.10 h1:zeN9UtUlA6FTx0vFSayxSX32HDw73Yb6Hh2izDSFxXY= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.10/go.mod h1:3HKuexPDcwLWPaqpW2UR/9n8N/u/3CKcGAzSs8p8u8g= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.15 h1:Z5r7SycxmSllHYmaAZPpmN8GviDrSGhMS6bldqtXZPw= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.15/go.mod h1:CetW7bDE00QoGEmPUoZuRog07SGVAUVW6LFpNP0YfIg= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3 h1:dT3MqvGhSoaIhRseqw2I0yH81l7wiR2vjs57O51EAm8= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3/go.mod h1:GlAeCkHwugxdHaueRr4nhPuY+WW+gR8UjlcqzPr1SPI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 h1:KypMCbLPPHEmf9DgMGw51jMj77VfGPAN2Kv4cfhlfgI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4/go.mod h1:Vz1JQXliGcQktFTN/LN6uGppAIRoLBR2bMvIMP0gOjc= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.17 h1:YPYe6ZmvUfDDDELqEKtAd6bo8zxhkm+XEFEzQisqUIE= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.17/go.mod h1:oBtcnYua/CgzCWYN7NZ5j7PotFDaFSUjCYVTtfyn7vw= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17 h1:HGErhhrxZlQ044RiM+WdoZxp0p+EGM62y3L6pwA4olE= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17/go.mod h1:RkZEx4l0EHYDJpWppMJ3nD9wZJAa8/0lq9aVC+r2UII= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 h1:rfprUlsdzgl7ZL2KlXiUAoJnI/VxfHCvDFr2QDFj6u4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19/go.mod h1:SCWkEdRq8/7EK60NcvvQ6NXKuTcchAD4ROAsC37VEZE= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.15 h1:246A4lSTXWJw/rmlQI+TT2OcqeDMKBdyjEQrafMaQdA= 
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.15/go.mod h1:haVfg3761/WF7YPuJOER2MP0k4UAXyHaLclKXB6usDg= -github.com/aws/aws-sdk-go-v2/service/s3 v1.58.2 h1:sZXIzO38GZOU+O0C+INqbH7C2yALwfMWpd64tONS/NE= -github.com/aws/aws-sdk-go-v2/service/s3 v1.58.2/go.mod h1:Lcxzg5rojyVPU/0eFwLtcyTaek/6Mtic5B1gJo7e/zE= -github.com/aws/aws-sdk-go-v2/service/sso v1.22.3 h1:Fv1vD2L65Jnp5QRsdiM64JvUM4Xe+E0JyVsRQKv6IeA= -github.com/aws/aws-sdk-go-v2/service/sso v1.22.3/go.mod h1:ooyCOXjvJEsUw7x+ZDHeISPMhtwI3ZCB7ggFMcFfWLU= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 h1:yiwVzJW2ZxZTurVbYWA7QOrAaCYQR72t0wrSBfoesUE= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4/go.mod h1:0oxfLkpz3rQ/CHlx5hB7H69YUpFiI1tql6Q6Ne+1bCw= -github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 h1:ZsDKRLXGWHk8WdtyYMoGNO7bTudrvuKpDKgMVRlepGE= -github.com/aws/aws-sdk-go-v2/service/sts v1.30.3/go.mod h1:zwySh8fpFyXp9yOr/KVzxOl8SRqgf/IDw5aUt9UKFcQ= -github.com/aws/smithy-go v1.20.3 h1:ryHwveWzPV5BIof6fyDvor6V3iUL7nTfiTKXHiW05nE= -github.com/aws/smithy-go v1.20.3/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/aws/aws-sdk-go-v2/service/s3 v1.58.3 h1:hT8ZAZRIfqBqHbzKTII+CIiY8G2oC9OpLedkZ51DWl8= +github.com/aws/aws-sdk-go-v2/service/s3 v1.58.3/go.mod h1:Lcxzg5rojyVPU/0eFwLtcyTaek/6Mtic5B1gJo7e/zE= +github.com/aws/aws-sdk-go-v2/service/sso v1.22.7 h1:pIaGg+08llrP7Q5aiz9ICWbY8cqhTkyy+0SHvfzQpTc= +github.com/aws/aws-sdk-go-v2/service/sso v1.22.7/go.mod h1:eEygMHnTKH/3kNp9Jr1n3PdejuSNcgwLe1dWgQtO0VQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.7 h1:/Cfdu0XV3mONYKaOt1Gr0k1KvQzkzPyiKUdlWJqy+J4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.7/go.mod h1:bCbAxKDqNvkHxRaIMnyVPXPo+OaPRwvmgzMxbz1VKSA= +github.com/aws/aws-sdk-go-v2/service/sts v1.30.7 h1:NKTa1eqZYw8tiHSRGpP0VtTdub/8KNk8sDkNPFaOKDE= +github.com/aws/aws-sdk-go-v2/service/sts v1.30.7/go.mod h1:NXi1dIAGteSaRLqYgarlhP/Ij0cFT+qmCwiJqWh/U5o= +github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= +github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -124,8 +125,8 @@ github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7np github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= -github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/cyphar/filepath-securejoin v0.3.1 h1:1V7cHiaW+C+39wEfpH6XlLBQo3j/PciWFrgfCLS8XrE= +github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzwRZm4NQsGTBdpp6mETc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -166,8 +167,8 @@ github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= github.com/fatih/color v1.17.0/go.mod 
h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI= -github.com/foxcpp/go-mockdns v1.0.0/go.mod h1:lgRN6+KxQBawyIghpnl5CezHFGS9VLzvtVlwxvzXTQ4= +github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7DlmewI= +github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -236,8 +237,8 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-replayers/grpcreplay v1.1.0 h1:S5+I3zYyZ+GQz68OfbURDdt/+cSMqCK1wrvNx7WBzTE= -github.com/google/go-replayers/grpcreplay v1.1.0/go.mod h1:qzAvJ8/wi57zq7gWqaE6AwLM6miiXUQwP1S+I9icmhk= +github.com/google/go-replayers/grpcreplay v1.3.0 h1:1Keyy0m1sIpqstQmgz307zhiJ1pV4uIlFds5weTmxbo= +github.com/google/go-replayers/grpcreplay v1.3.0/go.mod h1:v6NgKtkijC0d3e3RW8il6Sy5sqRVUwoQa4mHOGEy8DI= github.com/google/go-replayers/httpreplay v1.2.0 h1:VM1wEyyjaoU53BwrOnaf9VhAyQQEEioJvFYxYcLRKzk= github.com/google/go-replayers/httpreplay v1.2.0/go.mod h1:WahEFFZZ7a1P4VM1qEeHy+tME4bwyqPcwWbNlUI1Mcg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -247,12 +248,11 @@ github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9 github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0 h1:e+8XbKB6IMn8A4OAyZccO4pYfB3s7bt6azNIPE7AnPg= github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= -github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= -github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -260,8 +260,8 @@ github.com/google/wire v0.6.0 h1:HBkoIh4BdSxoyo9PveV8giw7ZsaBOvzWKfcg/6MrVwI= github.com/google/wire v0.6.0/go.mod h1:F4QhpQ9EDIdJ1Mbop/NZBRB+5yrR6qg3BnctaoUk6NA= github.com/googleapis/enterprise-certificate-proxy v0.3.2 
h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= -github.com/googleapis/gax-go/v2 v2.12.5 h1:8gw9KZK8TiVKB6q3zHY3SBzLnrGp6HQjyfYBYGmXdxA= -github.com/googleapis/gax-go/v2 v2.12.5/go.mod h1:BUDKcWo+RaKq5SC9vVYL0wLADa3VcfswbOMMRmB9H3E= +github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= +github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4= github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= @@ -281,10 +281,8 @@ github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+l github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= -github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -335,16 +333,14 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/miekg/dns v1.1.25 h1:dFwPR6SfLtrSwgDcIq2bcU/gVutB4sNApq2HBdqcakg= -github.com/miekg/dns v1.1.25/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= -github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM= +github.com/miekg/dns v1.1.57/go.mod h1:uqRjCRUuEAA6qsOiJvDd+CFo/vW+y5WR6SNmHE55hZk= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= @@ -429,7 +425,6 @@ github.com/sagikazarmark/slog-shim v0.1.0 
h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= -github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= @@ -439,9 +434,8 @@ github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9yS github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= -github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= -github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= +github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -456,7 +450,6 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -507,18 +500,17 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -gocloud.dev v0.37.0 h1:XF1rN6R0qZI/9DYjN16Uy0durAmSlf58DHOcb28GPro= -gocloud.dev v0.37.0/go.mod h1:7/O4kqdInCNsc6LqgmuFnS0GRew4XNNYWpA44yQnwco= +gocloud.dev v0.39.0 h1:EYABYGhAalPUaMrbSKOr5lejxoxvXj99nE8XFtsDgds= +gocloud.dev v0.39.0/go.mod h1:drz+VyYNBvrMTW0KZiBAYEdl8lbNZx+OQ7oQvdrFmSQ= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod 
h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240707233637-46b078467d37 h1:uLDX+AfeFCct3a2C7uIWBKMJIR3CJMhcgfrUAqjRK6w= golang.org/x/exp v0.0.0-20240707233637-46b078467d37/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= @@ -531,6 +523,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= +golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -544,16 +538,15 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -564,8 +557,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -580,35 +573,32 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text 
v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -627,23 +617,23 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk= +golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/api v0.188.0 h1:51y8fJ/b1AaaBRJr4yWm96fPcuxSo0JcegXE3DaHQHw= -google.golang.org/api v0.188.0/go.mod h1:VR0d+2SIiWOYG3r/jdm7adPW9hI2aRv9ETOSCQ9Beag= +google.golang.org/api v0.191.0 h1:cJcF09Z+4HAB2t5qTQM1ZtfL/PemsLFkcFG67qq2afk= +google.golang.org/api v0.191.0/go.mod h1:tD5dsFGxFza0hnQveGfVk9QQYKcfp+VzgRqyXFxE0+E= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240709173604-40e1e62336c5 h1:ORprMx6Xqr56pGwKXMnVEFBI0k7OIcHI0Rx92/rKypo= -google.golang.org/genproto v0.0.0-20240709173604-40e1e62336c5/go.mod h1:FfBgJBJg9GcpPvKIuHSZ/aE1g2ecGL74upMzGZjiGEY= -google.golang.org/genproto/googleapis/api v0.0.0-20240709173604-40e1e62336c5 h1:a/Z0jgw03aJ2rQnp5PlPpznJqJft0HyvyrcUcxgzPwY= 
-google.golang.org/genproto/googleapis/api v0.0.0-20240709173604-40e1e62336c5/go.mod h1:mw8MG/Qz5wfgYr6VqVCiZcHe/GJEfI+oGGDCohaVgB0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240709173604-40e1e62336c5 h1:SbSDUWW1PAO24TNpLdeheoYPd7kllICcLU52x6eD4kQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240709173604-40e1e62336c5/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 h1:CT2Thj5AuPV9phrYMtzX11k+XkzMGfRAet42PmoTATM= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988/go.mod h1:7uvplUBj4RjHAxIZ//98LzOvrQ04JBkaixRmCMI29hc= +google.golang.org/genproto/googleapis/api v0.0.0-20240812133136-8ffd90a71988 h1:+/tmTy5zAieooKIXfzDm9KiA3Bv6JBwriRN9LY+yayk= +google.golang.org/genproto/googleapis/api v0.0.0-20240812133136-8ffd90a71988/go.mod h1:4+X6GvPs+25wZKbQq9qyAXrwIRExv7w0Ea6MgZLZiDM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240812133136-8ffd90a71988 h1:V71AcdLZr2p8dC9dbOIMCpqi4EmRl8wUwnJzXXLmbmc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240812133136-8ffd90a71988/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= @@ -673,9 +663,7 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -683,8 +671,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= -helm.sh/helm/v3 v3.15.3 h1:HcZDaVFe9uHa6hpsR54mJjYyRy4uz/pc6csg27nxFOc= -helm.sh/helm/v3 v3.15.3/go.mod h1:FzSIP8jDQaa6WAVg9F+OkKz7J0ZmAga4MABtTbsb9WQ= +helm.sh/helm/v3 v3.16.1 h1:cER6tI/8PgUAsaJaQCVBUg3VI9KN4oVaZJgY60RIc0c= +helm.sh/helm/v3 v3.16.1/go.mod h1:r+xBHHP20qJeEqtvBXMf7W35QDJnzY/eiEBzt+TfHps= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= @@ -709,8 +697,8 @@ k8s.io/kubectl v0.29.3 h1:RuwyyIU42MAISRIePaa8Q7A3U74Q9P4MoJbDFz9o3us= k8s.io/kubectl v0.29.3/go.mod h1:yCxfY1dbwgVdEt2zkJ6d5NNLOhhWgTyrqACIoFhpdd4= k8s.io/kubernetes v1.30.2 h1:11WhS78OYX/lnSy6TXxPO6Hk+E5K9ZNrEsk9JgMSX8I= k8s.io/kubernetes v1.30.2/go.mod h1:yPbIk3MhmhGigX62FLJm+CphNtjxqCvAIFQXup6RKS0= -k8s.io/utils v0.0.0-20240710235135-d4aae2beeffc h1:sAWhW/i0Lsz5ZUgeE9svkFa4UyoA+LNAsPcWnwQ2PzM= 
-k8s.io/utils v0.0.0-20240710235135-d4aae2beeffc/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo= oras.land/oras-go v1.2.5/go.mod h1:PuAwRShRZCsZb7g8Ar3jKKQR/2A/qN+pkYxIOd/FAoo= sigs.k8s.io/controller-runtime v0.17.5 h1:1FI9Lm7NiOOmBsgTV36/s2XrEFXnO2C4sbg/Zme72Rw= @@ -721,6 +709,8 @@ sigs.k8s.io/kustomize/api v0.17.2 h1:E7/Fjk7V5fboiuijoZHgs4aHuexi5Y2loXlVOAVAG5g sigs.k8s.io/kustomize/api v0.17.2/go.mod h1:UWTz9Ct+MvoeQsHcJ5e+vziRRkwimm3HytpZgIYqye0= sigs.k8s.io/kustomize/kyaml v0.17.1 h1:TnxYQxFXzbmNG6gOINgGWQt09GghzgTP6mIurOgrLCQ= sigs.k8s.io/kustomize/kyaml v0.17.1/go.mod h1:9V0mCjIEYjlXuCdYsSXvyoy2BTsLESH7TlGV81S282U= +sigs.k8s.io/scheduler-plugins v0.29.8 h1:T3qyi/mi+TwOEERAazwqJBjTWrMVfDS18DC2Es4g6HQ= +sigs.k8s.io/scheduler-plugins v0.29.8/go.mod h1:e8M31FE7JWXkx9yIZIwsJDwvTcmUAqWchy9MJRNGDDk= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/internal/controller/scheduledsparkapplication/event_filter.go b/internal/controller/scheduledsparkapplication/event_filter.go index e6ea5487b..f0d7568f5 100644 --- a/internal/controller/scheduledsparkapplication/event_filter.go +++ b/internal/controller/scheduledsparkapplication/event_filter.go @@ -35,9 +35,14 @@ var _ predicate.Predicate = &EventFilter{} // NewEventFilter creates a new EventFilter instance. func NewEventFilter(namespaces []string) *EventFilter { nsMap := make(map[string]bool) - for _, ns := range namespaces { - nsMap[ns] = true + if len(namespaces) == 0 { + nsMap[metav1.NamespaceAll] = true + } else { + for _, ns := range namespaces { + nsMap[ns] = true + } } + return &EventFilter{ namespaces: nsMap, } diff --git a/internal/controller/sparkapplication/controller.go b/internal/controller/sparkapplication/controller.go index 753108a90..e257b37f5 100644 --- a/internal/controller/sparkapplication/controller.go +++ b/internal/controller/sparkapplication/controller.go @@ -42,7 +42,9 @@ import ( "github.com/kubeflow/spark-operator/api/v1beta2" "github.com/kubeflow/spark-operator/internal/metrics" "github.com/kubeflow/spark-operator/internal/scheduler" + "github.com/kubeflow/spark-operator/internal/scheduler/kubescheduler" "github.com/kubeflow/spark-operator/internal/scheduler/volcano" + "github.com/kubeflow/spark-operator/internal/scheduler/yunikorn" "github.com/kubeflow/spark-operator/pkg/common" "github.com/kubeflow/spark-operator/pkg/util" ) @@ -53,10 +55,13 @@ var ( // Options defines the options of the controller. 
 type Options struct {
-    Namespaces       []string
-    EnableUIService  bool
-    IngressClassName string
-    IngressURLFormat string
+    Namespaces            []string
+    EnableUIService       bool
+    IngressClassName      string
+    IngressURLFormat      string
+    DefaultBatchScheduler string
+
+    KubeSchedulerNames []string

     SparkApplicationMetrics *metrics.SparkApplicationMetrics
     SparkExecutorMetrics    *metrics.SparkExecutorMetrics
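
The two new Options fields above drive scheduler selection: a SparkApplication's spec.batchScheduler, when set and non-empty, overrides the operator-wide DefaultBatchScheduler (see shouldDoBatchScheduling later in this diff). A minimal standalone sketch of that precedence follows; pickScheduler is a hypothetical helper written for illustration, not code from this PR:

package main

import "fmt"

// pickScheduler mirrors the precedence implemented in shouldDoBatchScheduling
// below: the application's batch scheduler, when set and non-empty, overrides
// the operator-wide default.
func pickScheduler(defaultScheduler string, appScheduler *string) string {
    if appScheduler != nil && *appScheduler != "" {
        return *appScheduler
    }
    return defaultScheduler
}

func main() {
    yunikorn := "yunikorn"
    fmt.Println(pickScheduler("volcano", nil))       // volcano
    fmt.Println(pickScheduler("volcano", &yunikorn)) // yunikorn
}
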
"namespace", key.Namespace) - return ctrl.Result{}, retryErr + app := old.DeepCopy() + if !util.IsTerminated(app) { + return ctrl.Result{}, nil } - return ctrl.Result{}, nil + + if util.IsExpired(app) { + logger.Info("Deleting expired SparkApplication", "name", app.Name, "namespace", app.Namespace, "state", app.Status.AppState.State) + if err := r.client.Delete(ctx, app); err != nil { + return ctrl.Result{Requeue: true}, err + } + return ctrl.Result{}, nil + } + + if err := r.updateExecutorState(ctx, app); err != nil { + return ctrl.Result{Requeue: true}, err + } + + if err := r.updateSparkApplicationStatus(ctx, app); err != nil { + return ctrl.Result{Requeue: true}, err + } + + if err := r.cleanUpOnTermination(old, app); err != nil { + logger.Error(err, "Failed to clean up resources for SparkApplication", "name", old.Name, "namespace", old.Namespace, "state", old.Status.AppState.State) + return ctrl.Result{Requeue: true}, err + } + + // If termination time or TTL is not set, will not requeue this application. + if app.Status.TerminationTime.IsZero() || app.Spec.TimeToLiveSeconds == nil || *app.Spec.TimeToLiveSeconds <= 0 { + return ctrl.Result{}, nil + } + + // Otherwise, requeue the application for subsequent deletion. + now := time.Now() + ttl := time.Duration(*app.Spec.TimeToLiveSeconds) * time.Second + survival := now.Sub(app.Status.TerminationTime.Time) + + // If survival time is greater than TTL, requeue the application immediately. + if survival >= ttl { + return ctrl.Result{Requeue: true}, nil + } + // Otherwise, requeue the application after (TTL - survival) seconds. + return ctrl.Result{RequeueAfter: ttl - survival}, nil } func (r *Reconciler) reconcileUnknownSparkApplication(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -667,7 +651,7 @@ func (r *Reconciler) submitSparkApplication(app *v1beta2.SparkApplication) error if r.options.EnableUIService { service, err := r.createWebUIService(app) if err != nil { - return fmt.Errorf("failed to create web UI service") + return fmt.Errorf("failed to create web UI service: %v", err) } app.Status.DriverInfo.WebUIServiceName = service.serviceName app.Status.DriverInfo.WebUIPort = service.servicePort @@ -691,7 +675,7 @@ func (r *Reconciler) submitSparkApplication(app *v1beta2.SparkApplication) error } ingress, err := r.createWebUIIngress(app, *service, ingressURL, r.options.IngressClassName) if err != nil { - return fmt.Errorf("failed to create web UI service") + return fmt.Errorf("failed to create web UI ingress: %v", err) } app.Status.DriverInfo.WebUIIngressAddress = ingress.ingressURL.String() app.Status.DriverInfo.WebUIIngressName = ingress.ingressName @@ -1183,20 +1167,42 @@ func (r *Reconciler) resetSparkApplicationStatus(app *v1beta2.SparkApplication) } func (r *Reconciler) shouldDoBatchScheduling(app *v1beta2.SparkApplication) (bool, scheduler.Interface) { - if r.registry == nil || app.Spec.BatchScheduler == nil || *app.Spec.BatchScheduler == "" { + // If batch scheduling isn't enabled + if r.registry == nil { + return false, nil + } + + schedulerName := r.options.DefaultBatchScheduler + if app.Spec.BatchScheduler != nil && *app.Spec.BatchScheduler != "" { + schedulerName = *app.Spec.BatchScheduler + } + + // If both the default and app batch scheduler are unspecified or empty + if schedulerName == "" { return false, nil } var err error var scheduler scheduler.Interface - schedulerName := *app.Spec.BatchScheduler switch schedulerName { case common.VolcanoSchedulerName: config := &volcano.Config{ RestConfig: 
r.manager.GetConfig(), } scheduler, err = r.registry.GetScheduler(schedulerName, config) + case yunikorn.SchedulerName: + scheduler, err = r.registry.GetScheduler(schedulerName, nil) + } + + for _, name := range r.options.KubeSchedulerNames { + if schedulerName == name { + config := &kubescheduler.Config{ + SchedulerName: name, + Client: r.manager.GetClient(), + } + scheduler, err = r.registry.GetScheduler(name, config) + } } if err != nil || scheduler == nil { diff --git a/internal/controller/sparkapplication/driveringress.go b/internal/controller/sparkapplication/driveringress.go index 982ee8b03..7cf2af83b 100644 --- a/internal/controller/sparkapplication/driveringress.go +++ b/internal/controller/sparkapplication/driveringress.go @@ -212,7 +212,7 @@ func (r *Reconciler) createDriverIngressLegacy(app *v1beta2.SparkApplication, se if len(ingressTLSHosts) != 0 { ingress.Spec.TLS = convertIngressTLSHostsToLegacy(ingressTLSHosts) } - logger.Info("Creating extensions.v1beta1/Ingress for SparkApplication web UI", app.Name, "namespace", app.Namespace, "ingressName", ingress.Name) + logger.Info("Creating extensions.v1beta1/Ingress for SparkApplication web UI", "name", app.Name, "namespace", app.Namespace, "ingressName", ingress.Name) if err := r.client.Create(context.TODO(), ingress); err != nil { return nil, fmt.Errorf("failed to create ingress %s/%s: %v", ingress.Namespace, ingress.Name, err) } diff --git a/internal/controller/sparkapplication/event_filter.go b/internal/controller/sparkapplication/event_filter.go index 3fe49ee13..121155f2e 100644 --- a/internal/controller/sparkapplication/event_filter.go +++ b/internal/controller/sparkapplication/event_filter.go @@ -42,8 +42,12 @@ var _ predicate.Predicate = &sparkPodEventFilter{} // newSparkPodEventFilter creates a new SparkPodEventFilter instance. func newSparkPodEventFilter(namespaces []string) *sparkPodEventFilter { nsMap := make(map[string]bool) - for _, ns := range namespaces { - nsMap[ns] = true + if len(namespaces) == 0 { + nsMap[metav1.NamespaceAll] = true + } else { + for _, ns := range namespaces { + nsMap[ns] = true + } } return &sparkPodEventFilter{ @@ -118,8 +122,12 @@ var _ predicate.Predicate = &EventFilter{} func NewSparkApplicationEventFilter(client client.Client, recorder record.EventRecorder, namespaces []string) *EventFilter { nsMap := make(map[string]bool) - for _, ns := range namespaces { - nsMap[ns] = true + if len(namespaces) == 0 { + nsMap[metav1.NamespaceAll] = true + } else { + for _, ns := range namespaces { + nsMap[ns] = true + } } return &EventFilter{ diff --git a/internal/scheduler/kubescheduler/scheduler.go b/internal/scheduler/kubescheduler/scheduler.go new file mode 100644 index 000000000..b7126b137 --- /dev/null +++ b/internal/scheduler/kubescheduler/scheduler.go @@ -0,0 +1,159 @@ +/* +Copyright 2024 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package kubescheduler
+
+import (
+    "context"
+    "fmt"
+
+    corev1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/errors"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/types"
+    "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/log"
+    schedulingv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+    "github.com/kubeflow/spark-operator/internal/scheduler"
+    "github.com/kubeflow/spark-operator/pkg/util"
+)
+
+const (
+    Name = "kube-scheduler"
+)
+
+var (
+    logger = log.Log.WithName("")
+)
+
+// Scheduler is a scheduler that uses scheduler plugins to schedule Spark pods.
+// Ref: https://github.com/kubernetes-sigs/scheduler-plugins.
+type Scheduler struct {
+    name   string
+    client client.Client
+}
+
+// Scheduler implements scheduler.Interface.
+var _ scheduler.Interface = &Scheduler{}
+
+// Config defines the configurations of kube-scheduler.
+type Config struct {
+    SchedulerName string
+    Client        client.Client
+}
+
+// Config implements scheduler.Config.
+var _ scheduler.Config = &Config{}
+
+// Factory creates a new Scheduler instance.
+func Factory(config scheduler.Config) (scheduler.Interface, error) {
+    c, ok := config.(*Config)
+    if !ok {
+        return nil, fmt.Errorf("failed to get kube-scheduler config")
+    }
+
+    scheduler := &Scheduler{
+        name:   c.SchedulerName,
+        client: c.Client,
+    }
+    return scheduler, nil
+}
+
+// Name implements scheduler.Interface.
+func (s *Scheduler) Name() string {
+    return s.name
+}
+
+// ShouldSchedule implements scheduler.Interface.
+func (s *Scheduler) ShouldSchedule(app *v1beta2.SparkApplication) bool {
+    // There are no additional requirements for scheduling.
+    return true
+}
+
+// Schedule implements scheduler.Interface.
+func (s *Scheduler) Schedule(app *v1beta2.SparkApplication) error {
+    minResources := util.SumResourceList([]corev1.ResourceList{util.GetDriverRequestResource(app), util.GetExecutorRequestResource(app)})
+    podGroup := &schedulingv1alpha1.PodGroup{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      getPodGroupName(app),
+            Namespace: app.Namespace,
+            OwnerReferences: []metav1.OwnerReference{
+                *metav1.NewControllerRef(app, v1beta2.SchemeGroupVersion.WithKind("SparkApplication")),
+            },
+        },
+        Spec: schedulingv1alpha1.PodGroupSpec{
+            MinMember:    1,
+            MinResources: minResources,
+        },
+    }
+
+    if err := s.syncPodGroup(podGroup); err != nil {
+        return fmt.Errorf("failed to sync pod group: %v", err)
+    }
+
+    // Add the label `scheduling.x-k8s.io/pod-group` to mark that the pod belongs to a group
+    if app.ObjectMeta.Labels == nil {
+        app.ObjectMeta.Labels = make(map[string]string)
+    }
+    app.ObjectMeta.Labels[schedulingv1alpha1.PodGroupLabel] = podGroup.Name
+
+    return nil
+}
+
+// Cleanup implements scheduler.Interface.
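+// It deletes the PodGroup created for the application; a PodGroup that is already gone is treated as cleaned up.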
+func (s *Scheduler) Cleanup(app *v1beta2.SparkApplication) error {
+    podGroup := &schedulingv1alpha1.PodGroup{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      getPodGroupName(app),
+            Namespace: app.Namespace,
+        },
+    }
+    if err := s.client.Delete(context.TODO(), podGroup); err != nil {
+        if errors.IsNotFound(err) {
+            return nil
+        }
+        return err
+    }
+    logger.Info("Deleted PodGroup", "Name", podGroup.Name, "Namespace", podGroup.Namespace)
+    return nil
+}
+
+func (s *Scheduler) syncPodGroup(podGroup *schedulingv1alpha1.PodGroup) error {
+    key := types.NamespacedName{
+        Namespace: podGroup.Namespace,
+        Name:      podGroup.Name,
+    }
+
+    if err := s.client.Get(context.TODO(), key, &schedulingv1alpha1.PodGroup{}); err != nil {
+        if !errors.IsNotFound(err) {
+            return err
+        }
+
+        if err := s.client.Create(context.TODO(), podGroup); err != nil {
+            return err
+        }
+        logger.Info("Created PodGroup", "Name", podGroup.Name, "Namespace", podGroup.Namespace)
+        return nil
+    }
+
+    if err := s.client.Update(context.TODO(), podGroup); err != nil {
+        return err
+    }
+    logger.Info("Updated PodGroup", "Name", podGroup.Name, "Namespace", podGroup.Namespace)
+    return nil
+}
diff --git a/internal/scheduler/kubescheduler/util.go b/internal/scheduler/kubescheduler/util.go
new file mode 100644
index 000000000..f4996a204
--- /dev/null
+++ b/internal/scheduler/kubescheduler/util.go
@@ -0,0 +1,27 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kubescheduler
+
+import (
+    "fmt"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+)
+
+func getPodGroupName(app *v1beta2.SparkApplication) string {
+    return fmt.Sprintf("%s-pg", app.Name)
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/java.go b/internal/scheduler/yunikorn/resourceusage/java.go
new file mode 100644
index 000000000..8b56d64aa
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/java.go
@@ -0,0 +1,56 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "fmt"
+    "regexp"
+    "strconv"
+    "strings"
+)
+
+var (
+    javaStringSuffixes = map[string]int64{
+        "b":  1,
+        "kb": 1 << 10,
+        "k":  1 << 10,
+        "mb": 1 << 20,
+        "m":  1 << 20,
+        "gb": 1 << 30,
+        "g":  1 << 30,
+        "tb": 1 << 40,
+        "t":  1 << 40,
+        "pb": 1 << 50,
+        "p":  1 << 50,
+    }
+
+    javaStringPattern = regexp.MustCompile(`^([0-9]+)([a-z]+)?$`)
+)
+
+func byteStringAsBytes(byteString string) (int64, error) {
+    matches := javaStringPattern.FindStringSubmatch(strings.ToLower(byteString))
+    if matches != nil {
+        value, err := strconv.ParseInt(matches[1], 10, 64)
+        if err != nil {
+            return 0, err
+        }
+        if multiplier, present := javaStringSuffixes[matches[2]]; present {
+            return value * multiplier, nil
+        }
+    }
+    return 0, fmt.Errorf("unable to parse byte string: %s", byteString)
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/java_test.go b/internal/scheduler/yunikorn/resourceusage/java_test.go
new file mode 100644
index 000000000..d9d1ae59e
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/java_test.go
@@ -0,0 +1,63 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+)
+
+func TestByteStringAsBytes(t *testing.T) {
+    testCases := []struct {
+        input    string
+        expected int
+    }{
+        {"1k", 1024},
+        {"1m", 1024 * 1024},
+        {"1g", 1024 * 1024 * 1024},
+        {"1t", 1024 * 1024 * 1024 * 1024},
+        {"1p", 1024 * 1024 * 1024 * 1024 * 1024},
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.input, func(t *testing.T) {
+            actual, err := byteStringAsBytes(tc.input)
+            assert.Nil(t, err)
+            assert.Equal(t, int64(tc.expected), actual)
+        })
+    }
+}
+
+func TestByteStringAsBytesInvalid(t *testing.T) {
+    invalidInputs := []string{
+        "0.064",
+        "0.064m",
+        "500ub",
+        "This breaks 600b",
+        "This breaks 600",
+        "600gb This breaks",
+        "This 123mb breaks",
+    }
+
+    for _, input := range invalidInputs {
+        t.Run(input, func(t *testing.T) {
+            _, err := byteStringAsBytes(input)
+            assert.NotNil(t, err)
+        })
+    }
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/memory.go b/internal/scheduler/yunikorn/resourceusage/memory.go
new file mode 100644
index 000000000..88e17cdc9
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/memory.go
@@ -0,0 +1,133 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "fmt"
+    "math"
+    "strconv"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+    "github.com/kubeflow/spark-operator/pkg/common"
+)
+
+func isJavaApp(appType v1beta2.SparkApplicationType) bool {
+    return appType == v1beta2.SparkApplicationTypeJava || appType == v1beta2.SparkApplicationTypeScala
+}
+
+func getMemoryOverheadFactor(app *v1beta2.SparkApplication) (float64, error) {
+    if app.Spec.MemoryOverheadFactor != nil {
+        parsed, err := strconv.ParseFloat(*app.Spec.MemoryOverheadFactor, 64)
+        if err != nil {
+            return 0, fmt.Errorf("failed to parse memory overhead factor as float: %w", err)
+        }
+        return parsed, nil
+    } else if isJavaApp(app.Spec.Type) {
+        return common.DefaultJVMMemoryOverheadFactor, nil
+    }
+
+    return common.DefaultNonJVMMemoryOverheadFactor, nil
+}
+
+func memoryRequestBytes(podSpec *v1beta2.SparkPodSpec, memoryOverheadFactor float64) (int64, error) {
+    var memoryBytes, memoryOverheadBytes int64
+
+    if podSpec.Memory != nil {
+        parsed, err := byteStringAsBytes(*podSpec.Memory)
+        if err != nil {
+            return 0, err
+        }
+        memoryBytes = parsed
+    }
+
+    if podSpec.MemoryOverhead != nil {
+        parsed, err := byteStringAsBytes(*podSpec.MemoryOverhead)
+        if err != nil {
+            return 0, err
+        }
+        memoryOverheadBytes = parsed
+    } else {
+        memoryOverheadBytes = int64(math.Max(
+            float64(memoryBytes)*memoryOverheadFactor,
+            common.MinMemoryOverhead,
+        ))
+    }
+
+    return memoryBytes + memoryOverheadBytes, nil
+}
+
+func executorPysparkMemoryBytes(app *v1beta2.SparkApplication) (int64, error) {
+    pysparkMemory, found := app.Spec.SparkConf["spark.executor.pyspark.memory"]
+    if app.Spec.Type != v1beta2.SparkApplicationTypePython || !found {
+        return 0, nil
+    }
+
+    // This field defaults to mebibytes if no resource suffix is specified
+    // https://github.com/apache/spark/blob/7de71a2ec78d985c2a045f13c1275101b126cec4/docs/configuration.md?plain=1#L289-L305
+    if _, err := strconv.Atoi(pysparkMemory); err == nil {
+        pysparkMemory = pysparkMemory + "m"
+    }
+
+    pysparkMemoryBytes, err := byteStringAsBytes(pysparkMemory)
+    if err != nil {
+        return 0, err
+    }
+
+    return pysparkMemoryBytes, nil
+}
+
+func bytesToMi(b int64) string {
+    // This floors the value to the nearest mebibyte
+    return fmt.Sprintf("%dMi", b/1024/1024)
+}
+
+func driverMemoryRequest(app *v1beta2.SparkApplication) (string, error) {
+    memoryOverheadFactor, err := getMemoryOverheadFactor(app)
+    if err != nil {
+        return "", err
+    }
+
+    requestBytes, err := memoryRequestBytes(&app.Spec.Driver.SparkPodSpec, memoryOverheadFactor)
+    if err != nil {
+        return "", err
+    }
+
+    // Convert memory quantity to mebibytes even if larger than a gibibyte to match Spark
+    // https://github.com/apache/spark/blob/11b682cf5b7c5360a02410be288b7905eecc1d28/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala#L88
+    // https://github.com/apache/spark/blob/11b682cf5b7c5360a02410be288b7905eecc1d28/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala#L121
+    return bytesToMi(requestBytes), nil
+}
+
+func executorMemoryRequest(app *v1beta2.SparkApplication) (string, error) {
+    memoryOverheadFactor, err := getMemoryOverheadFactor(app)
+    if err != nil {
+        return "", err
+    }
+
+    requestBytes, err := memoryRequestBytes(&app.Spec.Executor.SparkPodSpec, memoryOverheadFactor)
+    if err != nil {
+        return "", err
+    }
+
+    pysparkMemoryBytes, err := executorPysparkMemoryBytes(app)
+    if err != nil {
+        return "", err
+    }
+
+    // See comment above in driver
+    return bytesToMi(requestBytes + pysparkMemoryBytes), nil
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/memory_test.go b/internal/scheduler/yunikorn/resourceusage/memory_test.go
new file mode 100644
index 000000000..f7fa64b7d
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/memory_test.go
@@ -0,0 +1,39 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+)
+
+func TestBytesToMi(t *testing.T) {
+    testCases := []struct {
+        input    int64
+        expected string
+    }{
+        {(2 * 1024 * 1024) - 1, "1Mi"},
+        {2 * 1024 * 1024, "2Mi"},
+        {(1024 * 1024 * 1024) - 1, "1023Mi"},
+        {1024 * 1024 * 1024, "1024Mi"},
+    }
+
+    for _, tc := range testCases {
+        assert.Equal(t, tc.expected, bytesToMi(tc.input))
+    }
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/resource_usage.go b/internal/scheduler/yunikorn/resourceusage/resource_usage.go
new file mode 100644
index 000000000..adc3e0e98
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/resource_usage.go
@@ -0,0 +1,76 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "fmt"
+
+    "k8s.io/apimachinery/pkg/api/resource"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+)
+
+func cpuRequest(cores *int32, coreRequest *string) (string, error) {
+    // coreRequest takes precedence over cores if specified
+    // coreLimit is not relevant as pods are scheduled based on request values
+    if coreRequest != nil {
+        // Fail fast by validating coreRequest before app submission even though
+        // both Spark and Yunikorn validate this field anyway
+        if _, err := resource.ParseQuantity(*coreRequest); err != nil {
+            return "", fmt.Errorf("failed to parse %s: %w", *coreRequest, err)
+        }
+        return *coreRequest, nil
+    }
+    if cores != nil {
+        return fmt.Sprintf("%d", *cores), nil
+    }
+    return "1", nil
+}
+
+func DriverPodRequests(app *v1beta2.SparkApplication) (map[string]string, error) {
+    cpuValue, err := cpuRequest(app.Spec.Driver.Cores, app.Spec.Driver.CoreRequest)
+    if err != nil {
+        return nil, err
+    }
+
+    memoryValue, err := driverMemoryRequest(app)
+    if err != nil {
+        return nil, err
+    }
+
+    return map[string]string{
+        "cpu":    cpuValue,
+        "memory": memoryValue,
+    }, nil
+}
+
+func ExecutorPodRequests(app *v1beta2.SparkApplication) (map[string]string, error) {
+    cpuValue, err := cpuRequest(app.Spec.Executor.Cores, app.Spec.Executor.CoreRequest)
+    if err != nil {
+        return nil, err
+    }
+
+    memoryValue, err := executorMemoryRequest(app)
+    if err != nil {
+        return nil, err
+    }
+
+    return map[string]string{
+        "cpu":    cpuValue,
+        "memory": memoryValue,
+    }, nil
+}
diff --git a/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go b/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go
new file mode 100644
index 000000000..8cadb800f
--- /dev/null
+++ b/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go
@@ -0,0 +1,57 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package resourceusage
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+
+    "github.com/kubeflow/spark-operator/pkg/util"
+)
+
+func TestCpuRequest(t *testing.T) {
+    testCases := []struct {
+        cores       *int32
+        coreRequest *string
+        expected    string
+    }{
+        {nil, nil, "1"},
+        {util.Int32Ptr(1), nil, "1"},
+        {nil, util.StringPtr("1"), "1"},
+        {util.Int32Ptr(1), util.StringPtr("500m"), "500m"},
+    }
+
+    for _, tc := range testCases {
+        actual, err := cpuRequest(tc.cores, tc.coreRequest)
+        assert.Nil(t, err)
+        assert.Equal(t, tc.expected, actual)
+    }
+}
+
+func TestCpuRequestInvalid(t *testing.T) {
+    invalidInputs := []string{
+        "",
+        "asd",
+        "Random 500m",
+    }
+
+    for _, input := range invalidInputs {
+        _, err := cpuRequest(nil, &input)
+        assert.NotNil(t, err)
+    }
+}
diff --git a/internal/scheduler/yunikorn/scheduler.go b/internal/scheduler/yunikorn/scheduler.go
new file mode 100644
index 000000000..4f9444c7a
--- /dev/null
+++ b/internal/scheduler/yunikorn/scheduler.go
@@ -0,0 +1,183 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package yunikorn
+
+import (
+    "encoding/json"
+    "fmt"
+    "maps"
+
+    v1 "k8s.io/api/core/v1"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+    "github.com/kubeflow/spark-operator/internal/scheduler"
+    "github.com/kubeflow/spark-operator/internal/scheduler/yunikorn/resourceusage"
+    "github.com/kubeflow/spark-operator/pkg/util"
+)
+
+const (
+    SchedulerName = "yunikorn"
+
+    // The names are set to match the Yunikorn gang scheduling example for Spark, but these can be any
+    // value as long as what's on the pod matches the task group definition
+    // https://yunikorn.apache.org/docs/next/user_guide/gang_scheduling/#enable-gang-scheduling-for-spark-jobs
+    driverTaskGroupName   = "spark-driver"
+    executorTaskGroupName = "spark-executor"
+
+    // https://yunikorn.apache.org/docs/next/user_guide/labels_and_annotations_in_yunikorn/
+    taskGroupNameAnnotation = "yunikorn.apache.org/task-group-name"
+    taskGroupsAnnotation    = "yunikorn.apache.org/task-groups"
+    queueLabel              = "queue"
+)
+
+// This struct has been defined separately rather than imported so that tags can be included for JSON marshalling
+// https://github.com/apache/yunikorn-k8shim/blob/207e4031c6484c965fca4018b6b8176afc5956b4/pkg/cache/amprotocol.go#L47-L56
+type taskGroup struct {
+    Name         string            `json:"name"`
+    MinMember    int32             `json:"minMember"`
+    MinResource  map[string]string `json:"minResource,omitempty"`
+    NodeSelector map[string]string `json:"nodeSelector,omitempty"`
+    Tolerations  []v1.Toleration   `json:"tolerations,omitempty"`
+    Affinity     *v1.Affinity      `json:"affinity,omitempty"`
+    Labels       map[string]string `json:"labels,omitempty"`
+}
+
+type Scheduler struct{}
+
+func Factory(_ scheduler.Config) (scheduler.Interface, error) {
+    return &Scheduler{}, nil
+}
+
+func (s *Scheduler) Name() string {
+    return SchedulerName
+}
+
+func (s *Scheduler) ShouldSchedule(_ *v1beta2.SparkApplication) bool {
+    // Yunikorn gets all the information it needs from pod annotations on the originating pod,
+    // so no additional resources need to be created
+    return true
+}
+
+func (s *Scheduler) Schedule(app *v1beta2.SparkApplication) error {
+    driverMinResources, err := resourceusage.DriverPodRequests(app)
+    if err != nil {
+        return fmt.Errorf("failed to calculate driver minResources: %w", err)
+    }
+
+    taskGroups := []taskGroup{
+        {
+            Name:         driverTaskGroupName,
+            MinMember:    1,
+            MinResource:  driverMinResources,
+            NodeSelector: mergeNodeSelector(app.Spec.NodeSelector, app.Spec.Driver.NodeSelector),
+            Tolerations:  app.Spec.Driver.Tolerations,
+            Affinity:     app.Spec.Driver.Affinity,
+            Labels:       app.Spec.Driver.Labels,
+        },
+    }
+
+    // A minMember of zero is not a valid config for a Yunikorn task group, so we should leave out
+    // the executor task group completely if the initial number of executors is zero
+    if numInitialExecutors := util.GetInitialExecutorNumber(app); numInitialExecutors > 0 {
+        executorMinResources, err := resourceusage.ExecutorPodRequests(app)
+        if err != nil {
+            return fmt.Errorf("failed to calculate executor minResources: %w", err)
+        }
+
+        taskGroups = append(taskGroups, taskGroup{
+            Name:         executorTaskGroupName,
+            MinMember:    numInitialExecutors,
+            MinResource:  executorMinResources,
+            NodeSelector: mergeNodeSelector(app.Spec.NodeSelector, app.Spec.Executor.NodeSelector),
+            Tolerations:  app.Spec.Executor.Tolerations,
+            Affinity:     app.Spec.Executor.Affinity,
+            Labels:       app.Spec.Executor.Labels,
+        })
+    }
+
+    // Ensure that the driver and executor pods are scheduled by Yunikorn
+    // if it is installed with the admission controller disabled
+    app.Spec.Driver.SchedulerName = util.StringPtr(SchedulerName)
+    app.Spec.Executor.SchedulerName = util.StringPtr(SchedulerName)
+
+    // Yunikorn re-uses the application ID set by the driver under the label "spark-app-selector",
+    // so there is no need to set an application ID
+    // https://github.com/apache/yunikorn-k8shim/blob/2278b3217c702ccb796e4d623bc7837625e5a4ec/pkg/common/utils/utils.go#L168-L171
+    addQueueLabels(app)
+    if err := addTaskGroupAnnotations(app, taskGroups); err != nil {
+        return fmt.Errorf("failed to add task group annotations: %w", err)
+    }
+
+    return nil
+}
+
+func (s *Scheduler) Cleanup(_ *v1beta2.SparkApplication) error {
+    // No additional resources are created so there's nothing to be cleaned up
+    return nil
+}
+
+func addTaskGroupAnnotations(app *v1beta2.SparkApplication, taskGroups []taskGroup) error {
+    marshalledTaskGroups, err := json.Marshal(taskGroups)
+    if err != nil {
+        return fmt.Errorf("failed to marshal taskGroups: %w", err)
+    }
+
+    if app.Spec.Driver.Annotations == nil {
+        app.Spec.Driver.Annotations = make(map[string]string)
+    }
+    if app.Spec.Executor.Annotations == nil {
+        app.Spec.Executor.Annotations = make(map[string]string)
+    }
+
+    app.Spec.Driver.Annotations[taskGroupNameAnnotation] = driverTaskGroupName
+    app.Spec.Executor.Annotations[taskGroupNameAnnotation] = executorTaskGroupName
+
+    // The task group definition only needs to be present on the originating pod
+    // https://yunikorn.apache.org/docs/next/user_guide/gang_scheduling/#app-configuration
+    app.Spec.Driver.Annotations[taskGroupsAnnotation] = string(marshalledTaskGroups)
+
+    return nil
+}
+
+func addQueueLabels(app *v1beta2.SparkApplication) {
+    if app.Spec.BatchSchedulerOptions != nil && app.Spec.BatchSchedulerOptions.Queue != nil {
+        if app.Spec.Driver.Labels == nil {
+            app.Spec.Driver.Labels = make(map[string]string)
+        }
+        if app.Spec.Executor.Labels == nil {
+            app.Spec.Executor.Labels = make(map[string]string)
+        }
+
+        app.Spec.Driver.Labels[queueLabel] = *app.Spec.BatchSchedulerOptions.Queue
+        app.Spec.Executor.Labels[queueLabel] = *app.Spec.BatchSchedulerOptions.Queue
+    }
+}
+
+func mergeNodeSelector(appNodeSelector map[string]string, podNodeSelector map[string]string) map[string]string {
+    // app.Spec.NodeSelector is passed via "spark.kubernetes.node.selector.%s", which means it will be present
+    // in the pod definition before the mutating webhook. The mutating webhook merges the driver/executor-specific
+    // NodeSelector with what's already present
+    nodeSelector := make(map[string]string)
+    maps.Copy(nodeSelector, appNodeSelector)
+    maps.Copy(nodeSelector, podNodeSelector)
+
+    // Return nil if there are no entries in the map so that the field is skipped during JSON marshalling
+    if len(nodeSelector) == 0 {
+        return nil
+    }
+    return nodeSelector
+}
diff --git a/internal/scheduler/yunikorn/scheduler_test.go b/internal/scheduler/yunikorn/scheduler_test.go
new file mode 100644
index 000000000..8d44e9df9
--- /dev/null
+++ b/internal/scheduler/yunikorn/scheduler_test.go
@@ -0,0 +1,331 @@
+/*
+Copyright 2024 The Kubeflow authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package yunikorn
+
+import (
+    "encoding/json"
+    "testing"
+
+    v1 "k8s.io/api/core/v1"
+
+    "github.com/stretchr/testify/assert"
+
+    "github.com/kubeflow/spark-operator/api/v1beta2"
+    "github.com/kubeflow/spark-operator/pkg/util"
+)
+
+func TestSchedule(t *testing.T) {
+    testCases := []struct {
+        name     string
+        app      *v1beta2.SparkApplication
+        expected []taskGroup
+    }{
+        {
+            name: "spark-pi-yunikorn",
+            app: &v1beta2.SparkApplication{
+                Spec: v1beta2.SparkApplicationSpec{
+                    Type: v1beta2.SparkApplicationTypeScala,
+                    Driver: v1beta2.DriverSpec{
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:     util.Int32Ptr(1),
+                            CoreLimit: util.StringPtr("1200m"),
+                            Memory:    util.StringPtr("512m"),
+                        },
+                    },
+                    Executor: v1beta2.ExecutorSpec{
+                        Instances: util.Int32Ptr(2),
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:     util.Int32Ptr(1),
+                            CoreLimit: util.StringPtr("1200m"),
+                            Memory:    util.StringPtr("512m"),
+                        },
+                    },
+                    BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{
+                        Queue: util.StringPtr("root.default"),
+                    },
+                },
+            },
+            expected: []taskGroup{
+                {
+                    Name:      "spark-driver",
+                    MinMember: 1,
+                    MinResource: map[string]string{
+                        "cpu":    "1",
+                        "memory": "896Mi", // 512Mi + 384Mi min overhead
+                    },
+                },
+                {
+                    Name:      "spark-executor",
+                    MinMember: 2,
+                    MinResource: map[string]string{
+                        "cpu":    "1",
+                        "memory": "896Mi", // 512Mi + 384Mi min overhead
+                    },
+                },
+            },
+        },
+        {
+            name: "Dynamic allocation and memory overhead",
+            app: &v1beta2.SparkApplication{
+                Spec: v1beta2.SparkApplicationSpec{
+                    Type:                 v1beta2.SparkApplicationTypePython,
+                    MemoryOverheadFactor: util.StringPtr("0.3"),
+                    Driver: v1beta2.DriverSpec{
+                        CoreRequest: util.StringPtr("2000m"),
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:  util.Int32Ptr(4),
+                            Memory: util.StringPtr("8g"),
+                        },
+                    },
+                    Executor: v1beta2.ExecutorSpec{
+                        Instances: util.Int32Ptr(4),
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            MemoryOverhead: util.StringPtr("2g"),
+                            Cores:          util.Int32Ptr(8),
+                            Memory:         util.StringPtr("64g"),
+                        },
+                    },
+                    DynamicAllocation: &v1beta2.DynamicAllocation{
+                        Enabled:          true,
+                        InitialExecutors: util.Int32Ptr(8),
+                        MinExecutors:     util.Int32Ptr(2),
+                    },
+                    BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{
+                        Queue: util.StringPtr("root.default"),
+                    },
+                },
+            },
+            expected: []taskGroup{
+                {
+                    Name:      "spark-driver",
+                    MinMember: 1,
+                    MinResource: map[string]string{
+                        "cpu":    "2000m",   // CoreRequest takes precedence over Cores
+                        "memory": "10649Mi", // 1024Mi * 8 * 1.3 (manually specified overhead)
+                    },
+                },
+                {
+                    Name:      "spark-executor",
+                    MinMember: 8, // Max of instances, dynamic allocation min and initial
+                    MinResource: map[string]string{
+                        "cpu":    "8",
+                        "memory": "67584Mi", // 1024Mi * 64 + 1024 * 2 (executor memory overhead takes precedence)
+                    },
+                },
+            },
+        },
+        {
+            name: "Node selectors, tolerations, affinity and labels",
+            app: &v1beta2.SparkApplication{
+                Spec: v1beta2.SparkApplicationSpec{
+                    Type:         v1beta2.SparkApplicationTypePython,
+                    NodeSelector: map[string]string{"key": "value"},
+                    Driver: v1beta2.DriverSpec{
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:        util.Int32Ptr(1),
+                            Memory:       util.StringPtr("1g"),
+                            NodeSelector: map[string]string{"key": "newvalue", "key2": "value2"},
+                            Tolerations: []v1.Toleration{
+                                {
+                                    Key:      "example-key",
+                                    Operator: v1.TolerationOpEqual,
+                                    Value:    "example-value",
+                                    Effect:   v1.TaintEffectNoSchedule,
+                                },
+                            },
+                        },
+                    },
+                    Executor: v1beta2.ExecutorSpec{
+                        Instances: util.Int32Ptr(1),
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:  util.Int32Ptr(1),
+                            Memory: util.StringPtr("1g"),
+                            Affinity: &v1.Affinity{
+                                NodeAffinity: &v1.NodeAffinity{
+                                    RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+                                        NodeSelectorTerms: []v1.NodeSelectorTerm{
+                                            {
+                                                MatchExpressions: []v1.NodeSelectorRequirement{
+                                                    {
+                                                        Key:      "another-key",
+                                                        Operator: v1.NodeSelectorOpIn,
+                                                        Values:   []string{"value1", "value2"},
+                                                    },
+                                                },
+                                            },
+                                        },
+                                    },
+                                },
+                            },
+                            Labels: map[string]string{"label": "value"},
+                        },
+                    },
+                },
+            },
+            expected: []taskGroup{
+                {
+                    Name:      "spark-driver",
+                    MinMember: 1,
+                    MinResource: map[string]string{
+                        "cpu":    "1",
+                        "memory": "1433Mi", // 1024Mi * 1.4 non-JVM overhead
+                    },
+                    NodeSelector: map[string]string{"key": "newvalue", "key2": "value2"},
+                    Tolerations: []v1.Toleration{
+                        {
+                            Key:      "example-key",
+                            Operator: v1.TolerationOpEqual,
+                            Value:    "example-value",
+                            Effect:   v1.TaintEffectNoSchedule,
+                        },
+                    },
+                },
+                {
+                    Name:      "spark-executor",
+                    MinMember: 1,
+                    MinResource: map[string]string{
+                        "cpu":    "1",
+                        "memory": "1433Mi", // 1024Mi * 1.4 non-JVM overhead
+                    },
+                    NodeSelector: map[string]string{"key": "value"}, // No executor-specific node selector
+                    Affinity: &v1.Affinity{
+                        NodeAffinity: &v1.NodeAffinity{
+                            RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+                                NodeSelectorTerms: []v1.NodeSelectorTerm{
+                                    {
+                                        MatchExpressions: []v1.NodeSelectorRequirement{
+                                            {
+                                                Key:      "another-key",
+                                                Operator: v1.NodeSelectorOpIn,
+                                                Values:   []string{"value1", "value2"},
+                                            },
+                                        },
+                                    },
+                                },
+                            },
+                        },
+                    },
+                    Labels: map[string]string{"label": "value"},
+                },
+            },
+        },
+        {
+            name: "spark.executor.pyspark.memory",
+            app: &v1beta2.SparkApplication{
+                Spec: v1beta2.SparkApplicationSpec{
+                    Type: v1beta2.SparkApplicationTypePython,
+                    Driver: v1beta2.DriverSpec{
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:  util.Int32Ptr(1),
+                            Memory: util.StringPtr("512m"),
+                        },
+                    },
+                    Executor: v1beta2.ExecutorSpec{
+                        Instances: util.Int32Ptr(2),
+                        SparkPodSpec: v1beta2.SparkPodSpec{
+                            Cores:  util.Int32Ptr(1),
+                            Memory: util.StringPtr("512m"),
+                        },
+                    },
+                    SparkConf: map[string]string{
+                        "spark.executor.pyspark.memory": "500m",
+                    },
+                },
+            },
+            expected: []taskGroup{
+                {
+                    Name:      "spark-driver",
+                    MinMember: 1,
+                    MinResource: map[string]string{
+                        "cpu":    "1",
+                        "memory": "896Mi", // 512Mi + 384Mi min overhead
+                    },
+                },
+                {
+                    Name:      "spark-executor",
+                    MinMember: 2,
+                    MinResource: map[string]string{
+                        "cpu": "1",
+                        // 512Mi + 384Mi min overhead + 500Mi spark.executor.pyspark.memory
+                        "memory": "1396Mi",
+                    },
+                },
+            },
+        },
+    }
+
+    scheduler := &Scheduler{}
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            marshalledExpected, err := json.Marshal(tc.expected)
+            if err != nil {
+                t.Fatalf("Failed to marshal expected task groups: %v", err)
+            }
+
+            err = scheduler.Schedule(tc.app)
+            assert.Nil(t, err)
+            assert.JSONEq(t, string(marshalledExpected), tc.app.Spec.Driver.Annotations[taskGroupsAnnotation])
+
+            options := tc.app.Spec.BatchSchedulerOptions
+            if options != nil && options.Queue != nil {
+                assert.Equal(t, *options.Queue, tc.app.Spec.Driver.Labels[queueLabel])
+                assert.Equal(t, *options.Queue, tc.app.Spec.Executor.Labels[queueLabel])
+            }
+
+            assert.Equal(t, "yunikorn", *tc.app.Spec.Driver.SchedulerName)
+            assert.Equal(t, "yunikorn", *tc.app.Spec.Executor.SchedulerName)
+        })
+    }
+}
+
+func TestMergeNodeSelector(t *testing.T) {
+    testCases := []struct {
+        appNodeSelector map[string]string
+        podNodeSelector map[string]string
+        expected        map[string]string
+    }{
+        {
+            appNodeSelector: map[string]string{},
+            podNodeSelector: map[string]string{},
+            expected:        nil,
+        },
+        {
+            appNodeSelector: map[string]string{"key1": "value1"},
+            podNodeSelector: map[string]string{},
+            expected:        map[string]string{"key1": "value1"},
+        },
+        {
+            appNodeSelector: map[string]string{},
+            podNodeSelector: map[string]string{"key1": "value1"},
+            expected:        map[string]string{"key1": "value1"},
+        },
+        {
+            appNodeSelector: map[string]string{"key1": "value1"},
+            podNodeSelector: map[string]string{"key2": "value2"},
+            expected:        map[string]string{"key1": "value1", "key2": "value2"},
+        },
+        {
+            appNodeSelector: map[string]string{"key1": "value1"},
+            podNodeSelector: map[string]string{"key1": "value2", "key2": "value2"},
+            expected:        map[string]string{"key1": "value2", "key2": "value2"},
+        },
+    }
+
+    for _, tc := range testCases {
+        assert.Equal(t, tc.expected, mergeNodeSelector(tc.appNodeSelector, tc.podNodeSelector))
+    }
+}
diff --git a/internal/webhook/sparkpod_defaulter.go b/internal/webhook/sparkpod_defaulter.go
index c7a7a858c..cc724a660 100644
--- a/internal/webhook/sparkpod_defaulter.go
+++ b/internal/webhook/sparkpod_defaulter.go
@@ -23,6 +23,7 @@ import (
 
     corev1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/resource"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/apimachinery/pkg/types"
     "sigs.k8s.io/controller-runtime/pkg/client"
@@ -49,15 +50,19 @@ type SparkPodDefaulter struct {
 var _ admission.CustomDefaulter = &SparkPodDefaulter{}
 
 // NewSparkPodDefaulter creates a new SparkPodDefaulter instance.
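+// If the namespaces list is empty, pods in all namespaces are subject to mutation.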
-func NewSparkPodDefaulter(client client.Client, sparkJobNamespaces []string) *SparkPodDefaulter {
-    m := make(map[string]bool)
-    for _, ns := range sparkJobNamespaces {
-        m[ns] = true
+func NewSparkPodDefaulter(client client.Client, namespaces []string) *SparkPodDefaulter {
+    nsMap := make(map[string]bool)
+    if len(namespaces) == 0 {
+        nsMap[metav1.NamespaceAll] = true
+    } else {
+        for _, ns := range namespaces {
+            nsMap[ns] = true
+        }
     }
 
     return &SparkPodDefaulter{
         client:             client,
-        sparkJobNamespaces: m,
+        sparkJobNamespaces: nsMap,
     }
 }
 
@@ -93,7 +98,7 @@ func (d *SparkPodDefaulter) Default(ctx context.Context, obj runtime.Object) err
 }
 
 func (d *SparkPodDefaulter) isSparkJobNamespace(ns string) bool {
-    return d.sparkJobNamespaces[ns]
+    return d.sparkJobNamespaces[metav1.NamespaceAll] || d.sparkJobNamespaces[ns]
 }
 
 type mutateSparkPodOption func(pod *corev1.Pod, app *v1beta2.SparkApplication) error
@@ -477,19 +482,19 @@ func addSchedulerName(pod *corev1.Pod, app *v1beta2.SparkApplication) error {
 
 func addPriorityClassName(pod *corev1.Pod, app *v1beta2.SparkApplication) error {
     var priorityClassName *string
-    if app.Spec.BatchSchedulerOptions != nil {
-        priorityClassName = app.Spec.BatchSchedulerOptions.PriorityClassName
+
+    if util.IsDriverPod(pod) {
+        priorityClassName = app.Spec.Driver.PriorityClassName
+    } else if util.IsExecutorPod(pod) {
+        priorityClassName = app.Spec.Executor.PriorityClassName
     }
 
     if priorityClassName != nil && *priorityClassName != "" {
         pod.Spec.PriorityClassName = *priorityClassName
-        if pod.Spec.Priority != nil {
-            pod.Spec.Priority = nil
-        }
-        if pod.Spec.PreemptionPolicy != nil {
-            pod.Spec.PreemptionPolicy = nil
-        }
+        pod.Spec.Priority = nil
+        pod.Spec.PreemptionPolicy = nil
     }
+
     return nil
 }
diff --git a/internal/webhook/sparkpod_defaulter_test.go b/internal/webhook/sparkpod_defaulter_test.go
index f81eac24b..edbc6bd70 100644
--- a/internal/webhook/sparkpod_defaulter_test.go
+++ b/internal/webhook/sparkpod_defaulter_test.go
@@ -781,14 +781,13 @@ func TestPatchSparkPod_PriorityClassName(t *testing.T) {
             UID:  "spark-test-1",
         },
         Spec: v1beta2.SparkApplicationSpec{
-            BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{
-                PriorityClassName: &priorityClassName,
-            },
             Driver: v1beta2.DriverSpec{
-                SparkPodSpec: v1beta2.SparkPodSpec{},
+                SparkPodSpec:      v1beta2.SparkPodSpec{},
+                PriorityClassName: &priorityClassName,
             },
             Executor: v1beta2.ExecutorSpec{
-                SparkPodSpec: v1beta2.SparkPodSpec{},
+                SparkPodSpec:      v1beta2.SparkPodSpec{},
+                PriorityClassName: &priorityClassName,
             },
         },
     }
diff --git a/pkg/util/sparkapplication.go b/pkg/util/sparkapplication.go
index 273ad7401..29b8dab81 100644
--- a/pkg/util/sparkapplication.go
+++ b/pkg/util/sparkapplication.go
@@ -51,6 +51,12 @@ func GetApplicationState(app *v1beta2.SparkApplication) v1beta2.ApplicationState
     return app.Status.AppState.State
 }
 
+// IsTerminated returns whether the given SparkApplication is terminated.
+func IsTerminated(app *v1beta2.SparkApplication) bool {
+    return app.Status.AppState.State == v1beta2.ApplicationStateCompleted ||
+        app.Status.AppState.State == v1beta2.ApplicationStateFailed
+}
+
 // IsExpired returns whether the given SparkApplication is expired.
 func IsExpired(app *v1beta2.SparkApplication) bool {
     // The application has no TTL defined and will never expire.
@@ -428,3 +434,29 @@ func GetExecutorRequestResource(app *v1beta2.SparkApplication) corev1.ResourceLi
     }
     return SumResourceList(resourceList)
 }
+
+// GetInitialExecutorNumber calculates the initial number of executor pods that will be requested by the driver on startup.
+func GetInitialExecutorNumber(app *v1beta2.SparkApplication) int32 {
+    // The reference for this implementation: https://github.com/apache/spark/blob/ba208b9ca99990fa329c36b28d0aa2a5f4d0a77e/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackendUtils.scala#L31
+    var initialNumExecutors int32
+
+    dynamicAllocationEnabled := app.Spec.DynamicAllocation != nil && app.Spec.DynamicAllocation.Enabled
+    if dynamicAllocationEnabled {
+        if app.Spec.Executor.Instances != nil {
+            initialNumExecutors = max(initialNumExecutors, *app.Spec.Executor.Instances)
+        }
+        if app.Spec.DynamicAllocation.InitialExecutors != nil {
+            initialNumExecutors = max(initialNumExecutors, *app.Spec.DynamicAllocation.InitialExecutors)
+        }
+        if app.Spec.DynamicAllocation.MinExecutors != nil {
+            initialNumExecutors = max(initialNumExecutors, *app.Spec.DynamicAllocation.MinExecutors)
+        }
+    } else {
+        initialNumExecutors = 2
+        if app.Spec.Executor.Instances != nil {
+            initialNumExecutors = *app.Spec.Executor.Instances
+        }
+    }
+
+    return initialNumExecutors
+}
diff --git a/spark-docker/Dockerfile b/spark-docker/Dockerfile
index 502bdd160..20ca69fbb 100644
--- a/spark-docker/Dockerfile
+++ b/spark-docker/Dockerfile
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v3.1.1
+ARG SPARK_IMAGE=spark:3.5.2
 FROM ${SPARK_IMAGE}
 
 # Switch to user root so we can add additional jars and configuration files.
diff --git a/sparkctl/README.md b/sparkctl/README.md
index 188006e13..7954a1cc3 100644
--- a/sparkctl/README.md
+++ b/sparkctl/README.md
@@ -2,10 +2,39 @@
 
 `sparkctl` is a command-line tool of the Spark Operator for creating, listing, checking status of, getting logs of, and deleting `SparkApplication`s. It can also do port forwarding from a local port to the Spark web UI port for accessing the Spark web UI on the driver. Each function is implemented as a sub-command of `sparkctl`.
 
-To build `sparkctl`, make sure you followed build steps [here](https://github.com/kubeflow/spark-operator/blob/master/docs/developer-guide.md#build-the-operator) and have all the dependencies, then run the following command from within `sparkctl/`:
+To build the `sparkctl` binary, run the following command in the root directory of the project:
 
 ```bash
-go build -o sparkctl
+make build-sparkctl
+```
+
+Then the `sparkctl` binary can be found in the `bin` directory:
+
+```bash
+$ bin/sparkctl --help
+sparkctl is the command-line tool for working with the Spark Operator. It supports creating, deleting and
+          checking status of SparkApplication objects. It also supports fetching application logs.
+
+Usage:
+  sparkctl [command]
+
+Available Commands:
+  completion  Generate the autocompletion script for the specified shell
+  create      Create a SparkApplication object
+  delete      Delete a SparkApplication object
+  event       Shows SparkApplication events
+  forward     Start to forward a local port to the remote port of the driver UI
+  help        Help about any command
+  list        List SparkApplication objects
+  log         log is a sub-command of sparkctl that fetches logs of a Spark application.
+  status      Check status of a SparkApplication
+
+Flags:
+  -h, --help                help for sparkctl
+  -k, --kubeconfig string   The path to the local Kubernetes configuration file (default "$HOME/.kube/config")
+  -n, --namespace string    The namespace in which the SparkApplication is to be created (default "default")
+
+Use "sparkctl [command] --help" for more information about a command.
 ```
 
 ## Flags
diff --git a/sparkctl/build.sh b/sparkctl/build.sh
deleted file mode 100755
index f4cca33ae..000000000
--- a/sparkctl/build.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright 2019 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-SCRIPT=$(basename ${BASH_SOURCE[0]})
-DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
-set -e
-platforms=("linux:amd64" "darwin:amd64")
-for platform in "${platforms[@]}"; do
-    GOOS="${platform%%:*}"
-    GOARCH="${platform#*:}"
-    echo $GOOS
-    echo $GOARCH
-    CGO_ENABLED=0 GOOS=$GOOS GOARCH=$GOARCH go build -buildvcs=false -o sparkctl-${GOOS}-${GOARCH}
-done
diff --git a/test/e2e/sparkapplication_test.go b/test/e2e/sparkapplication_test.go
index a3e8829a0..113326cea 100644
--- a/test/e2e/sparkapplication_test.go
+++ b/test/e2e/sparkapplication_test.go
@@ -21,26 +21,22 @@ import (
     "os"
     "path/filepath"
     "strings"
-    "time"
 
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
     corev1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/equality"
+    "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/types"
-    "k8s.io/apimachinery/pkg/util/wait"
     "k8s.io/apimachinery/pkg/util/yaml"
 
     "github.com/kubeflow/spark-operator/api/v1beta2"
+    "github.com/kubeflow/spark-operator/pkg/common"
     "github.com/kubeflow/spark-operator/pkg/util"
 )
 
-const (
-    PollInterval = 1 * time.Second
-    WaitTimeout  = 300 * time.Second
-)
-
 var _ = Describe("Example SparkApplication", func() {
     Context("spark-pi", func() {
         ctx := context.Background()
@@ -72,15 +68,7 @@ var _ = Describe("Example SparkApplication", func() {
         It("should complete successfully", func() {
             By("Waiting for SparkApplication to complete")
             key := types.NamespacedName{Namespace: app.Namespace, Name: app.Name}
-            cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
-            defer cancelFunc()
-            Expect(wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (done bool, err error) {
-                err = k8sClient.Get(ctx, key, app)
-                if app.Status.AppState.State == v1beta2.ApplicationStateCompleted {
-                    return true, nil
-                }
-                return false, err
-            })).NotTo(HaveOccurred())
+            Expect(waitForSparkApplicationCompleted(ctx, key)).NotTo(HaveOccurred())
 
             By("Checking out driver logs")
             driverPodName := util.GetDriverPodName(app)
@@ -145,21 +133,43 @@ var _ = Describe("Example SparkApplication", func() {
             }
         })
 
-        It("Should complete successfully", func() {
+        It("Should complete successfully with configmap mounted", func() {
             By("Waiting for SparkApplication to complete")
             key := types.NamespacedName{Namespace: app.Namespace, Name: app.Name}
-            cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
-            defer cancelFunc()
-            Expect(wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (done bool, err error) {
-                err = k8sClient.Get(ctx, key, app)
-                if app.Status.AppState.State == v1beta2.ApplicationStateCompleted {
-                    return true, nil
+            Expect(waitForSparkApplicationCompleted(ctx, key)).NotTo(HaveOccurred())
+
+            By("Checking out whether volumes are mounted to driver pod")
+            driverPodName := util.GetDriverPodName(app)
+            driverPodKey := types.NamespacedName{Namespace: app.Namespace, Name: driverPodName}
+            driverPod := &corev1.Pod{}
+            Expect(k8sClient.Get(ctx, driverPodKey, driverPod)).NotTo(HaveOccurred())
+            hasVolumes := false
+            hasVolumeMounts := false
+            for _, volume := range app.Spec.Volumes {
+                for _, podVolume := range driverPod.Spec.Volumes {
+                    if volume.Name == podVolume.Name {
+                        hasVolumes = true
+                        break
+                    }
                 }
-                return false, err
-            })).NotTo(HaveOccurred())
+            }
+            for _, volumeMount := range app.Spec.Driver.VolumeMounts {
+                for _, container := range driverPod.Spec.Containers {
+                    if container.Name != common.SparkDriverContainerName {
+                        continue
+                    }
+                    for _, podVolumeMount := range container.VolumeMounts {
+                        if equality.Semantic.DeepEqual(volumeMount, podVolumeMount) {
+                            hasVolumeMounts = true
+                            break
+                        }
+                    }
+                }
+            }
+            Expect(hasVolumes).To(BeTrue())
+            Expect(hasVolumeMounts).To(BeTrue())
 
             By("Checking out driver logs")
-            driverPodName := util.GetDriverPodName(app)
             bytes, err := clientset.CoreV1().Pods(app.Namespace).GetLogs(driverPodName, &corev1.PodLogOptions{}).Do(ctx).Raw()
             Expect(err).NotTo(HaveOccurred())
             Expect(bytes).NotTo(BeEmpty())
@@ -197,18 +207,28 @@ var _ = Describe("Example SparkApplication", func() {
         It("Should complete successfully", func() {
             By("Waiting for SparkApplication to complete")
             key := types.NamespacedName{Namespace: app.Namespace, Name: app.Name}
-            cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
-            defer cancelFunc()
-            Expect(wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (done bool, err error) {
-                err = k8sClient.Get(ctx, key, app)
-                if app.Status.AppState.State == v1beta2.ApplicationStateCompleted {
-                    return true, nil
+            Expect(waitForSparkApplicationCompleted(ctx, key)).NotTo(HaveOccurred())
+
+            By("Checking out whether resource requests and limits of driver pod are set")
+            driverPodName := util.GetDriverPodName(app)
+            driverPodKey := types.NamespacedName{Namespace: app.Namespace, Name: driverPodName}
+            driverPod := &corev1.Pod{}
+            Expect(k8sClient.Get(ctx, driverPodKey, driverPod)).NotTo(HaveOccurred())
+            for _, container := range driverPod.Spec.Containers {
+                if container.Name != common.SparkDriverContainerName {
+                    continue
                 }
-                return false, err
-            })).NotTo(HaveOccurred())
+                if app.Spec.Driver.CoreRequest != nil {
+                    Expect(container.Resources.Requests.Cpu().Equal(resource.MustParse(*app.Spec.Driver.CoreRequest))).To(BeTrue())
+                }
+                if app.Spec.Driver.CoreLimit != nil {
+                    Expect(container.Resources.Limits.Cpu().Equal(resource.MustParse(*app.Spec.Driver.CoreLimit))).To(BeTrue())
+                }
+                Expect(container.Resources.Requests.Memory()).NotTo(BeNil())
+                Expect(container.Resources.Limits.Memory()).NotTo(BeNil())
+            }
 
             By("Checking out driver logs")
-            driverPodName := util.GetDriverPodName(app)
             bytes, err := clientset.CoreV1().Pods(app.Namespace).GetLogs(driverPodName, &corev1.PodLogOptions{}).Do(ctx).Raw()
             Expect(err).NotTo(HaveOccurred())
             Expect(bytes).NotTo(BeEmpty())
@@ -246,15 +266,7 @@ var _ = Describe("Example SparkApplication", func() {
         It("Should complete successfully", func() {
             By("Waiting for SparkApplication to complete")
             key := types.NamespacedName{Namespace: app.Namespace, Name: app.Name}
-            cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
-            defer cancelFunc()
-            Expect(wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (done bool, err error) {
-                err = k8sClient.Get(ctx, key, app)
-                if app.Status.AppState.State == v1beta2.ApplicationStateCompleted {
-                    return true, nil
-                }
-                return false, err
-            })).NotTo(HaveOccurred())
+            Expect(waitForSparkApplicationCompleted(ctx, key)).NotTo(HaveOccurred())
 
             By("Checking out driver logs")
             driverPodName := util.GetDriverPodName(app)
diff --git a/test/e2e/suit_test.go b/test/e2e/suit_test.go
index 4c60f9762..85dc3ed04 100644
--- a/test/e2e/suit_test.go
+++ b/test/e2e/suit_test.go
@@ -27,12 +27,15 @@ import (
 
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-
     "helm.sh/helm/v3/pkg/action"
     "helm.sh/helm/v3/pkg/chart/loader"
+    "helm.sh/helm/v3/pkg/chartutil"
     "helm.sh/helm/v3/pkg/cli"
+    admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/types"
+    "k8s.io/apimachinery/pkg/util/wait"
     "k8s.io/client-go/kubernetes"
     "k8s.io/client-go/kubernetes/scheme"
     "k8s.io/client-go/rest"
@@ -53,6 +56,12 @@ import (
 const (
     ReleaseName      = "spark-operator"
     ReleaseNamespace = "spark-operator"
+
+    MutatingWebhookName   = "spark-operator-webhook"
+    ValidatingWebhookName = "spark-operator-webhook"
+
+    PollInterval = 1 * time.Second
+    WaitTimeout  = 5 * time.Minute
 )
 
 var (
@@ -123,14 +132,25 @@ var _ = BeforeSuite(func() {
     installAction.ReleaseName = ReleaseName
     installAction.Namespace = envSettings.Namespace()
     installAction.Wait = true
-    installAction.Timeout = 5 * time.Minute
+    installAction.Timeout = WaitTimeout
     chartPath := filepath.Join("..", "..", "charts", "spark-operator-chart")
     chart, err := loader.Load(chartPath)
     Expect(err).NotTo(HaveOccurred())
    Expect(chart).NotTo(BeNil())
-    release, err := installAction.Run(chart, nil)
+    values, err := chartutil.ReadValuesFile(filepath.Join(chartPath, "ci", "ci-values.yaml"))
+    Expect(err).NotTo(HaveOccurred())
+    Expect(values).NotTo(BeNil())
+    release, err := installAction.Run(chart, values)
     Expect(err).NotTo(HaveOccurred())
     Expect(release).NotTo(BeNil())
+
+    By("Waiting for the webhooks to be ready")
+    mutatingWebhookKey := types.NamespacedName{Name: MutatingWebhookName}
+    validatingWebhookKey := types.NamespacedName{Name: ValidatingWebhookName}
+    Expect(waitForMutatingWebhookReady(context.Background(), mutatingWebhookKey)).NotTo(HaveOccurred())
+    Expect(waitForValidatingWebhookReady(context.Background(), validatingWebhookKey)).NotTo(HaveOccurred())
+
+    // TODO: Remove this when there is a better way to ensure the webhooks are ready before running the e2e tests.
+    time.Sleep(10 * time.Second)
 })
 
 var _ = AfterSuite(func() {
@@ -144,7 +164,7 @@ var _ = AfterSuite(func() {
     uninstallAction := action.NewUninstall(actionConfig)
     Expect(uninstallAction).NotTo(BeNil())
     uninstallAction.Wait = true
-    uninstallAction.Timeout = 5 * time.Minute
+    uninstallAction.Timeout = WaitTimeout
     resp, err := uninstallAction.Run(ReleaseName)
     Expect(err).To(BeNil())
     Expect(resp).NotTo(BeNil())
@@ -157,3 +177,95 @@ var _ = AfterSuite(func() {
     err = testEnv.Stop()
     Expect(err).ToNot(HaveOccurred())
 })
+
+func waitForMutatingWebhookReady(ctx context.Context, key types.NamespacedName) error {
+    cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
+    defer cancelFunc()
+
+    mutatingWebhook := admissionregistrationv1.MutatingWebhookConfiguration{}
+    err := wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (bool, error) {
+        if err := k8sClient.Get(ctx, key, &mutatingWebhook); err != nil {
+            return false, err
+        }
+
+        for _, wh := range mutatingWebhook.Webhooks {
+            // Check the webhook CA certificate
+            if wh.ClientConfig.CABundle == nil {
+                return false, nil
+            }
+
+            // Check the webhook service endpoints
+            svcRef := wh.ClientConfig.Service
+            if svcRef == nil {
+                return false, fmt.Errorf("webhook service is nil")
+            }
+            endpoints := corev1.Endpoints{}
+            endpointsKey := types.NamespacedName{Namespace: svcRef.Namespace, Name: svcRef.Name}
+            if err := k8sClient.Get(ctx, endpointsKey, &endpoints); err != nil {
+                return false, err
+            }
+            if len(endpoints.Subsets) == 0 {
+                return false, nil
+            }
+        }
+
+        return true, nil
+    })
+    return err
+}
+
+func waitForValidatingWebhookReady(ctx context.Context, key types.NamespacedName) error {
+    cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
+    defer cancelFunc()
+
+    validatingWebhook := admissionregistrationv1.ValidatingWebhookConfiguration{}
+    err := wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (bool, error) {
+        if err := k8sClient.Get(ctx, key, &validatingWebhook); err != nil {
+            return false, err
+        }
+
+        for _, wh := range validatingWebhook.Webhooks {
+            // Check the webhook CA certificate
+            if wh.ClientConfig.CABundle == nil {
+                return false, nil
+            }
+
+            // Check the webhook service endpoints
+            svcRef := wh.ClientConfig.Service
+            if svcRef == nil {
+                return false, fmt.Errorf("webhook service is nil")
+            }
+            endpoints := corev1.Endpoints{}
+            endpointsKey := types.NamespacedName{Namespace: svcRef.Namespace, Name: svcRef.Name}
+            if err := k8sClient.Get(ctx, endpointsKey, &endpoints); err != nil {
+                return false, err
+            }
+            if len(endpoints.Subsets) == 0 {
+                return false, nil
+            }
+        }
+
+        return true, nil
+    })
+    return err
+}
+
+func waitForSparkApplicationCompleted(ctx context.Context, key types.NamespacedName) error {
+    cancelCtx, cancelFunc := context.WithTimeout(ctx, WaitTimeout)
+    defer cancelFunc()
+
+    app := &v1beta2.SparkApplication{}
+    err := wait.PollUntilContextCancel(cancelCtx, PollInterval, true, func(ctx context.Context) (bool, error) {
+        if err := k8sClient.Get(ctx, key, app); err != nil {
+            return false, err
+        }
+        switch app.Status.AppState.State {
+        case v1beta2.ApplicationStateFailedSubmission, v1beta2.ApplicationStateFailed:
+            return false, fmt.Errorf("%s", app.Status.AppState.ErrorMessage)
+        case v1beta2.ApplicationStateCompleted:
+            return true, nil
+        }
+        return false, nil
+    })
+    return err
+}