From ef6db4f6a5c7f71679e891a204ee75f73a48ba65 Mon Sep 17 00:00:00 2001 From: pmahindrakar-oss Date: Fri, 3 Sep 2021 12:26:28 +0530 Subject: [PATCH] Flyte native scheduler (#228) * Flyte native scheduler Signed-off-by: Prafulla Mahindrakar Co-authored-by: Yuvraj --- flyteadmin/.github/workflows/master.yml | 11 + flyteadmin/.github/workflows/pull_request.yml | 16 +- flyteadmin/Dockerfile.scheduler | 36 ++ flyteadmin/Makefile | 20 ++ flyteadmin/cmd/scheduler/entrypoints/root.go | 74 ++++ .../cmd/scheduler/entrypoints/scheduler.go | 75 ++++ flyteadmin/cmd/scheduler/main.go | 14 + flyteadmin/flyteadmin_config.yaml | 4 + flyteadmin/go.mod | 5 + flyteadmin/go.sum | 4 +- .../schedule/aws/cloud_watch_scheduler.go | 49 ++- .../aws/cloud_watch_scheduler_test.go | 13 +- .../async/schedule/aws/serialization_test.go | 1 + flyteadmin/pkg/async/schedule/aws/shared.go | 4 +- .../pkg/async/schedule/aws/shared_test.go | 6 +- .../async/schedule/aws/workflow_executor.go | 2 +- flyteadmin/pkg/async/schedule/factory.go | 11 +- .../schedule/interfaces/event_scheduler.go | 10 +- .../schedule/mocks/mock_event_scheduler.go | 16 + .../async/schedule/noop/event_scheduler.go | 8 +- .../pkg/manager/impl/launch_plan_manager.go | 49 +-- .../manager/impl/launch_plan_manager_test.go | 36 +- .../pkg/repositories/config/database.go | 16 + .../pkg/repositories/config/migrations.go | 21 ++ .../pkg/repositories/config/postgres.go | 1 - flyteadmin/pkg/repositories/errors/errors.go | 11 +- flyteadmin/pkg/repositories/factory.go | 3 + .../pkg/repositories/mocks/repository.go | 58 +-- flyteadmin/pkg/repositories/postgres_repo.go | 58 +-- flyteadmin/pkg/rpc/adminservice/base.go | 2 +- .../runtime/application_config_provider.go | 9 +- .../interfaces/application_configuration.go | 153 ++++++++ flyteadmin/scheduler/core/doc.go | 9 + flyteadmin/scheduler/core/gocron_job.go | 41 +++ flyteadmin/scheduler/core/gocron_scheduler.go | 337 ++++++++++++++++++ flyteadmin/scheduler/core/scheduler.go | 28 ++ 
flyteadmin/scheduler/core/snapshot_runner.go | 26 ++ flyteadmin/scheduler/core/updater.go | 28 ++ flyteadmin/scheduler/dbapi/doc.go | 5 + .../scheduler/dbapi/event_scheduler_impl.go | 87 +++++ .../dbapi/event_scheduler_impl_test.go | 141 ++++++++ flyteadmin/scheduler/doc.go | 100 ++++++ flyteadmin/scheduler/executor/doc.go | 5 + flyteadmin/scheduler/executor/executor.go | 14 + .../scheduler/executor/executor_impl.go | 146 ++++++++ .../scheduler/executor/executor_impl_test.go | 84 +++++ flyteadmin/scheduler/identifier/doc.go | 3 + flyteadmin/scheduler/identifier/identifier.go | 71 ++++ flyteadmin/scheduler/repositories/doc.go | 4 + flyteadmin/scheduler/repositories/factory.go | 44 +++ .../repositories/gormimpl/metrics.go | 37 ++ .../gormimpl/schedulable_entity_repo.go | 148 ++++++++ .../schedule_entities_snapshot_repo.go | 57 +++ .../interfaces/schedulable_entity_repo.go | 28 ++ .../schedule_entities_snapshot_repo.go | 19 + .../schedulable_entity_repo_interface.go | 192 ++++++++++ ...edule_entities_snap_shot_repo_interface.go | 87 +++++ .../repositories/models/schedulable_entity.go | 25 ++ .../models/schedule_entities_snapshot.go | 13 + .../scheduler/repositories/postgres_repo.go | 29 ++ flyteadmin/scheduler/schedule_executor.go | 115 ++++++ .../scheduler/schedule_executor_test.go | 182 ++++++++++ flyteadmin/scheduler/snapshoter/doc.go | 5 + .../scheduler/snapshoter/persistence.go | 15 + flyteadmin/scheduler/snapshoter/reader.go | 9 + flyteadmin/scheduler/snapshoter/snapshot.go | 23 ++ .../scheduler/snapshoter/snapshot_v1.go | 51 +++ flyteadmin/scheduler/snapshoter/snapshoter.go | 88 +++++ .../scheduler/snapshoter/snapshoter_test.go | 110 ++++++ .../snapshoter/versioned_snapshot.go | 43 +++ .../snapshoter/versioned_snapshot_test.go | 50 +++ flyteadmin/scheduler/snapshoter/writer.go | 9 + 72 files changed, 3183 insertions(+), 121 deletions(-) create mode 100644 flyteadmin/Dockerfile.scheduler create mode 100644 flyteadmin/cmd/scheduler/entrypoints/root.go create mode 
100644 flyteadmin/cmd/scheduler/entrypoints/scheduler.go create mode 100644 flyteadmin/cmd/scheduler/main.go create mode 100644 flyteadmin/scheduler/core/doc.go create mode 100644 flyteadmin/scheduler/core/gocron_job.go create mode 100644 flyteadmin/scheduler/core/gocron_scheduler.go create mode 100644 flyteadmin/scheduler/core/scheduler.go create mode 100644 flyteadmin/scheduler/core/snapshot_runner.go create mode 100644 flyteadmin/scheduler/core/updater.go create mode 100644 flyteadmin/scheduler/dbapi/doc.go create mode 100644 flyteadmin/scheduler/dbapi/event_scheduler_impl.go create mode 100644 flyteadmin/scheduler/dbapi/event_scheduler_impl_test.go create mode 100644 flyteadmin/scheduler/doc.go create mode 100644 flyteadmin/scheduler/executor/doc.go create mode 100644 flyteadmin/scheduler/executor/executor.go create mode 100644 flyteadmin/scheduler/executor/executor_impl.go create mode 100644 flyteadmin/scheduler/executor/executor_impl_test.go create mode 100644 flyteadmin/scheduler/identifier/doc.go create mode 100644 flyteadmin/scheduler/identifier/identifier.go create mode 100644 flyteadmin/scheduler/repositories/doc.go create mode 100644 flyteadmin/scheduler/repositories/factory.go create mode 100644 flyteadmin/scheduler/repositories/gormimpl/metrics.go create mode 100644 flyteadmin/scheduler/repositories/gormimpl/schedulable_entity_repo.go create mode 100644 flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go create mode 100644 flyteadmin/scheduler/repositories/interfaces/schedulable_entity_repo.go create mode 100644 flyteadmin/scheduler/repositories/interfaces/schedule_entities_snapshot_repo.go create mode 100644 flyteadmin/scheduler/repositories/mocks/schedulable_entity_repo_interface.go create mode 100644 flyteadmin/scheduler/repositories/mocks/schedule_entities_snap_shot_repo_interface.go create mode 100644 flyteadmin/scheduler/repositories/models/schedulable_entity.go create mode 100644 
flyteadmin/scheduler/repositories/models/schedule_entities_snapshot.go create mode 100644 flyteadmin/scheduler/repositories/postgres_repo.go create mode 100644 flyteadmin/scheduler/schedule_executor.go create mode 100644 flyteadmin/scheduler/schedule_executor_test.go create mode 100644 flyteadmin/scheduler/snapshoter/doc.go create mode 100644 flyteadmin/scheduler/snapshoter/persistence.go create mode 100644 flyteadmin/scheduler/snapshoter/reader.go create mode 100644 flyteadmin/scheduler/snapshoter/snapshot.go create mode 100644 flyteadmin/scheduler/snapshoter/snapshot_v1.go create mode 100644 flyteadmin/scheduler/snapshoter/snapshoter.go create mode 100644 flyteadmin/scheduler/snapshoter/snapshoter_test.go create mode 100644 flyteadmin/scheduler/snapshoter/versioned_snapshot.go create mode 100644 flyteadmin/scheduler/snapshoter/versioned_snapshot_test.go create mode 100644 flyteadmin/scheduler/snapshoter/writer.go diff --git a/flyteadmin/.github/workflows/master.yml b/flyteadmin/.github/workflows/master.yml index e61426ef15..e04fbf853a 100644 --- a/flyteadmin/.github/workflows/master.yml +++ b/flyteadmin/.github/workflows/master.yml @@ -117,6 +117,17 @@ jobs: registry: ghcr.io build_extra_args: "--compress=true" + - name: Push Flytescheduler Docker Image to Github Registry + uses: whoan/docker-build-with-cache-action@v5 + with: + username: "${{ secrets.FLYTE_BOT_USERNAME }}" + password: "${{ secrets.FLYTE_BOT_PAT }}" + image_name: flytescheduler + image_tag: latest,${{ github.sha }},${{ needs.bump-version.outputs.version }} + push_git_tag: true + dockerfile: Dockerfile.scheduler + registry: ghcr.io + build_extra_args: "--compress=true" tests-lint: name: Run tests and lint runs-on: ubuntu-latest diff --git a/flyteadmin/.github/workflows/pull_request.yml b/flyteadmin/.github/workflows/pull_request.yml index f5ce713fa3..9adc695de0 100644 --- a/flyteadmin/.github/workflows/pull_request.yml +++ b/flyteadmin/.github/workflows/pull_request.yml @@ -27,7 +27,21 @@ jobs: 
docker build -t flyteorg/${{ github.event.repository.name }}:latest --cache-from=flyteorg/${{ github.event.repository.name }}:builder . - name: Tag and cache docker image - run: mkdir -p /tmp/tmp/docker-images && docker save flyteorg/${{ github.event.repository.name }}:builder -o /tmp/tmp/docker-images/snapshot-builder.tar && docker save flyteorg/${{ github.event.repository.name }}:latest -o /tmp/tmp/docker-images/snapshot.tar + run: mkdir -p /tmp/tmp/docker-images && docker save flyteorg/${{ github.event.repository.name }}:builder -o /tmp/tmp/docker-images/snapshot-builder.tar && docker save flyteorg/${{ github.event.repository.name }}:latest -o /tmp/tmp/docker-images/snapshot.tar + + - name: Build Flytescheduler Docker Image + uses: whoan/docker-build-with-cache-action@v5 + with: + username: "${{ secrets.FLYTE_BOT_USERNAME }}" + password: "${{ secrets.FLYTE_BOT_PAT }}" + image_name: flytescheduler + image_tag: latest,${{ github.sha }},${{ needs.bump-version.outputs.version }} + push_git_tag: true + push_image_and_stages: false + dockerfile: Dockerfile.scheduler + registry: ghcr.io + build_extra_args: "--compress=true" + endtoend: name: End to End tests diff --git a/flyteadmin/Dockerfile.scheduler b/flyteadmin/Dockerfile.scheduler new file mode 100644 index 0000000000..82cf96cfbe --- /dev/null +++ b/flyteadmin/Dockerfile.scheduler @@ -0,0 +1,36 @@ +# WARNING: THIS FILE IS MANAGED IN THE 'BOILERPLATE' REPO AND COPIED TO OTHER REPOSITORIES. +# ONLY EDIT THIS FILE FROM WITHIN THE 'LYFT/BOILERPLATE' REPOSITORY: +# +# TO OPT OUT OF UPDATES, SEE https://github.com/lyft/boilerplate/blob/master/Readme.rst + +FROM golang:1.16.0-alpine3.13 as builder +RUN apk add git openssh-client make curl + +# COPY only the go mod files for efficient caching +COPY go.mod go.sum /go/src/github.com/flyteorg/flyteadmin/ +WORKDIR /go/src/github.com/flyteorg/flyteadmin + +# Pull dependencies +RUN go mod download + +# COPY the rest of the source code +COPY . 
/go/src/github.com/flyteorg/flyteadmin/ + +# This 'linux_compile_scheduler' target should compile binaries to the /artifacts directory +# The main entrypoint should be compiled to /artifacts/flytescheduler +RUN make linux_compile_scheduler + +# update the PATH to include the /artifacts directory +ENV PATH="/artifacts:${PATH}" + +# This will eventually move to centurylink/ca-certs:latest for minimum possible image size +FROM alpine:3.13 +LABEL org.opencontainers.image.source https://github.com/flyteorg/flyteadmin + +COPY --from=builder /artifacts /bin + +# Ensure the latest CA certs are present to authenticate SSL connections. +RUN apk --update add ca-certificates + +CMD ["flytescheduler"] + diff --git a/flyteadmin/Makefile b/flyteadmin/Makefile index de61de3f4a..5ce74dbf72 100644 --- a/flyteadmin/Makefile +++ b/flyteadmin/Makefile @@ -1,4 +1,5 @@ export REPOSITORY=flyteadmin +export FLYTE_SCHEDULER_REPOSITORY=flytescheduler include boilerplate/flyte/docker_build/Makefile include boilerplate/flyte/golang_test_targets/Makefile include boilerplate/flyte/end2end/Makefile @@ -15,6 +16,10 @@ update_boilerplate: @curl https://raw.githubusercontent.com/flyteorg/boilerplate/master/boilerplate/update.sh -o boilerplate/update.sh @boilerplate/update.sh +.PHONY: docker_build_scheduler +docker_build_scheduler: + docker build -t $$FLYTE_SCHEDULER_REPOSITORY:$(GIT_HASH) -f Dockerfile.scheduler . + .PHONY: integration integration: CGO_ENABLED=0 GOFLAGS="-count=1" go test -v -tags=integration ./tests/... 
@@ -31,14 +36,29 @@ k8s_integration_execute: compile: go build -o flyteadmin -ldflags=$(LD_FLAGS) ./cmd/ && mv ./flyteadmin ${GOPATH}/bin +.PHONY: compile_scheduler +compile_scheduler: + go build -o flytescheduler -ldflags=$(LD_FLAGS) ./cmd/scheduler/ && mv ./flytescheduler ${GOPATH}/bin + + .PHONY: linux_compile linux_compile: GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o /artifacts/flyteadmin -ldflags=$(LD_FLAGS) ./cmd/ +.PHONY: linux_compile_scheduler +linux_compile_scheduler: + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o /artifacts/flytescheduler -ldflags=$(LD_FLAGS) ./cmd/scheduler/ + + .PHONY: server server: go run cmd/main.go serve --server.kube-config ~/.kube/config --config flyteadmin_config.yaml +.PHONY: scheduler +scheduler: + go run scheduler/main.go run --server.kube-config ~/.kube/config --config flyteadmin_config.yaml + + .PHONY: migrate migrate: go run cmd/main.go migrate run --server.kube-config ~/.kube/config --config flyteadmin_config.yaml diff --git a/flyteadmin/cmd/scheduler/entrypoints/root.go b/flyteadmin/cmd/scheduler/entrypoints/root.go new file mode 100644 index 0000000000..0b4735b58e --- /dev/null +++ b/flyteadmin/cmd/scheduler/entrypoints/root.go @@ -0,0 +1,74 @@ +package entrypoints + +import ( + "context" + "flag" + "fmt" + "os" + + "github.com/flyteorg/flytestdlib/config" + "github.com/flyteorg/flytestdlib/config/viper" + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +var ( + cfgFile string + configAccessor = viper.NewAccessor(config.Options{}) +) + +// RootCmd represents the base command when called without any subcommands +var RootCmd = &cobra.Command{ + Use: "flytescheduler", + Short: "Flyte native scheduler to run cron and fixed rate scheduled workflows", + Long: ` +Use the run subcommand which will start the scheduler by connecting to DB containing schedules + + flytescheduler run --config flyteadmin_config.yaml --admin.endpoint dns:///localhost:8089 --admin.insecure +`, + PersistentPreRunE: func(cmd 
*cobra.Command, args []string) error { + return initConfig(cmd.Flags()) + }, +} + +// Execute adds all child commands to the root command sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. +func Execute() error { + if err := RootCmd.Execute(); err != nil { + fmt.Println(err) + return err + } + return nil +} + +func init() { + // allows `$ flytescheduler --logtostderr` to work + pflag.CommandLine.AddGoFlagSet(flag.CommandLine) + + // Add persistent flags - persistent flags persist through all sub-commands + RootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is ./flyteadmin_config.yaml)") + + RootCmd.AddCommand(viper.GetConfigCommand()) + + // Allow viper to read the value of the flags + configAccessor.InitializePflags(RootCmd.PersistentFlags()) + + err := flag.CommandLine.Parse([]string{}) + if err != nil { + fmt.Println(err) + os.Exit(-1) + } +} + +func initConfig(flags *pflag.FlagSet) error { + configAccessor = viper.NewAccessor(config.Options{ + SearchPaths: []string{cfgFile, ".", "/etc/flyte/config", "$GOPATH/src/github.com/flyteorg/flyteadmin"}, + StrictMode: false, + }) + + fmt.Println("Using config file: ", configAccessor.ConfigFilesUsed()) + + configAccessor.InitializePflags(flags) + + return configAccessor.UpdateConfig(context.TODO()) +} diff --git a/flyteadmin/cmd/scheduler/entrypoints/scheduler.go b/flyteadmin/cmd/scheduler/entrypoints/scheduler.go new file mode 100644 index 0000000000..617eed5eef --- /dev/null +++ b/flyteadmin/cmd/scheduler/entrypoints/scheduler.go @@ -0,0 +1,75 @@ +package entrypoints + +import ( + "context" + "fmt" + "runtime/debug" + + "github.com/flyteorg/flyteadmin/pkg/common" + repositoryCommonConfig "github.com/flyteorg/flyteadmin/pkg/repositories/config" + "github.com/flyteorg/flyteadmin/pkg/runtime" + scheduler "github.com/flyteorg/flyteadmin/scheduler" + schdulerRepoConfig "github.com/flyteorg/flyteadmin/scheduler/repositories" + 
"github.com/flyteorg/flyteidl/clients/go/admin" + "github.com/flyteorg/flytestdlib/contextutils" + "github.com/flyteorg/flytestdlib/logger" + "github.com/flyteorg/flytestdlib/promutils" + "github.com/flyteorg/flytestdlib/promutils/labeled" + + _ "github.com/jinzhu/gorm/dialects/postgres" // Required to import database driver. + "github.com/spf13/cobra" +) + +var schedulerRunCmd = &cobra.Command{ + Use: "run", + Short: "This command will start the flyte native scheduler and periodically get new schedules from the db for scheduling", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + configuration := runtime.NewConfigurationProvider() + applicationConfiguration := configuration.ApplicationConfiguration().GetTopLevelConfig() + + // Define the schedulerScope for prometheus metrics + schedulerScope := promutils.NewScope(applicationConfiguration.MetricsScope).NewSubScope("flytescheduler") + + defer func() { + if err := recover(); err != nil { + schedulerScope.MustNewCounter("initialization_panic", + "panics encountered initializing the flyte native scheduler").Inc() + logger.Fatalf(ctx, fmt.Sprintf("caught panic: %v [%+v]", err, string(debug.Stack()))) + } + }() + + dbConfigValues := configuration.ApplicationConfiguration().GetDbConfig() + dbConfig := repositoryCommonConfig.NewDbConfig(dbConfigValues) + db := schdulerRepoConfig.GetRepository( + schdulerRepoConfig.POSTGRES, dbConfig, schedulerScope.NewSubScope("database")) + + clientSet, err := admin.ClientSetBuilder().WithConfig(admin.GetConfig(ctx)).Build(ctx) + if err != nil { + logger.Fatalf(ctx, "Flyte native scheduler failed to start due to %v", err) + return err + } + adminServiceClient := clientSet.AdminClient() + + scheduleExecutor := scheduler.NewScheduledExecutor(db, + configuration.ApplicationConfiguration().GetSchedulerConfig().GetWorkflowExecutorConfig(), schedulerScope, adminServiceClient) + + logger.Info(context.Background(), "Successfully initialized a native flyte 
scheduler") + + err = scheduleExecutor.Run(ctx) + if err != nil { + logger.Fatalf(ctx, "Flyte native scheduler failed to start due to %v", err) + return err + } + return nil + }, +} + +func init() { + RootCmd.AddCommand(schedulerRunCmd) + + // Set Keys + labeled.SetMetricKeys(contextutils.AppNameKey, contextutils.ProjectKey, contextutils.DomainKey, + contextutils.ExecIDKey, contextutils.WorkflowIDKey, contextutils.NodeIDKey, contextutils.TaskIDKey, + contextutils.TaskTypeKey, common.RuntimeTypeKey, common.RuntimeVersionKey) +} diff --git a/flyteadmin/cmd/scheduler/main.go b/flyteadmin/cmd/scheduler/main.go new file mode 100644 index 0000000000..382a6f4bbb --- /dev/null +++ b/flyteadmin/cmd/scheduler/main.go @@ -0,0 +1,14 @@ +package main + +import ( + entrypoints2 "github.com/flyteorg/flyteadmin/cmd/scheduler/entrypoints" + "github.com/golang/glog" +) + +func main() { + glog.V(2).Info("Beginning Flyte Scheduler") + err := entrypoints2.Execute() + if err != nil { + panic(err) + } +} diff --git a/flyteadmin/flyteadmin_config.yaml b/flyteadmin/flyteadmin_config.yaml index 70accc006d..db02cd7879 100644 --- a/flyteadmin/flyteadmin_config.yaml +++ b/flyteadmin/flyteadmin_config.yaml @@ -75,6 +75,10 @@ scheduler: scheduleNamePrefix: "flyte" workflowExecutor: scheme: local + local: + adminRateLimit: + tps: 100 # per sec how many requests to send to admin + burst: 10 # burst count of request to admin region: "my-region" scheduleQueueName: "won't-work-locally" accountId: "abc123" diff --git a/flyteadmin/go.mod b/flyteadmin/go.mod index a79e8e7ee8..6ba90f9ad5 100644 --- a/flyteadmin/go.mod +++ b/flyteadmin/go.mod @@ -25,6 +25,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b github.com/golang/protobuf v1.4.3 + github.com/google/uuid v1.2.0 github.com/googleapis/gax-go/v2 v2.0.5 github.com/googleapis/gnostic v0.5.4 // indirect github.com/gorilla/handlers v1.5.1 @@ -48,6 +49,7 @@ require ( github.com/prometheus/common 
v0.19.0 // indirect github.com/qor/qor v1.2.0 // indirect github.com/qor/validations v0.0.0-20171228122639-f364bca61b46 + github.com/robfig/cron/v3 v3.0.0 github.com/sendgrid/rest v2.6.4+incompatible // indirect github.com/sendgrid/sendgrid-go v3.10.0+incompatible github.com/sirupsen/logrus v1.8.1 // indirect @@ -56,6 +58,7 @@ require ( github.com/stretchr/testify v1.7.0 golang.org/x/crypto v0.0.0-20210314154223-e6e6c4f2bb5b // indirect golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84 + golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba golang.org/x/tools v0.1.2 // indirect google.golang.org/api v0.42.0 google.golang.org/genproto v0.0.0-20210315173758-2651cd453018 @@ -72,3 +75,5 @@ require ( sigs.k8s.io/controller-runtime v0.8.3 sigs.k8s.io/structured-merge-diff/v4 v4.1.0 // indirect ) + +replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 diff --git a/flyteadmin/go.sum b/flyteadmin/go.sum index c77bfd506d..240282cf32 100644 --- a/flyteadmin/go.sum +++ b/flyteadmin/go.sum @@ -86,7 +86,6 @@ github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= @@ -1159,6 +1158,7 @@ github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod 
h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4 h1:Qj1ukM4GlMWXNdMBuXcXfz/Kw9s1qm0CLY32QxuSImI= github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4/go.mod h1:N6UoU20jOqggOuDwUaBQpluzLNDqif3kq9z2wpdYEfQ= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -1386,6 +1386,8 @@ github.com/uber/jaeger-lib v1.5.0/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/Aaua github.com/uber/jaeger-lib v2.2.0+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 h1:EompdlTtH1GbcgfTNe+sAwHeDdeboYAvywrlVDbnixQ= +github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/unrolled/secure v0.0.0-20180918153822-f340ee86eb8b/go.mod h1:mnPT77IAdsi/kV7+Es7y+pXALeV3h7G6dQF6mNYjcLA= github.com/unrolled/secure v0.0.0-20181005190816-ff9db2ff917f/go.mod h1:mnPT77IAdsi/kV7+Es7y+pXALeV3h7G6dQF6mNYjcLA= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= diff --git a/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler.go b/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler.go index 6ff2476696..8596f83710 100644 --- a/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler.go +++ b/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler.go @@ -7,18 +7,18 @@ import ( 
"github.com/flyteorg/flyteadmin/pkg/async/schedule/aws/interfaces" scheduleInterfaces "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" - + "github.com/flyteorg/flyteadmin/pkg/errors" + appInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flytestdlib/logger" "github.com/flyteorg/flytestdlib/promutils" - "github.com/prometheus/client_golang/prometheus" - - "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatchevents" - "github.com/flyteorg/flyteadmin/pkg/errors" - "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" - "github.com/flyteorg/flytestdlib/logger" + "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc/codes" ) @@ -68,7 +68,7 @@ type cloudWatchScheduler struct { metrics cloudWatchSchedulerMetrics } -func getScheduleName(scheduleNamePrefix string, identifier admin.NamedEntityIdentifier) string { +func getScheduleName(scheduleNamePrefix string, identifier core.Identifier) string { hashedIdentifier := hashIdentifier(identifier) if len(scheduleNamePrefix) > 0 { return fmt.Sprintf(scheduleNameFormat, scheduleNamePrefix, hashedIdentifier) @@ -76,7 +76,7 @@ func getScheduleName(scheduleNamePrefix string, identifier admin.NamedEntityIden return fmt.Sprintf("%d", hashedIdentifier) } -func getScheduleDescription(identifier admin.NamedEntityIdentifier) string { +func getScheduleDescription(identifier core.Identifier) string { return fmt.Sprintf(scheduleDescriptionFormat, identifier.Project, identifier.Domain, identifier.Name) } @@ -170,6 +170,37 @@ func (s *cloudWatchScheduler) AddSchedule(ctx context.Context, input scheduleInt return nil } +func (s *cloudWatchScheduler) CreateScheduleInput(ctx context.Context, appConfig 
*appInterfaces.SchedulerConfig, + identifier core.Identifier, schedule *admin.Schedule) (scheduleInterfaces.AddScheduleInput, error) { + + payload, err := SerializeScheduleWorkflowPayload( + schedule.GetKickoffTimeInputArg(), + admin.NamedEntityIdentifier{ + Project: identifier.Project, + Domain: identifier.Domain, + Name: identifier.Name, + }) + if err != nil { + logger.Errorf(ctx, "failed to serialize schedule workflow payload for launch plan: %v with err: %v", + identifier, err) + return scheduleInterfaces.AddScheduleInput{}, err + } + + // Backward compatible with old EvenSchedulerConfig structure + scheduleNamePrefix := appConfig.EventSchedulerConfig.GetScheduleNamePrefix() + if appConfig.EventSchedulerConfig.GetAWSSchedulerConfig() != nil { + scheduleNamePrefix = appConfig.EventSchedulerConfig.GetAWSSchedulerConfig().GetScheduleNamePrefix() + } + + addScheduleInput := scheduleInterfaces.AddScheduleInput{ + Identifier: identifier, + ScheduleExpression: *schedule, + Payload: payload, + ScheduleNamePrefix: scheduleNamePrefix, + } + return addScheduleInput, nil +} + func isResourceNotFoundException(err error) bool { switch err := err.(type) { case awserr.Error: diff --git a/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler_test.go b/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler_test.go index 571016e7d3..d4750c2ae9 100644 --- a/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler_test.go +++ b/flyteadmin/pkg/async/schedule/aws/cloud_watch_scheduler_test.go @@ -3,20 +3,18 @@ package aws import ( "context" "fmt" + "testing" "github.com/flyteorg/flyteadmin/pkg/async/schedule/aws/interfaces" "github.com/flyteorg/flyteadmin/pkg/async/schedule/aws/mocks" scheduleInterfaces "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" - + flyteAdminErrors "github.com/flyteorg/flyteadmin/pkg/errors" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" 
"github.com/flyteorg/flytestdlib/promutils" "github.com/aws/aws-sdk-go/aws/awserr" - - "testing" - "github.com/aws/aws-sdk-go/service/cloudwatchevents" - flyteAdminErrors "github.com/flyteorg/flyteadmin/pkg/errors" - "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/stretchr/testify/assert" "google.golang.org/grpc/codes" ) @@ -28,10 +26,11 @@ var expectedError = flyteAdminErrors.NewFlyteAdminError(codes.Internal, "foo") var testSerializedPayload = fmt.Sprintf("event triggered at '%s'", awsTimestampPlaceholder) -var testSchedulerIdentifier = admin.NamedEntityIdentifier{ +var testSchedulerIdentifier = core.Identifier{ Project: "project", Domain: "domain", Name: "name", + Version: "ignored", } var scope = promutils.NewScope("test_scheduler") diff --git a/flyteadmin/pkg/async/schedule/aws/serialization_test.go b/flyteadmin/pkg/async/schedule/aws/serialization_test.go index ad3f6bb868..3a41fd8e3a 100644 --- a/flyteadmin/pkg/async/schedule/aws/serialization_test.go +++ b/flyteadmin/pkg/async/schedule/aws/serialization_test.go @@ -8,6 +8,7 @@ import ( "github.com/flyteorg/flyteadmin/pkg/errors" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/golang/protobuf/proto" + "github.com/stretchr/testify/assert" "google.golang.org/grpc/codes" ) diff --git a/flyteadmin/pkg/async/schedule/aws/shared.go b/flyteadmin/pkg/async/schedule/aws/shared.go index b529604bfe..a3162fbb83 100644 --- a/flyteadmin/pkg/async/schedule/aws/shared.go +++ b/flyteadmin/pkg/async/schedule/aws/shared.go @@ -5,11 +5,11 @@ import ( "fmt" "hash/fnv" - "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" "github.com/flyteorg/flytestdlib/logger" ) -func hashIdentifier(identifier admin.NamedEntityIdentifier) uint64 { +func hashIdentifier(identifier core.Identifier) uint64 { h := fnv.New64() _, err := h.Write([]byte(fmt.Sprintf(scheduleNameInputsFormat, identifier.Project, identifier.Domain, identifier.Name))) diff 
--git a/flyteadmin/pkg/async/schedule/aws/shared_test.go b/flyteadmin/pkg/async/schedule/aws/shared_test.go index 0456e016bd..1d7b50a396 100644 --- a/flyteadmin/pkg/async/schedule/aws/shared_test.go +++ b/flyteadmin/pkg/async/schedule/aws/shared_test.go @@ -3,15 +3,17 @@ package aws import ( "testing" - "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/magiconair/properties/assert" ) func TestHashIdentifier(t *testing.T) { - identifier := admin.NamedEntityIdentifier{ + identifier := core.Identifier{ Project: "project", Domain: "domain", Name: "name", + Version: "ignored", } hashedValue := hashIdentifier(identifier) assert.Equal(t, uint64(16301494360130577061), hashedValue) diff --git a/flyteadmin/pkg/async/schedule/aws/workflow_executor.go b/flyteadmin/pkg/async/schedule/aws/workflow_executor.go index ab2595eb68..03064fef6b 100644 --- a/flyteadmin/pkg/async/schedule/aws/workflow_executor.go +++ b/flyteadmin/pkg/async/schedule/aws/workflow_executor.go @@ -127,7 +127,7 @@ func (e *workflowExecutor) getActiveLaunchPlanVersion(launchPlanIdentifier *admi } func generateExecutionName(launchPlan admin.LaunchPlan, kickoffTime time.Time) string { - hashedIdentifier := hashIdentifier(admin.NamedEntityIdentifier{ + hashedIdentifier := hashIdentifier(core.Identifier{ Project: launchPlan.Id.Project, Domain: launchPlan.Id.Domain, Name: launchPlan.Id.Name, diff --git a/flyteadmin/pkg/async/schedule/factory.go b/flyteadmin/pkg/async/schedule/factory.go index f8a8bb53f5..7b22312c89 100644 --- a/flyteadmin/pkg/async/schedule/factory.go +++ b/flyteadmin/pkg/async/schedule/factory.go @@ -4,17 +4,18 @@ import ( "context" "time" - "github.com/flyteorg/flyteadmin/pkg/async" - gizmoConfig "github.com/NYTimes/gizmo/pubsub/aws" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" + "github.com/flyteorg/flyteadmin/pkg/async" awsSchedule "github.com/flyteorg/flyteadmin/pkg/async/schedule/aws" 
"github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" "github.com/flyteorg/flyteadmin/pkg/async/schedule/noop" "github.com/flyteorg/flyteadmin/pkg/common" managerInterfaces "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" + "github.com/flyteorg/flyteadmin/pkg/repositories" runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + flytescheduler "github.com/flyteorg/flyteadmin/scheduler/dbapi" "github.com/flyteorg/flytestdlib/logger" "github.com/flyteorg/flytestdlib/promutils" ) @@ -56,7 +57,7 @@ func (w *workflowScheduler) GetWorkflowExecutor( return w.workflowExecutor } -func NewWorkflowScheduler(cfg WorkflowSchedulerConfig) WorkflowScheduler { +func NewWorkflowScheduler(db repositories.RepositoryInterface, cfg WorkflowSchedulerConfig) WorkflowScheduler { var eventScheduler interfaces.EventScheduler var workflowExecutor interfaces.WorkflowExecutor @@ -81,7 +82,9 @@ func NewWorkflowScheduler(cfg WorkflowSchedulerConfig) WorkflowScheduler { cfg.SchedulerConfig.EventSchedulerConfig.ScheduleRole, cfg.SchedulerConfig.EventSchedulerConfig.TargetName, sess, awsConfig, cfg.Scope.NewSubScope("cloudwatch_scheduler")) case common.Local: - fallthrough + logger.Infof(context.Background(), + "Using default flyte scheduler implementation") + eventScheduler = flytescheduler.New(db) default: logger.Infof(context.Background(), "Using default noop event scheduler implementation for cloud provider type [%s]", diff --git a/flyteadmin/pkg/async/schedule/interfaces/event_scheduler.go b/flyteadmin/pkg/async/schedule/interfaces/event_scheduler.go index 2798df277b..11252d24a4 100644 --- a/flyteadmin/pkg/async/schedule/interfaces/event_scheduler.go +++ b/flyteadmin/pkg/async/schedule/interfaces/event_scheduler.go @@ -4,12 +4,14 @@ package interfaces import ( "context" + appInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" ) type 
AddScheduleInput struct { // Defines the unique identifier associated with the schedule - Identifier admin.NamedEntityIdentifier + Identifier core.Identifier // Defines the schedule expression. ScheduleExpression admin.Schedule // Message payload encoded as an CloudWatch event rule InputTemplate. @@ -20,7 +22,7 @@ type AddScheduleInput struct { type RemoveScheduleInput struct { // Defines the unique identifier associated with the schedule - Identifier admin.NamedEntityIdentifier + Identifier core.Identifier // Optional: The application-wide prefix to be applied for schedule names. ScheduleNamePrefix string } @@ -29,6 +31,10 @@ type EventScheduler interface { // Schedules an event. AddSchedule(ctx context.Context, input AddScheduleInput) error + // CreateScheduleInput builds the AddScheduleInput from the scheduler config, the launch plan identifier and the schedule. + CreateScheduleInput(ctx context.Context, appConfig *appInterfaces.SchedulerConfig, identifier core.Identifier, + schedule *admin.Schedule) (AddScheduleInput, error) + // Removes an existing schedule. 
RemoveSchedule(ctx context.Context, input RemoveScheduleInput) error } diff --git a/flyteadmin/pkg/async/schedule/mocks/mock_event_scheduler.go b/flyteadmin/pkg/async/schedule/mocks/mock_event_scheduler.go index 388dc749b8..1d63ddf42c 100644 --- a/flyteadmin/pkg/async/schedule/mocks/mock_event_scheduler.go +++ b/flyteadmin/pkg/async/schedule/mocks/mock_event_scheduler.go @@ -3,7 +3,11 @@ package mocks import ( "context" + "github.com/flyteorg/flyteadmin/pkg/async/schedule/aws" "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" + runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" ) type AddScheduleFunc func(ctx context.Context, input interfaces.AddScheduleInput) error @@ -13,6 +17,18 @@ type MockEventScheduler struct { removeScheduleFunc RemoveScheduleFunc } +func (s *MockEventScheduler) CreateScheduleInput(ctx context.Context, appConfig *runtimeInterfaces.SchedulerConfig, + identifier core.Identifier, schedule *admin.Schedule) (interfaces.AddScheduleInput, error) { + payload, _ := aws.SerializeScheduleWorkflowPayload( + schedule.GetKickoffTimeInputArg(), + admin.NamedEntityIdentifier{ + Project: identifier.Project, + Domain: identifier.Domain, + Name: identifier.Name, + }) + return interfaces.AddScheduleInput{Identifier: identifier, ScheduleExpression: *schedule, Payload: payload}, nil +} + func (s *MockEventScheduler) AddSchedule(ctx context.Context, input interfaces.AddScheduleInput) error { if s.addScheduleFunc != nil { return s.addScheduleFunc(ctx, input) diff --git a/flyteadmin/pkg/async/schedule/noop/event_scheduler.go b/flyteadmin/pkg/async/schedule/noop/event_scheduler.go index 2ac444ae2f..42d2eefe6f 100644 --- a/flyteadmin/pkg/async/schedule/noop/event_scheduler.go +++ b/flyteadmin/pkg/async/schedule/noop/event_scheduler.go @@ -5,12 +5,18 @@ import ( "context" 
"github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" - + runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" "github.com/flyteorg/flytestdlib/logger" ) type EventScheduler struct{} +func (s *EventScheduler) CreateScheduleInput(ctx context.Context, appConfig *runtimeInterfaces.SchedulerConfig, identifier core.Identifier, schedule *admin.Schedule) (interfaces.AddScheduleInput, error) { + panic("implement me") +} + func (s *EventScheduler) AddSchedule(ctx context.Context, input interfaces.AddScheduleInput) error { logger.Debugf(ctx, "Received call to add schedule [%+v]", input) logger.Debug(ctx, "Not scheduling anything") diff --git a/flyteadmin/pkg/manager/impl/launch_plan_manager.go b/flyteadmin/pkg/manager/impl/launch_plan_manager.go index a3614268d0..19ae1ebb4c 100644 --- a/flyteadmin/pkg/manager/impl/launch_plan_manager.go +++ b/flyteadmin/pkg/manager/impl/launch_plan_manager.go @@ -7,8 +7,6 @@ import ( "github.com/flyteorg/flytestdlib/contextutils" - "github.com/flyteorg/flyteadmin/pkg/async/schedule/aws" - "github.com/flyteorg/flytestdlib/promutils" "github.com/prometheus/client_golang/prometheus" @@ -157,28 +155,21 @@ func isScheduleEmpty(launchPlanSpec admin.LaunchPlanSpec) bool { return true } -func (m *LaunchPlanManager) enableSchedule(ctx context.Context, launchPlanIdentifier admin.NamedEntityIdentifier, +func (m *LaunchPlanManager) enableSchedule(ctx context.Context, launchPlanIdentifier core.Identifier, launchPlanSpec admin.LaunchPlanSpec) error { - payload, err := aws.SerializeScheduleWorkflowPayload( - launchPlanSpec.EntityMetadata.Schedule.GetKickoffTimeInputArg(), - launchPlanIdentifier) + addScheduleInput, err := m.scheduler.CreateScheduleInput(ctx, + m.config.ApplicationConfiguration().GetSchedulerConfig(), launchPlanIdentifier, + launchPlanSpec.EntityMetadata.Schedule) if err != nil { - 
logger.Errorf(ctx, "failed to serialize schedule workflow payload for launch plan: %v with err: %v", - launchPlanIdentifier, err) return err } - addScheduleInput := scheduleInterfaces.AddScheduleInput{ - Identifier: launchPlanIdentifier, - ScheduleExpression: *launchPlanSpec.EntityMetadata.Schedule, - Payload: payload, - ScheduleNamePrefix: m.config.ApplicationConfiguration().GetSchedulerConfig().EventSchedulerConfig.ScheduleNamePrefix, - } + return m.scheduler.AddSchedule(ctx, addScheduleInput) } func (m *LaunchPlanManager) disableSchedule( - ctx context.Context, launchPlanIdentifier admin.NamedEntityIdentifier) error { + ctx context.Context, launchPlanIdentifier core.Identifier) error { return m.scheduler.RemoveSchedule(ctx, scheduleInterfaces.RemoveScheduleInput{ Identifier: launchPlanIdentifier, ScheduleNamePrefix: m.config.ApplicationConfiguration().GetSchedulerConfig().EventSchedulerConfig.ScheduleNamePrefix, @@ -193,10 +184,11 @@ func (m *LaunchPlanManager) updateSchedules( logger.Errorf(ctx, "failed to unmarshal newly enabled launch plan spec") return errors.NewFlyteAdminErrorf(codes.Internal, "failed to unmarshal newly enabled launch plan spec") } - launchPlanIdentifier := admin.NamedEntityIdentifier{ + launchPlanIdentifier := core.Identifier{ Project: newlyActiveLaunchPlan.Project, Domain: newlyActiveLaunchPlan.Domain, Name: newlyActiveLaunchPlan.Name, + Version: newlyActiveLaunchPlan.Version, } var formerlyActiveLaunchPlanSpec admin.LaunchPlanSpec if formerlyActiveLaunchPlan != nil { @@ -205,16 +197,16 @@ func (m *LaunchPlanManager) updateSchedules( return errors.NewFlyteAdminErrorf(codes.Internal, "failed to unmarshal formerly enabled launch plan spec") } } - if proto.Equal(formerlyActiveLaunchPlanSpec.GetEntityMetadata().GetSchedule(), - newlyActiveLaunchPlanSpec.GetEntityMetadata().GetSchedule()) { - // Nothing to change/update. - logger.Infof(ctx, "activating launch plan [%+v] with identical schedule to previous version. 
"+ - "Not updating any schedules", launchPlanIdentifier) - return nil - } + if !isScheduleEmpty(formerlyActiveLaunchPlanSpec) { // Disable previous schedule - if err = m.disableSchedule(ctx, launchPlanIdentifier); err != nil { + formerlyActiveLaunchPlanIdentifier := core.Identifier{ + Project: formerlyActiveLaunchPlan.Project, + Domain: formerlyActiveLaunchPlan.Domain, + Name: formerlyActiveLaunchPlan.Name, + Version: formerlyActiveLaunchPlan.Version, + } + if err = m.disableSchedule(ctx, formerlyActiveLaunchPlanIdentifier); err != nil { return err } logger.Infof(ctx, "Disabled schedules for deactivated launch plan [%+v]", launchPlanIdentifier) @@ -240,11 +232,7 @@ func (m *LaunchPlanManager) disableLaunchPlan(ctx context.Context, request admin logger.Debugf(ctx, "couldn't find launch plan [%+v] to disable with err: %v", request.Id, err) return nil, err } - if launchPlanModel.State == nil || *launchPlanModel.State == int32(admin.LaunchPlanState_INACTIVE) { - // Nothing to do. - logger.Debugf(ctx, "disable launch plan called on already inactive launch plan [%+v] nothing to do", request.Id) - return &admin.LaunchPlanUpdateResponse{}, nil - } + err = m.updateLaunchPlanModelState(&launchPlanModel, admin.LaunchPlanState_INACTIVE) if err != nil { logger.Debugf(ctx, "failed to disable launch plan [%+v] with err: %v", request.Id, err) @@ -259,10 +247,11 @@ func (m *LaunchPlanManager) disableLaunchPlan(ctx context.Context, request admin "failed to unmarshal launch plan spec when disabling schedule for %+v", request.Id) } if launchPlanSpec.EntityMetadata != nil && launchPlanSpec.EntityMetadata.Schedule != nil { - err = m.disableSchedule(ctx, admin.NamedEntityIdentifier{ + err = m.disableSchedule(ctx, core.Identifier{ Project: launchPlanModel.Project, Domain: launchPlanModel.Domain, Name: launchPlanModel.Name, + Version: launchPlanModel.Version, }) if err != nil { return nil, err diff --git a/flyteadmin/pkg/manager/impl/launch_plan_manager_test.go 
b/flyteadmin/pkg/manager/impl/launch_plan_manager_test.go index e6f4d72ab9..942cf710e0 100644 --- a/flyteadmin/pkg/manager/impl/launch_plan_manager_test.go +++ b/flyteadmin/pkg/manager/impl/launch_plan_manager_test.go @@ -45,10 +45,11 @@ var launchPlanIdentifier = core.Identifier{ Version: version, } -var launchPlanNamedIdentifier = admin.NamedEntityIdentifier{ +var launchPlanNamedIdentifier = core.Identifier{ Project: project, Domain: domain, Name: name, + Version: "version", } func getMockRepositoryForLpTest() repositories.RepositoryInterface { @@ -512,6 +513,7 @@ func TestUpdateSchedules(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "version", }, Spec: newLaunchPlanSpecBytes, }, @@ -520,6 +522,7 @@ func TestUpdateSchedules(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "version", }, Spec: oldLaunchPlanSpecBytes, }) @@ -528,7 +531,7 @@ func TestUpdateSchedules(t *testing.T) { assert.True(t, addCalled) } -func TestUpdateSchedules_NothingToDisable(t *testing.T) { +func TestUpdateSchedules_NothingToDisableButRedo(t *testing.T) { newScheduleExpression := admin.Schedule{ ScheduleExpression: &admin.Schedule_CronExpression{ CronExpression: "cron", @@ -544,10 +547,11 @@ func TestUpdateSchedules_NothingToDisable(t *testing.T) { var addCalled bool mockScheduler.(*mocks.MockEventScheduler).SetAddScheduleFunc( func(ctx context.Context, input scheduleInterfaces.AddScheduleInput) error { - assert.True(t, proto.Equal(&admin.NamedEntityIdentifier{ + assert.True(t, proto.Equal(&core.Identifier{ Project: project, Domain: domain, Name: name, + Version: "v1", }, &input.Identifier)) assert.True(t, proto.Equal(&newScheduleExpression, &input.ScheduleExpression)) addCalled = true @@ -560,6 +564,7 @@ func TestUpdateSchedules_NothingToDisable(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "v1", }, Spec: newLaunchPlanSpecBytes, }, nil) @@ -581,6 +586,7 @@ func TestUpdateSchedules_NothingToDisable(t *testing.T) { 
Project: project, Domain: domain, Name: name, + Version: "v1", }, Spec: newLaunchPlanSpecBytes, }, &models.LaunchPlan{ @@ -590,7 +596,7 @@ func TestUpdateSchedules_NothingToDisable(t *testing.T) { assert.True(t, addCalled) } -func TestUpdateSchedules_NothingToEnable(t *testing.T) { +func TestUpdateSchedules_NothingToEnableButRedo(t *testing.T) { oldScheduleExpression := admin.Schedule{ ScheduleExpression: &admin.Schedule_Rate{ Rate: &admin.FixedRate{ @@ -609,11 +615,13 @@ func TestUpdateSchedules_NothingToEnable(t *testing.T) { var removeCalled bool mockScheduler.(*mocks.MockEventScheduler).SetRemoveScheduleFunc( func(ctx context.Context, input scheduleInterfaces.RemoveScheduleInput) error { - assert.True(t, proto.Equal(&admin.NamedEntityIdentifier{ + areEqual := proto.Equal(&core.Identifier{ Project: project, Domain: domain, Name: name, - }, &input.Identifier)) + Version: "v1", + }, &input.Identifier) + assert.True(t, areEqual) removeCalled = true return nil }) @@ -625,12 +633,14 @@ func TestUpdateSchedules_NothingToEnable(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "v1", }, }, &models.LaunchPlan{ LaunchPlanKey: models.LaunchPlanKey{ Project: project, Domain: domain, Name: name, + Version: "v1", }, Spec: oldLaunchPlanSpecBytes, }) @@ -638,7 +648,7 @@ func TestUpdateSchedules_NothingToEnable(t *testing.T) { assert.True(t, removeCalled) } -func TestUpdateSchedules_NothingToDo(t *testing.T) { +func TestUpdateSchedules_NothingToDoButRedo(t *testing.T) { scheduleExpression := admin.Schedule{ ScheduleExpression: &admin.Schedule_CronExpression{ CronExpression: "cron", @@ -672,6 +682,7 @@ func TestUpdateSchedules_NothingToDo(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "v1", }, Spec: launchPlanSpecBytes, }, &models.LaunchPlan{ @@ -679,29 +690,32 @@ func TestUpdateSchedules_NothingToDo(t *testing.T) { Project: project, Domain: domain, Name: name, + Version: "v1", }, Spec: launchPlanSpecBytes, }) assert.Nil(t, err) - 
assert.False(t, removeCalled) - assert.False(t, addCalled) + assert.True(t, removeCalled) + assert.True(t, addCalled) err = lpManager.(*LaunchPlanManager).updateSchedules(context.Background(), models.LaunchPlan{ LaunchPlanKey: models.LaunchPlanKey{ Project: project, Domain: domain, Name: name, + Version: "v1", }, }, &models.LaunchPlan{ LaunchPlanKey: models.LaunchPlanKey{ Project: project, Domain: domain, Name: name, + Version: "v1", }, }) assert.Nil(t, err) - assert.False(t, removeCalled) - assert.False(t, addCalled) + assert.True(t, removeCalled) + assert.True(t, addCalled) } func TestUpdateSchedules_EnableNoSchedule(t *testing.T) { diff --git a/flyteadmin/pkg/repositories/config/database.go b/flyteadmin/pkg/repositories/config/database.go index 6f3f0dde77..6b00bd4a29 100644 --- a/flyteadmin/pkg/repositories/config/database.go +++ b/flyteadmin/pkg/repositories/config/database.go @@ -1,5 +1,7 @@ package config +import "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + // Database config. Contains values necessary to open a database connection. 
type DbConfig struct { BaseConfig @@ -10,3 +12,17 @@ type DbConfig struct { Password string `json:"password"` ExtraOptions string `json:"options"` } + +func NewDbConfig(dbConfigValues interfaces.DbConfig) DbConfig { + return DbConfig{ + BaseConfig: BaseConfig{ + IsDebug: dbConfigValues.Debug, + }, + Host: dbConfigValues.Host, + Port: dbConfigValues.Port, + DbName: dbConfigValues.DbName, + User: dbConfigValues.User, + Password: dbConfigValues.Password, + ExtraOptions: dbConfigValues.ExtraOptions, + } +} diff --git a/flyteadmin/pkg/repositories/config/migrations.go b/flyteadmin/pkg/repositories/config/migrations.go index df2f3b58e2..8b1518d0ef 100644 --- a/flyteadmin/pkg/repositories/config/migrations.go +++ b/flyteadmin/pkg/repositories/config/migrations.go @@ -2,6 +2,7 @@ package config import ( "github.com/flyteorg/flyteadmin/pkg/repositories/models" + schedulerModels "github.com/flyteorg/flyteadmin/scheduler/repositories/models" "github.com/jinzhu/gorm" gormigrate "gopkg.in/gormigrate.v1" ) @@ -305,4 +306,24 @@ var Migrations = []*gormigrate.Migration{ return tx.Model(&models.NodeExecution{}).DropColumn("dynamic_workflow_remote_closure_reference").Error }, }, + + { + ID: "2021-07-22-schedulable_entities", + Migrate: func(tx *gorm.DB) error { + return tx.AutoMigrate(&schedulerModels.SchedulableEntity{}).Error + }, + Rollback: func(tx *gorm.DB) error { + return tx.DropTable("schedulable_entities").Error + }, + }, + + { + ID: "2021-08-05-schedulable_entities_snapshot", + Migrate: func(tx *gorm.DB) error { + return tx.AutoMigrate(&schedulerModels.ScheduleEntitiesSnapshot{}).Error + }, + Rollback: func(tx *gorm.DB) error { + return tx.DropTable("schedulable_entities_snapshot").Error + }, + }, } diff --git a/flyteadmin/pkg/repositories/config/postgres.go b/flyteadmin/pkg/repositories/config/postgres.go index 1f0198387e..7eb1a35e4f 100644 --- a/flyteadmin/pkg/repositories/config/postgres.go +++ b/flyteadmin/pkg/repositories/config/postgres.go @@ -4,7 +4,6 @@ import ( 
"fmt" "github.com/flyteorg/flytestdlib/promutils" - "github.com/jinzhu/gorm" _ "github.com/jinzhu/gorm/dialects/postgres" // Required to import database driver. "github.com/qor/validations" diff --git a/flyteadmin/pkg/repositories/errors/errors.go b/flyteadmin/pkg/repositories/errors/errors.go index b87e1f39c4..072bcc2871 100644 --- a/flyteadmin/pkg/repositories/errors/errors.go +++ b/flyteadmin/pkg/repositories/errors/errors.go @@ -8,15 +8,20 @@ import ( ) const ( - notFound = "missing entity of type %s with identifier %v" - idNotFound = "missing entity of type %s" - invalidInput = "missing and/or invalid parameters: %s" + singletonNotFound = "missing singleton entity of type %s" + notFound = "missing entity of type %s with identifier %v" + idNotFound = "missing entity of type %s" + invalidInput = "missing and/or invalid parameters: %s" ) func GetMissingEntityError(entityType string, identifier proto.Message) errors.FlyteAdminError { return errors.NewFlyteAdminErrorf(codes.NotFound, notFound, entityType, identifier) } +func GetSingletonMissingEntityError(entityType string) errors.FlyteAdminError { + return errors.NewFlyteAdminErrorf(codes.NotFound, singletonNotFound, entityType) +} + func GetMissingEntityByIDError(entityType string) errors.FlyteAdminError { return errors.NewFlyteAdminErrorf(codes.NotFound, idNotFound, entityType) } diff --git a/flyteadmin/pkg/repositories/factory.go b/flyteadmin/pkg/repositories/factory.go index 900eae10d7..25eb0c05d7 100644 --- a/flyteadmin/pkg/repositories/factory.go +++ b/flyteadmin/pkg/repositories/factory.go @@ -6,6 +6,7 @@ import ( "github.com/flyteorg/flyteadmin/pkg/repositories/config" "github.com/flyteorg/flyteadmin/pkg/repositories/errors" "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" + schedulerInterfaces "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" "github.com/flyteorg/flytestdlib/promutils" ) @@ -34,6 +35,8 @@ type RepositoryInterface interface { NodeExecutionEventRepo() 
interfaces.NodeExecutionEventRepoInterface TaskExecutionRepo() interfaces.TaskExecutionRepoInterface NamedEntityRepo() interfaces.NamedEntityRepoInterface + SchedulableEntityRepo() schedulerInterfaces.SchedulableEntityRepoInterface + ScheduleEntitiesSnapshotRepo() schedulerInterfaces.ScheduleEntitiesSnapShotRepoInterface } func GetRepository(repoType RepoConfig, dbConfig config.DbConfig, scope promutils.Scope) RepositoryInterface { diff --git a/flyteadmin/pkg/repositories/mocks/repository.go b/flyteadmin/pkg/repositories/mocks/repository.go index aa701d08cb..ab0a43e292 100644 --- a/flyteadmin/pkg/repositories/mocks/repository.go +++ b/flyteadmin/pkg/repositories/mocks/repository.go @@ -3,20 +3,32 @@ package mocks import ( "github.com/flyteorg/flyteadmin/pkg/repositories" "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" + sIface "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" + sMocks "github.com/flyteorg/flyteadmin/scheduler/repositories/mocks" ) type MockRepository struct { - taskRepo interfaces.TaskRepoInterface - workflowRepo interfaces.WorkflowRepoInterface - launchPlanRepo interfaces.LaunchPlanRepoInterface - executionRepo interfaces.ExecutionRepoInterface - ExecutionEventRepoIface interfaces.ExecutionEventRepoInterface - nodeExecutionRepo interfaces.NodeExecutionRepoInterface - NodeExecutionEventRepoIface interfaces.NodeExecutionEventRepoInterface - projectRepo interfaces.ProjectRepoInterface - resourceRepo interfaces.ResourceRepoInterface - taskExecutionRepo interfaces.TaskExecutionRepoInterface - namedEntityRepo interfaces.NamedEntityRepoInterface + taskRepo interfaces.TaskRepoInterface + workflowRepo interfaces.WorkflowRepoInterface + launchPlanRepo interfaces.LaunchPlanRepoInterface + executionRepo interfaces.ExecutionRepoInterface + ExecutionEventRepoIface interfaces.ExecutionEventRepoInterface + nodeExecutionRepo interfaces.NodeExecutionRepoInterface + NodeExecutionEventRepoIface interfaces.NodeExecutionEventRepoInterface 
+ projectRepo interfaces.ProjectRepoInterface + resourceRepo interfaces.ResourceRepoInterface + taskExecutionRepo interfaces.TaskExecutionRepoInterface + namedEntityRepo interfaces.NamedEntityRepoInterface + schedulableEntityRepo sIface.SchedulableEntityRepoInterface + schedulableEntitySnapshotRepo sIface.ScheduleEntitiesSnapShotRepoInterface +} + +func (r *MockRepository) SchedulableEntityRepo() sIface.SchedulableEntityRepoInterface { + return r.schedulableEntityRepo +} + +func (r *MockRepository) ScheduleEntitiesSnapshotRepo() sIface.ScheduleEntitiesSnapShotRepoInterface { + return r.schedulableEntitySnapshotRepo } func (r *MockRepository) TaskRepo() interfaces.TaskRepoInterface { @@ -65,16 +77,18 @@ func (r *MockRepository) NamedEntityRepo() interfaces.NamedEntityRepoInterface { func NewMockRepository() repositories.RepositoryInterface { return &MockRepository{ - taskRepo: NewMockTaskRepo(), - workflowRepo: NewMockWorkflowRepo(), - launchPlanRepo: NewMockLaunchPlanRepo(), - executionRepo: NewMockExecutionRepo(), - nodeExecutionRepo: NewMockNodeExecutionRepo(), - projectRepo: NewMockProjectRepo(), - resourceRepo: NewMockResourceRepo(), - taskExecutionRepo: NewMockTaskExecutionRepo(), - namedEntityRepo: NewMockNamedEntityRepo(), - ExecutionEventRepoIface: &ExecutionEventRepoInterface{}, - NodeExecutionEventRepoIface: &NodeExecutionEventRepoInterface{}, + taskRepo: NewMockTaskRepo(), + workflowRepo: NewMockWorkflowRepo(), + launchPlanRepo: NewMockLaunchPlanRepo(), + executionRepo: NewMockExecutionRepo(), + nodeExecutionRepo: NewMockNodeExecutionRepo(), + projectRepo: NewMockProjectRepo(), + resourceRepo: NewMockResourceRepo(), + taskExecutionRepo: NewMockTaskExecutionRepo(), + namedEntityRepo: NewMockNamedEntityRepo(), + ExecutionEventRepoIface: &ExecutionEventRepoInterface{}, + NodeExecutionEventRepoIface: &NodeExecutionEventRepoInterface{}, + schedulableEntityRepo: &sMocks.SchedulableEntityRepoInterface{}, + schedulableEntitySnapshotRepo: 
&sMocks.ScheduleEntitiesSnapShotRepoInterface{}, } } diff --git a/flyteadmin/pkg/repositories/postgres_repo.go b/flyteadmin/pkg/repositories/postgres_repo.go index a9fadfd142..183d6e6c16 100644 --- a/flyteadmin/pkg/repositories/postgres_repo.go +++ b/flyteadmin/pkg/repositories/postgres_repo.go @@ -4,22 +4,26 @@ import ( "github.com/flyteorg/flyteadmin/pkg/repositories/errors" "github.com/flyteorg/flyteadmin/pkg/repositories/gormimpl" "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" + schedulerGormImpl "github.com/flyteorg/flyteadmin/scheduler/repositories/gormimpl" + schedulerInterfaces "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" "github.com/flyteorg/flytestdlib/promutils" "github.com/jinzhu/gorm" ) type PostgresRepo struct { - executionRepo interfaces.ExecutionRepoInterface - executionEventRepo interfaces.ExecutionEventRepoInterface - namedEntityRepo interfaces.NamedEntityRepoInterface - launchPlanRepo interfaces.LaunchPlanRepoInterface - projectRepo interfaces.ProjectRepoInterface - nodeExecutionRepo interfaces.NodeExecutionRepoInterface - nodeExecutionEventRepo interfaces.NodeExecutionEventRepoInterface - taskRepo interfaces.TaskRepoInterface - taskExecutionRepo interfaces.TaskExecutionRepoInterface - workflowRepo interfaces.WorkflowRepoInterface - resourceRepo interfaces.ResourceRepoInterface + executionRepo interfaces.ExecutionRepoInterface + executionEventRepo interfaces.ExecutionEventRepoInterface + namedEntityRepo interfaces.NamedEntityRepoInterface + launchPlanRepo interfaces.LaunchPlanRepoInterface + projectRepo interfaces.ProjectRepoInterface + nodeExecutionRepo interfaces.NodeExecutionRepoInterface + nodeExecutionEventRepo interfaces.NodeExecutionEventRepoInterface + taskRepo interfaces.TaskRepoInterface + taskExecutionRepo interfaces.TaskExecutionRepoInterface + workflowRepo interfaces.WorkflowRepoInterface + resourceRepo interfaces.ResourceRepoInterface + schedulableEntityRepo 
schedulerInterfaces.SchedulableEntityRepoInterface + scheduleEntitiesSnapshotRepo schedulerInterfaces.ScheduleEntitiesSnapShotRepoInterface } func (p *PostgresRepo) ExecutionRepo() interfaces.ExecutionRepoInterface { @@ -66,18 +70,28 @@ func (p *PostgresRepo) ResourceRepo() interfaces.ResourceRepoInterface { return p.resourceRepo } +func (p *PostgresRepo) SchedulableEntityRepo() schedulerInterfaces.SchedulableEntityRepoInterface { + return p.schedulableEntityRepo +} + +func (p *PostgresRepo) ScheduleEntitiesSnapshotRepo() schedulerInterfaces.ScheduleEntitiesSnapShotRepoInterface { + return p.scheduleEntitiesSnapshotRepo +} + func NewPostgresRepo(db *gorm.DB, errorTransformer errors.ErrorTransformer, scope promutils.Scope) RepositoryInterface { return &PostgresRepo{ - executionRepo: gormimpl.NewExecutionRepo(db, errorTransformer, scope.NewSubScope("executions")), - executionEventRepo: gormimpl.NewExecutionEventRepo(db, errorTransformer, scope.NewSubScope("execution_events")), - launchPlanRepo: gormimpl.NewLaunchPlanRepo(db, errorTransformer, scope.NewSubScope("launch_plans")), - projectRepo: gormimpl.NewProjectRepo(db, errorTransformer, scope.NewSubScope("project")), - namedEntityRepo: gormimpl.NewNamedEntityRepo(db, errorTransformer, scope.NewSubScope("named_entity")), - nodeExecutionRepo: gormimpl.NewNodeExecutionRepo(db, errorTransformer, scope.NewSubScope("node_executions")), - nodeExecutionEventRepo: gormimpl.NewNodeExecutionEventRepo(db, errorTransformer, scope.NewSubScope("node_execution_events")), - taskRepo: gormimpl.NewTaskRepo(db, errorTransformer, scope.NewSubScope("tasks")), - taskExecutionRepo: gormimpl.NewTaskExecutionRepo(db, errorTransformer, scope.NewSubScope("task_executions")), - workflowRepo: gormimpl.NewWorkflowRepo(db, errorTransformer, scope.NewSubScope("workflows")), - resourceRepo: gormimpl.NewResourceRepo(db, errorTransformer, scope.NewSubScope("resources")), + executionRepo: gormimpl.NewExecutionRepo(db, errorTransformer, 
scope.NewSubScope("executions")), + executionEventRepo: gormimpl.NewExecutionEventRepo(db, errorTransformer, scope.NewSubScope("execution_events")), + launchPlanRepo: gormimpl.NewLaunchPlanRepo(db, errorTransformer, scope.NewSubScope("launch_plans")), + projectRepo: gormimpl.NewProjectRepo(db, errorTransformer, scope.NewSubScope("project")), + namedEntityRepo: gormimpl.NewNamedEntityRepo(db, errorTransformer, scope.NewSubScope("named_entity")), + nodeExecutionRepo: gormimpl.NewNodeExecutionRepo(db, errorTransformer, scope.NewSubScope("node_executions")), + nodeExecutionEventRepo: gormimpl.NewNodeExecutionEventRepo(db, errorTransformer, scope.NewSubScope("node_execution_events")), + taskRepo: gormimpl.NewTaskRepo(db, errorTransformer, scope.NewSubScope("tasks")), + taskExecutionRepo: gormimpl.NewTaskExecutionRepo(db, errorTransformer, scope.NewSubScope("task_executions")), + workflowRepo: gormimpl.NewWorkflowRepo(db, errorTransformer, scope.NewSubScope("workflows")), + resourceRepo: gormimpl.NewResourceRepo(db, errorTransformer, scope.NewSubScope("resources")), + schedulableEntityRepo: schedulerGormImpl.NewSchedulableEntityRepo(db, errorTransformer, scope.NewSubScope("schedulable_entity")), + scheduleEntitiesSnapshotRepo: schedulerGormImpl.NewScheduleEntitiesSnapshotRepo(db, errorTransformer, scope.NewSubScope("schedule_entities_snapshot")), } } diff --git a/flyteadmin/pkg/rpc/adminservice/base.go b/flyteadmin/pkg/rpc/adminservice/base.go index 132b8334c8..95e8514017 100644 --- a/flyteadmin/pkg/rpc/adminservice/base.go +++ b/flyteadmin/pkg/rpc/adminservice/base.go @@ -113,7 +113,7 @@ func NewAdminServer(kubeConfig, master string) *AdminService { // Configure workflow scheduler async processes. 
schedulerConfig := configuration.ApplicationConfiguration().GetSchedulerConfig() - workflowScheduler := schedule.NewWorkflowScheduler(schedule.WorkflowSchedulerConfig{ + workflowScheduler := schedule.NewWorkflowScheduler(db, schedule.WorkflowSchedulerConfig{ Retries: defaultRetries, SchedulerConfig: *schedulerConfig, Scope: adminScope, diff --git a/flyteadmin/pkg/runtime/application_config_provider.go b/flyteadmin/pkg/runtime/application_config_provider.go index ed7f87b33c..84755a0224 100644 --- a/flyteadmin/pkg/runtime/application_config_provider.go +++ b/flyteadmin/pkg/runtime/application_config_provider.go @@ -40,10 +40,17 @@ var flyteAdminConfig = config.MustRegisterSection(flyteAdmin, &interfaces.Applic }) var schedulerConfig = config.MustRegisterSection(scheduler, &interfaces.SchedulerConfig{ EventSchedulerConfig: interfaces.EventSchedulerConfig{ - Scheme: common.Local, + Scheme: common.Local, + FlyteSchedulerConfig: &interfaces.FlyteSchedulerConfig{}, }, WorkflowExecutorConfig: interfaces.WorkflowExecutorConfig{ Scheme: common.Local, + FlyteWorkflowExecutorConfig: &interfaces.FlyteWorkflowExecutorConfig{ + AdminRateLimit: &interfaces.AdminRateLimit{ + Tps: 100, + Burst: 10, + }, + }, }, }) var remoteDataConfig = config.MustRegisterSection(remoteData, &interfaces.RemoteDataConfig{ diff --git a/flyteadmin/pkg/runtime/interfaces/application_configuration.go b/flyteadmin/pkg/runtime/interfaces/application_configuration.go index 03620d9de9..029fc17116 100644 --- a/flyteadmin/pkg/runtime/interfaces/application_configuration.go +++ b/flyteadmin/pkg/runtime/interfaces/application_configuration.go @@ -1,5 +1,7 @@ package interfaces +import "golang.org/x/time/rate" + // This configuration section is used to for initiating the database connection with the store that holds registered // entities (e.g. workflows, tasks, launch plans...) // This struct specifically maps to the flyteadmin config yaml structure. 
@@ -69,6 +71,48 @@ type EventSchedulerConfig struct { // Defines the cloud provider that backs the scheduler. In the absence of a specification the no-op, 'local' // scheme is used. Scheme string `json:"scheme"` + + // Deprecated : Some cloud providers require a region to be set. + Region string `json:"region"` + // Deprecated : The role assumed to register and activate schedules. + ScheduleRole string `json:"scheduleRole"` + // Deprecated : The name of the queue for which scheduled events should enqueue. + TargetName string `json:"targetName"` + // Deprecated : Optional: The application-wide prefix to be applied for schedule names. + ScheduleNamePrefix string `json:"scheduleNamePrefix"` + AWSSchedulerConfig *AWSSchedulerConfig `json:"aws"` + FlyteSchedulerConfig *FlyteSchedulerConfig `json:"local"` +} + +func (e *EventSchedulerConfig) GetScheme() string { + return e.Scheme +} + +func (e *EventSchedulerConfig) GetRegion() string { + return e.Region +} + +func (e *EventSchedulerConfig) GetScheduleRole() string { + return e.ScheduleRole +} + +func (e *EventSchedulerConfig) GetTargetName() string { + return e.TargetName +} + +func (e *EventSchedulerConfig) GetScheduleNamePrefix() string { + return e.ScheduleNamePrefix +} + +func (e *EventSchedulerConfig) GetAWSSchedulerConfig() *AWSSchedulerConfig { + return e.AWSSchedulerConfig +} + +func (e *EventSchedulerConfig) GetFlyteSchedulerConfig() *FlyteSchedulerConfig { + return e.FlyteSchedulerConfig +} + +type AWSSchedulerConfig struct { // Some cloud providers require a region to be set. Region string `json:"region"` // The role assumed to register and activate schedules. 
@@ -79,11 +123,67 @@ type EventSchedulerConfig struct { ScheduleNamePrefix string `json:"scheduleNamePrefix"` } +func (a *AWSSchedulerConfig) GetRegion() string { + return a.Region +} + +func (a *AWSSchedulerConfig) GetScheduleRole() string { + return a.ScheduleRole +} + +func (a *AWSSchedulerConfig) GetTargetName() string { + return a.TargetName +} + +func (a *AWSSchedulerConfig) GetScheduleNamePrefix() string { + return a.ScheduleNamePrefix +} + +// FlyteSchedulerConfig is the config for native or default flyte scheduler +type FlyteSchedulerConfig struct { +} + // This section holds configuration for the executor that processes workflow scheduled events fired. type WorkflowExecutorConfig struct { // Defines the cloud provider that backs the scheduler. In the absence of a specification the no-op, 'local' // scheme is used. Scheme string `json:"scheme"` + // Deprecated : Some cloud providers require a region to be set. + Region string `json:"region"` + // Deprecated : The name of the queue onto which scheduled events will enqueue. + ScheduleQueueName string `json:"scheduleQueueName"` + // Deprecated : The account id (according to whichever cloud provider scheme is used) that has permission to read from the above + // queue. 
+ AccountID string `json:"accountId"` + AWSWorkflowExecutorConfig *AWSWorkflowExecutorConfig `json:"aws"` + FlyteWorkflowExecutorConfig *FlyteWorkflowExecutorConfig `json:"local"` +} + +func (w *WorkflowExecutorConfig) GetScheme() string { + return w.Scheme +} + +func (w *WorkflowExecutorConfig) GetRegion() string { + return w.Region +} + +func (w *WorkflowExecutorConfig) GetScheduleScheduleQueueName() string { + return w.ScheduleQueueName +} + +func (w *WorkflowExecutorConfig) GetAccountID() string { + return w.AccountID +} + +func (w *WorkflowExecutorConfig) GetAWSWorkflowExecutorConfig() *AWSWorkflowExecutorConfig { + return w.AWSWorkflowExecutorConfig +} + +func (w *WorkflowExecutorConfig) GetFlyteWorkflowExecutorConfig() *FlyteWorkflowExecutorConfig { + return w.FlyteWorkflowExecutorConfig +} + +type AWSWorkflowExecutorConfig struct { // Some cloud providers require a region to be set. Region string `json:"region"` // The name of the queue onto which scheduled events will enqueue. @@ -93,6 +193,43 @@ type WorkflowExecutorConfig struct { AccountID string `json:"accountId"` } +func (a *AWSWorkflowExecutorConfig) GetRegion() string { + return a.Region +} + +func (a *AWSWorkflowExecutorConfig) GetScheduleScheduleQueueName() string { + return a.ScheduleQueueName +} + +func (a *AWSWorkflowExecutorConfig) GetAccountID() string { + return a.AccountID +} + +// FlyteWorkflowExecutorConfig specifies the workflow executor configuration for the native flyte scheduler +type FlyteWorkflowExecutorConfig struct { + // This allows to control the number of TPS that hit admin using the scheduler. + // eg : 100 TPS will send at the max 100 schedule requests to admin per sec. 
+ // Burst specifies burst traffic count + AdminRateLimit *AdminRateLimit `json:"adminRateLimit"` +} + +func (f *FlyteWorkflowExecutorConfig) GetAdminRateLimit() *AdminRateLimit { + return f.AdminRateLimit +} + +type AdminRateLimit struct { + Tps rate.Limit `json:"tps"` + Burst int `json:"burst"` +} + +func (f *AdminRateLimit) GetTps() rate.Limit { + return f.Tps +} + +func (f *AdminRateLimit) GetBurst() int { + return f.Burst +} + // This configuration is the base configuration for all scheduler-related set-up. type SchedulerConfig struct { EventSchedulerConfig EventSchedulerConfig `json:"eventScheduler"` @@ -103,6 +240,22 @@ type SchedulerConfig struct { ReconnectDelaySeconds int `json:"reconnectDelaySeconds"` } +func (s *SchedulerConfig) GetEventSchedulerConfig() EventSchedulerConfig { + return s.EventSchedulerConfig +} + +func (s *SchedulerConfig) GetWorkflowExecutorConfig() WorkflowExecutorConfig { + return s.WorkflowExecutorConfig +} + +func (s *SchedulerConfig) GetReconnectAttempts() int { + return s.ReconnectAttempts +} + +func (s *SchedulerConfig) GetReconnectDelaySeconds() int { + return s.ReconnectDelaySeconds +} + // Configuration specific to setting up signed urls. type SignedURL struct { // The amount of time for which a signed URL is valid. diff --git a/flyteadmin/scheduler/core/doc.go b/flyteadmin/scheduler/core/doc.go new file mode 100644 index 0000000000..e909d8f181 --- /dev/null +++ b/flyteadmin/scheduler/core/doc.go @@ -0,0 +1,9 @@ +// Package core +// This is core package for the scheduler which includes +// - scheduler interface +// - scheduler implementation using gocron https://github.com/robfig/cron +// - updater which updates the schedules in the scheduler by reading periodically from the DB +// - snapshot runner which snapshot the schedules with there last exec times so that it can be used as check point +// in case of a crash. After a crash the scheduler replays the schedules from the last recorded snapshot. 
+// It relies on the admin idempotency aspect to fail executions if the execution with a scheduled time already exists with it. +package core diff --git a/flyteadmin/scheduler/core/gocron_job.go b/flyteadmin/scheduler/core/gocron_job.go new file mode 100644 index 0000000000..e40a5af32a --- /dev/null +++ b/flyteadmin/scheduler/core/gocron_job.go @@ -0,0 +1,41 @@ +package core + +import ( + "context" + "fmt" + "runtime/pprof" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flytestdlib/contextutils" + "github.com/flyteorg/flytestdlib/logger" + + "github.com/robfig/cron/v3" +) + +// GoCronJob this provides a wrapper around the go cron libraries job function. +type GoCronJob struct { + ctx context.Context + nameOfSchedule string + schedule models.SchedulableEntity + funcWithSchedule TimedFuncWithSchedule + lastTime *time.Time + catchupFromTime *time.Time + entryID cron.EntryID +} + +func (g *GoCronJob) Run(t time.Time) { + // Create job function label to be used for creating the child context + jobFuncLabel := fmt.Sprintf("jobfunc-%v", g.nameOfSchedule) + jobFuncCtxWithLabel := contextutils.WithGoroutineLabel(g.ctx, jobFuncLabel) + // TODO : add panic counter metric + + pprof.SetGoroutineLabels(jobFuncCtxWithLabel) + if err := g.funcWithSchedule(jobFuncCtxWithLabel, g.schedule, t); err != nil { + logger.Errorf(jobFuncCtxWithLabel, "Got error while scheduling %v", err) + } + // Update the lastTime only if new trigger time t is after lastTime. 
+ if g.lastTime == nil || g.lastTime.Before(t) { + g.lastTime = &t + } +} diff --git a/flyteadmin/scheduler/core/gocron_scheduler.go b/flyteadmin/scheduler/core/gocron_scheduler.go new file mode 100644 index 0000000000..cfc3253e04 --- /dev/null +++ b/flyteadmin/scheduler/core/gocron_scheduler.go @@ -0,0 +1,337 @@ +package core + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/executor" + "github.com/flyteorg/flyteadmin/scheduler/identifier" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteadmin/scheduler/snapshoter" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flytestdlib/logger" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/prometheus/client_golang/prometheus" + "github.com/robfig/cron/v3" + "golang.org/x/time/rate" +) + +// goCronMetrics mertrics recorded for go cron. +type goCronMetrics struct { + Scope promutils.Scope + JobFuncPanicCounter prometheus.Counter + JobScheduledFailedCounter prometheus.Counter + CatchupErrCounter prometheus.Counter +} + +// GoCronScheduler this provides a scheduler functionality using the https://github.com/robfig/cron library. 
+type GoCronScheduler struct { + cron *cron.Cron + jobStore sync.Map + metrics goCronMetrics + rateLimiter *rate.Limiter + executor executor.Executor + snapshot snapshoter.Snapshot +} + +func (g *GoCronScheduler) GetTimedFuncWithSchedule() TimedFuncWithSchedule { + return func(jobCtx context.Context, schedule models.SchedulableEntity, scheduleTime time.Time) error { + _ = g.rateLimiter.Wait(jobCtx) + err := g.executor.Execute(jobCtx, scheduleTime, schedule) + if err != nil { + logger.Errorf(jobCtx, "unable to fire the schedule %+v at %v time due to %v", schedule, scheduleTime, + err) + } + return err + } +} + +func (g *GoCronScheduler) BootStrapSchedulesFromSnapShot(ctx context.Context, schedules []models.SchedulableEntity, + snapshot snapshoter.Snapshot) { + for _, s := range schedules { + if *s.Active { + funcRef := g.GetTimedFuncWithSchedule() + nameOfSchedule := identifier.GetScheduleName(ctx, s) + // Initialize the lastExectime as the updatedAt time + // Assumption here that schedule was activated and that the 0th execution of the schedule + // which will be used as a reference + lastExecTime := &s.UpdatedAt + + fromSnapshot := snapshot.GetLastExecutionTime(nameOfSchedule) + // Use the latest time if available in the snapshot + if fromSnapshot != nil && fromSnapshot.After(s.UpdatedAt) { + lastExecTime = fromSnapshot + } + err := g.ScheduleJob(ctx, s, funcRef, lastExecTime) + if err != nil { + g.metrics.JobScheduledFailedCounter.Inc() + logger.Errorf(ctx, "unable to register the schedule %+v due to %v", s, err) + } + } + } +} + +func (g *GoCronScheduler) UpdateSchedules(ctx context.Context, schedules []models.SchedulableEntity) { + for _, s := range schedules { + // Schedule or Deschedule job from the scheduler based on the activation status + if !*s.Active { + g.DeScheduleJob(ctx, s) + } else { + // Get the TimedFuncWithSchedule + funcRef := g.GetTimedFuncWithSchedule() + err := g.ScheduleJob(ctx, s, funcRef, nil) + if err != nil { + 
g.metrics.JobScheduledFailedCounter.Inc() + logger.Errorf(ctx, "unable to register the schedule %+v due to %v", s, err) + } + } + } // Done iterating over all the read schedules +} + +func (g *GoCronScheduler) CalculateSnapshot(ctx context.Context) snapshoter.Snapshot { + snapshot := g.snapshot.Create() + g.jobStore.Range(func(key, value interface{}) bool { + job := value.(*GoCronJob) + scheduleIdentifier := key.(string) + if job.lastTime != nil { + snapshot.UpdateLastExecutionTime(scheduleIdentifier, job.lastTime) + } + return true + }) + return snapshot +} + +func (g *GoCronScheduler) ScheduleJob(ctx context.Context, schedule models.SchedulableEntity, + funcWithSchedule TimedFuncWithSchedule, lastTime *time.Time) error { + + nameOfSchedule := identifier.GetScheduleName(ctx, schedule) + + if _, ok := g.jobStore.Load(nameOfSchedule); ok { + logger.Debugf(ctx, "Job already exists in the map for name %v with schedule %+v", + nameOfSchedule, schedule) + return nil + } + + // Update the catchupFrom time as the lastTime. + // Here lastTime is passed to this function only from BootStrapSchedulesFromSnapShot which is during bootup + // Once initialized we wont be changing the catchupTime until the next boot + job := &GoCronJob{nameOfSchedule: nameOfSchedule, schedule: schedule, funcWithSchedule: funcWithSchedule, + catchupFromTime: lastTime, ctx: ctx} + + // Define the timed job function to be used for the callback at the scheduled time + //jobFunc := job.GetTimedFunc(ctx, g.metrics) + + if len(job.schedule.CronExpression) > 0 { + err := g.AddCronJob(ctx, job) + if err != nil { + logger.Errorf(ctx, "failed to add cron schedule %+v due to %v", schedule, err) + return err + } + } else { + err := g.AddFixedIntervalJob(ctx, job) + if err != nil { + logger.Errorf(ctx, "failed to add fixed rate schedule %+v due to %v", schedule, err) + return err + } + } + // Store only if there are no errors. 
+ g.jobStore.Store(nameOfSchedule, job) + return nil +} + +func (g *GoCronScheduler) DeScheduleJob(ctx context.Context, schedule models.SchedulableEntity) { + nameOfSchedule := identifier.GetScheduleName(ctx, schedule) + if _, ok := g.jobStore.Load(nameOfSchedule); !ok { + logger.Debugf(ctx, "Job doesn't exists in the map for name %v with schedule %+v "+ + " and hence already removed", nameOfSchedule, schedule) + return + } + val, _ := g.jobStore.Load(nameOfSchedule) + jobWrapper := val.(*GoCronJob) + + s := jobWrapper.schedule + if len(s.CronExpression) > 0 { + g.RemoveCronJob(ctx, jobWrapper) + } else { + g.RemoveFixedIntervalJob(ctx, jobWrapper) + } + + // Delete it from the job store + g.jobStore.Delete(nameOfSchedule) +} + +func (g *GoCronScheduler) CatchupAll(ctx context.Context, until time.Time) bool { + failed := false + g.jobStore.Range(func(key, value interface{}) bool { + job := value.(*GoCronJob) + var fromTime *time.Time + if !*job.schedule.Active { + logger.Debugf(ctx, "schedule %+v was inactive during catchup", job.schedule) + return true + } + + fromTime = job.catchupFromTime + if fromTime != nil { + logger.Infof(ctx, "catching up schedule %+v from %v to %v", job.schedule, fromTime, until) + err := g.CatchUpSingleSchedule(ctx, job.schedule, *fromTime, until) + if err != nil { + // stop the iteration since one of the catchups failed + failed = true + return false + } + logger.Infof(ctx, "caught up successfully on the schedule %+v from %v to %v", job.schedule, fromTime, until) + } + return true + }) + return !failed +} + +func (g *GoCronScheduler) CatchUpSingleSchedule(ctx context.Context, s models.SchedulableEntity, fromTime time.Time, toTime time.Time) error { + var catchUpTimes []time.Time + var err error + catchUpTimes, err = GetCatchUpTimes(s, fromTime, toTime) + if err != nil { + return err + } + var catchupTime time.Time + for _, catchupTime = range catchUpTimes { + _ = g.rateLimiter.Wait(ctx) + err := g.executor.Execute(ctx, catchupTime, s) + 
if err != nil { + g.metrics.CatchupErrCounter.Inc() + logger.Errorf(ctx, "unable to fire the schedule %+v at %v time due to %v", s, catchupTime, err) + return err + } + } + return nil +} + +func GetCatchUpTimes(s models.SchedulableEntity, from time.Time, to time.Time) ([]time.Time, error) { + var scheduledTimes []time.Time + currFrom := from + for currFrom.Before(to) { + scheduledTime, err := GetScheduledTime(s, currFrom) + if err != nil { + return nil, err + } + scheduledTimes = append(scheduledTimes, scheduledTime) + currFrom = scheduledTime + } + return scheduledTimes, nil +} + +func GetScheduledTime(s models.SchedulableEntity, fromTime time.Time) (time.Time, error) { + if len(s.CronExpression) > 0 { + return getCronScheduledTime(s.CronExpression, fromTime) + } + return getFixedIntervalScheduledTime(s.Unit, s.FixedRateValue, fromTime) +} + +func getCronScheduledTime(cronString string, fromTime time.Time) (time.Time, error) { + sched, err := cron.ParseStandard(cronString) + if err != nil { + return time.Time{}, err + } + return sched.Next(fromTime), nil +} + +func getFixedIntervalScheduledTime(unit admin.FixedRateUnit, fixedRateValue uint32, fromTime time.Time) (time.Time, error) { + d, err := getFixedRateDurationFromSchedule(unit, fixedRateValue) + if err != nil { + return time.Time{}, err + } + fixedRateSchedule := cron.ConstantDelaySchedule{Delay: d} + return fixedRateSchedule.Next(fromTime), nil +} + +func (g *GoCronScheduler) AddFixedIntervalJob(ctx context.Context, job *GoCronJob) error { + d, err := getFixedRateDurationFromSchedule(job.schedule.Unit, job.schedule.FixedRateValue) + if err != nil { + return err + } + + //nolint + var jobFunc cron.TimedFuncJob + jobFunc = job.Run + + g.cron.ScheduleTimedJob(cron.ConstantDelaySchedule{Delay: d}, jobFunc) + logger.Infof(ctx, "successfully added the fixed rate schedule %s to the scheduler for schedule %+v", + job.nameOfSchedule, job.schedule) + + return nil +} + +func (g *GoCronScheduler) 
RemoveFixedIntervalJob(ctx context.Context, job *GoCronJob) { + g.cron.Remove(job.entryID) + logger.Infof(ctx, "successfully removed the schedule %s from scheduler for schedule %+v", + job.nameOfSchedule, job.schedule) +} + +func (g *GoCronScheduler) AddCronJob(ctx context.Context, job *GoCronJob) error { + //nolint + var jobFunc cron.TimedFuncJob + jobFunc = job.Run + + entryID, err := g.cron.AddTimedJob(job.schedule.CronExpression, jobFunc) + // Update the enttry id in the job which is handle to be used for removal + job.entryID = entryID + if err == nil { + logger.Infof(ctx, "successfully added the schedule %s to the scheduler for schedule %+v", + job.nameOfSchedule, job.schedule) + } + return err +} + +func (g *GoCronScheduler) RemoveCronJob(ctx context.Context, job *GoCronJob) { + g.cron.Remove(job.entryID) + logger.Infof(ctx, "successfully removed the schedule %s from scheduler for schedue %+v", + job.nameOfSchedule, job.schedule) + +} + +func getFixedRateDurationFromSchedule(unit admin.FixedRateUnit, fixedRateValue uint32) (time.Duration, error) { + d := time.Duration(fixedRateValue) + switch unit { + case admin.FixedRateUnit_MINUTE: + d = d * time.Minute + case admin.FixedRateUnit_HOUR: + d = d * time.Hour + case admin.FixedRateUnit_DAY: + d = d * time.Hour * 24 + default: + return -1, fmt.Errorf("unsupported unit %v for fixed rate scheduling ", unit) + } + return d, nil +} + +func NewGoCronScheduler(ctx context.Context, schedules []models.SchedulableEntity, scope promutils.Scope, + snapshot snapshoter.Snapshot, rateLimiter *rate.Limiter, executor executor.Executor) Scheduler { + // Create the new cron scheduler and start it off + c := cron.New() + c.Start() + scheduler := &GoCronScheduler{ + cron: c, + jobStore: sync.Map{}, + metrics: getCronMetrics(scope), + rateLimiter: rateLimiter, + executor: executor, + snapshot: snapshot, + } + scheduler.BootStrapSchedulesFromSnapShot(ctx, schedules, snapshot) + return scheduler +} + +func getCronMetrics(scope 
promutils.Scope) goCronMetrics { + return goCronMetrics{ + Scope: scope, + JobFuncPanicCounter: scope.MustNewCounter("job_func_panic_counter", + "count of crashes for the job functions executed by the scheduler"), + JobScheduledFailedCounter: scope.MustNewCounter("job_schedule_failed_counter", + "count of scheduling failures by the scheduler"), + CatchupErrCounter: scope.MustNewCounter("catchup_error_counter", + "count of unsuccessful attempts to catchup on the schedules"), + } +} diff --git a/flyteadmin/scheduler/core/scheduler.go b/flyteadmin/scheduler/core/scheduler.go new file mode 100644 index 0000000000..b0ede2550e --- /dev/null +++ b/flyteadmin/scheduler/core/scheduler.go @@ -0,0 +1,28 @@ +package core + +import ( + "context" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteadmin/scheduler/snapshoter" +) + +type TimedFuncWithSchedule func(ctx context.Context, s models.SchedulableEntity, t time.Time) error + +// Scheduler is the main scheduler interfaces for scheduling/descheduling jobs, updating the schedules, +// calculating snapshot of the schedules , bootstrapping the scheduler from the snapshot as well as the catcup functionality +type Scheduler interface { + // ScheduleJob allows to schedule a job using the implemented scheduler + ScheduleJob(ctx context.Context, s models.SchedulableEntity, f TimedFuncWithSchedule, lastT *time.Time) error + // DeScheduleJob allows to remove a scheduled job using the implemented scheduler + DeScheduleJob(ctx context.Context, s models.SchedulableEntity) + // BootStrapSchedulesFromSnapShot allows to initialize the scheduler from a previous snapshot of the schedule executions + BootStrapSchedulesFromSnapShot(ctx context.Context, schedules []models.SchedulableEntity, snapshot snapshoter.Snapshot) + // UpdateSchedules updates all the schedules in the schedulers job store + UpdateSchedules(ctx context.Context, s []models.SchedulableEntity) + // CalculateSnapshot creates a 
snapshot of the existing state of the schedules run by the scheduler which can be used in case of failure. + CalculateSnapshot(ctx context.Context) snapshoter.Snapshot + // CatchupAll catches up all the schedules in the schedulers job store to the until time + CatchupAll(ctx context.Context, until time.Time) bool +} diff --git a/flyteadmin/scheduler/core/snapshot_runner.go b/flyteadmin/scheduler/core/snapshot_runner.go new file mode 100644 index 0000000000..55f6e31642 --- /dev/null +++ b/flyteadmin/scheduler/core/snapshot_runner.go @@ -0,0 +1,26 @@ +package core + +import ( + "context" + + sImpl "github.com/flyteorg/flyteadmin/scheduler/snapshoter" +) + +const snapShotVersion = 1 + +// Snapshotrunner allows the ability to snapshot the scheduler state and save it to the db. +// Its invoked periodically from the scheduledExecutor +type Snapshotrunner struct { + snapshoter sImpl.Persistence + scheduler Scheduler +} + +func (u Snapshotrunner) Run(ctx context.Context) { + snapshot := u.scheduler.CalculateSnapshot(ctx) + snapshotWriter := &sImpl.VersionedSnapshot{Version: snapShotVersion} + u.snapshoter.Save(ctx, snapshotWriter, snapshot) +} + +func NewSnapshotRunner(snapshoter sImpl.Persistence, scheduler Scheduler) Snapshotrunner { + return Snapshotrunner{snapshoter: snapshoter, scheduler: scheduler} +} diff --git a/flyteadmin/scheduler/core/updater.go b/flyteadmin/scheduler/core/updater.go new file mode 100644 index 0000000000..fa8af26ac0 --- /dev/null +++ b/flyteadmin/scheduler/core/updater.go @@ -0,0 +1,28 @@ +package core + +import ( + "context" + + "github.com/flyteorg/flyteadmin/scheduler/repositories" + "github.com/flyteorg/flytestdlib/logger" +) + +// Updater this updates the scheduler with the latest state of schedules from the DB. 
+type Updater struct { + db repositories.SchedulerRepoInterface + scheduler Scheduler +} + +func (u Updater) UpdateGoCronSchedules(ctx context.Context) { + schedules, err := u.db.SchedulableEntityRepo().GetAll(ctx) + if err != nil { + logger.Errorf(ctx, "Failed to fetch the schedules in this round due to %v", err) + return + } + u.scheduler.UpdateSchedules(ctx, schedules) +} + +func NewUpdater(db repositories.SchedulerRepoInterface, + scheduler Scheduler) Updater { + return Updater{db: db, scheduler: scheduler} +} diff --git a/flyteadmin/scheduler/dbapi/doc.go b/flyteadmin/scheduler/dbapi/doc.go new file mode 100644 index 0000000000..ee30331470 --- /dev/null +++ b/flyteadmin/scheduler/dbapi/doc.go @@ -0,0 +1,5 @@ +// Package dbapi +// This package implements the event scheduler interface which is called whenever a launchplan is enabled or disabled. +// Using this api the launchplan manager chooses to activate or deactivate a schedule if it has a cron or fixed rate +// schedule. +package dbapi diff --git a/flyteadmin/scheduler/dbapi/event_scheduler_impl.go b/flyteadmin/scheduler/dbapi/event_scheduler_impl.go new file mode 100644 index 0000000000..030f2515a3 --- /dev/null +++ b/flyteadmin/scheduler/dbapi/event_scheduler_impl.go @@ -0,0 +1,87 @@ +package dbapi + +import ( + "context" + "fmt" + + "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" + scheduleInterfaces "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" + runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + "github.com/flyteorg/flyteadmin/scheduler/repositories" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flytestdlib/logger" +) + +// eventScheduler used for saving the scheduler entries after launch plans are enabled or disabled. 
+type eventScheduler struct { + db repositories.SchedulerRepoInterface +} + +func (s *eventScheduler) CreateScheduleInput(ctx context.Context, appConfig *runtimeInterfaces.SchedulerConfig, + identifier core.Identifier, schedule *admin.Schedule) (interfaces.AddScheduleInput, error) { + + addScheduleInput := scheduleInterfaces.AddScheduleInput{ + Identifier: identifier, + ScheduleExpression: *schedule, + } + return addScheduleInput, nil +} + +func (s *eventScheduler) AddSchedule(ctx context.Context, input interfaces.AddScheduleInput) error { + logger.Infof(ctx, "Received call to add schedule [%+v]", input) + var cronString string + var fixedRateValue uint32 + var fixedRateUnit admin.FixedRateUnit + switch v := input.ScheduleExpression.GetScheduleExpression().(type) { + case *admin.Schedule_Rate: + fixedRateValue = v.Rate.Value + fixedRateUnit = v.Rate.Unit + case *admin.Schedule_CronSchedule: + cronString = v.CronSchedule.Schedule + default: + return fmt.Errorf("failed adding schedule for unknown schedule expression type %v", v) + } + active := true + modelInput := models.SchedulableEntity{ + CronExpression: cronString, + FixedRateValue: fixedRateValue, + Unit: fixedRateUnit, + KickoffTimeInputArg: input.ScheduleExpression.KickoffTimeInputArg, + Active: &active, + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: input.Identifier.Project, + Domain: input.Identifier.Domain, + Name: input.Identifier.Name, + Version: input.Identifier.Version, + }, + } + err := s.db.SchedulableEntityRepo().Activate(ctx, modelInput) + if err != nil { + return err + } + logger.Infof(ctx, "Activated scheduled entity for %v ", input) + return nil +} + +func (s *eventScheduler) RemoveSchedule(ctx context.Context, input interfaces.RemoveScheduleInput) error { + logger.Infof(ctx, "Received call to remove schedule [%+v]. 
Will deactivate it in the scheduler", input.Identifier) + + err := s.db.SchedulableEntityRepo().Deactivate(ctx, models.SchedulableEntityKey{ + Project: input.Identifier.Project, + Domain: input.Identifier.Domain, + Name: input.Identifier.Name, + Version: input.Identifier.Version, + }) + + if err != nil { + return err + } + logger.Infof(ctx, "Deactivated the schedule %v in the scheduler", input) + return nil +} + +func New(db repositories.SchedulerRepoInterface) interfaces.EventScheduler { + return &eventScheduler{db: db} +} diff --git a/flyteadmin/scheduler/dbapi/event_scheduler_impl_test.go b/flyteadmin/scheduler/dbapi/event_scheduler_impl_test.go new file mode 100644 index 0000000000..3d38ef5fdd --- /dev/null +++ b/flyteadmin/scheduler/dbapi/event_scheduler_impl_test.go @@ -0,0 +1,141 @@ +package dbapi + +import ( + "context" + "testing" + + "github.com/flyteorg/flyteadmin/pkg/async/schedule/interfaces" + "github.com/flyteorg/flyteadmin/pkg/repositories" + "github.com/flyteorg/flyteadmin/pkg/repositories/mocks" + schedMocks "github.com/flyteorg/flyteadmin/scheduler/repositories/mocks" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +var ( + db repositories.RepositoryInterface +) + +func setupEventScheduler() interfaces.EventScheduler { + db = mocks.NewMockRepository() + return New(db) +} + +func TestCreateScheduleInput(t *testing.T) { + eventScheduler := setupEventScheduler() + schedule := &admin.Schedule{ + ScheduleExpression: &admin.Schedule_CronSchedule{ + CronSchedule: &admin.CronSchedule{ + Schedule: "*/1 * * * *", + }, + }, + KickoffTimeInputArg: "kickoff_time", + } + addScheduleInput, err := eventScheduler.CreateScheduleInput(context.Background(), nil, core.Identifier{ + Project: "project", + Domain: "domain", + Name: "scheduled_wroflow", + Version: "v1", + }, schedule) + assert.Nil(t, err) + assert.NotNil(t, 
addScheduleInput) +} + +func TestRemoveSchedule(t *testing.T) { + eventScheduler := setupEventScheduler() + + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + scheduleEntitiesRepo.OnDeactivateMatch(mock.Anything, mock.Anything).Return(nil) + + err := eventScheduler.RemoveSchedule(context.Background(), interfaces.RemoveScheduleInput{ + Identifier: core.Identifier{ + Project: "project", + Domain: "domain", + Name: "scheduled_wroflow", + Version: "v1", + }, + }) + assert.Nil(t, err) +} + +func TestAddSchedule(t *testing.T) { + t.Run("schedule_rate", func(t *testing.T) { + eventScheduler := setupEventScheduler() + schedule := admin.Schedule{ + ScheduleExpression: &admin.Schedule_Rate{ + Rate: &admin.FixedRate{ + Value: 1, + Unit: admin.FixedRateUnit_MINUTE, + }, + }, + KickoffTimeInputArg: "kickoff_time", + } + + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + scheduleEntitiesRepo.OnActivateMatch(mock.Anything, mock.Anything).Return(nil) + + err := eventScheduler.AddSchedule(context.Background(), interfaces.AddScheduleInput{ + Identifier: core.Identifier{ + Project: "project", + Domain: "domain", + Name: "scheduled_wroflow", + Version: "v1", + }, + ScheduleExpression: schedule, + }) + assert.Nil(t, err) + }) + + t.Run("cron_schedule", func(t *testing.T) { + eventScheduler := setupEventScheduler() + schedule := admin.Schedule{ + ScheduleExpression: &admin.Schedule_CronSchedule{ + CronSchedule: &admin.CronSchedule{ + Schedule: "*/1 * * * *", + }, + }, + KickoffTimeInputArg: "kickoff_time", + } + + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + scheduleEntitiesRepo.OnActivateMatch(mock.Anything, mock.Anything).Return(nil) + + err := eventScheduler.AddSchedule(context.Background(), interfaces.AddScheduleInput{ + Identifier: core.Identifier{ + Project: "project", + Domain: "domain", + Name: "scheduled_wroflow", + Version: 
"v1", + }, + ScheduleExpression: schedule, + }) + assert.Nil(t, err) + }) + + t.Run("cron_expression_unsupported", func(t *testing.T) { + eventScheduler := setupEventScheduler() + schedule := admin.Schedule{ + ScheduleExpression: &admin.Schedule_CronExpression{ + CronExpression: "* */1 * * * *", + }, + KickoffTimeInputArg: "kickoff_time", + } + + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + scheduleEntitiesRepo.OnActivateMatch(mock.Anything, mock.Anything).Return(nil) + + err := eventScheduler.AddSchedule(context.Background(), interfaces.AddScheduleInput{ + Identifier: core.Identifier{ + Project: "project", + Domain: "domain", + Name: "scheduled_wroflow", + Version: "v1", + }, + ScheduleExpression: schedule, + }) + assert.NotNil(t, err) + }) +} diff --git a/flyteadmin/scheduler/doc.go b/flyteadmin/scheduler/doc.go new file mode 100644 index 0000000000..48691e35e1 --- /dev/null +++ b/flyteadmin/scheduler/doc.go @@ -0,0 +1,100 @@ +// Package scheduler +// Flyte scheduler implementation that allows to schedule fixed rate and cron schedules on sandbox deployment +// Scheduler has two components +// 1] Schedule management +// This component is part of the pkg/async/schedule/flytescheduler package +// Role of this component is to create / activate / deactivate schedules +// The above actions are exposed through launchplan activation/deactivation api's and donot have separate controls. +// Whenever a launchplan with a schedule is activated, a new schedule entry is created in the datastore +// On deactivation the created scheduled and launchplan is deactivated through a flag +// Atmost one launchplan is active at any moment across its various versions and same semantics apply for the +// schedules as well. 
+// 2] Scheduler +// This component is a singleton and has its source in the current folder and is responsible for reading the schedules +// from the DB and running them at the cadence defined by the schedule +// The lowest granularity supported is minutes for scheduling through cron and fixed rate scheduler +// The scheduler should be running in one replica, two at the most during redeployment. Multiple replicas will just +// duplicate the work since each execution for a scheduleTime will have a unique identifier derived from schedule name +// and time of the schedule. The idempotency aspect of the admin for the same identifier prevents duplication on the admin +// side. +// The scheduler runs continuously in a loop reading the updated schedule entries in the data store and adding or removing +// the schedules. Removing a schedule will not alter in-flight go-routines launched by the scheduler. +// Thus the behavior of these executions is undefined (most probably will get executed). +// Sub components: +// a) Snapshoter +// This component is responsible for writing the snapshot state of all the schedules at a regular cadence to a +// persistent store. The current implementation uses DB to store the GOB format of the snapshot which is versioned. +// The snapshot is map[string]time.Time which stores a map of schedule names to their last execution times +// During bootup the snapshot is bootstrapped from the data store and loaded in memory +// The Scheduler uses this snapshot to schedule any missed schedules. +// +// We cannot use a global snapshot time since each time snapshot doesn't contain information on how many schedules +// were executed till that point in time. And hence the need to maintain map[string]time.Time of schedules to their +// lastExectimes +// In the future we may support global snapshots, such that we can record the last successfully considered +// time for each schedule and select the lowest as the watermark.
Currently, since the underlying scheduler +// does not expose the last considered time, we just calculate our own watermark per schedule. +// b) CatchupAll-System : +// This component runs at bootup and catches up all the schedules to the current time.Now() +// The scheduler is not run until all the schedules have been caught up. +// The current design is also not to snapshot until all the schedules are caught up. +// This might be a drawback in case catch up runs for a long time and hasn't been snapshotted.(reassess) +// c) GOCronWrapper : +// This component is responsible for locking in the time for the scheduled job to be invoked and adding those +// to the cron scheduler. Right now this uses https://github.com/robfig/cron/v3 framework for fixed rate and cron +// schedules +// The scheduler provides the ability to schedule a function with scheduleTime parameter. This is useful to know, +// once the scheduled function is invoked, which scheduled time the invocation is for. +// This scheduler supports standard cron scheduling which has 5 fields +// https://en.wikipedia.org/wiki/Cron +// It requires 5 entries +// representing: minute, hour, day of month, month and day of week, in that order. +// +// It accepts +// - Standard crontab specs, e.g. "* * * * ?" +// - Descriptors, e.g. "@midnight", "@every 1h30m" +// d) Job function : +// The job function accepts the scheduleTime and the schedule which is used for creating an execution request +// to the admin. Each job function is tied to a schedule and gets executed in a separate goroutine by the cron +// framework according to the schedule cadence. + +// Failure scenarios: +// a) Case when the schedule is activated but launchplan is not.
Ideally admin should throw an error here but it +// allows to launch the scheduled execution. Bug tracked here: https://github.com/flyteorg/flyte/issues/1354 +// Once this issue is fixed, then the scheduler behavior would be to find the specific new error defined for this +// scenario, e.g. LaunchPlanNotActivated, and skip the scheduled time execution after the failure. +// It will continue to hit the admin with new future scheduled times where the problem can get fixed for the launchplan. +// Hence it's expected to not schedule the executions during such a discrepancy. The user needs to reactivate the +// launchplan to fix the issue. +// eg: activate launch plan L1 with version V1 and create schedule. (One Api call) +// - Create schedule for L1,V1 succeeds +// - Activate launchplan fails for L1, V1 +// - API returns failure +// Reactivate the launchplan by calling the API again to fix the discrepancy between the schedule and launchplan +// During the discrepancy the executions won't be scheduled on admin once the bug(1354) is fixed. +// +// b) Case when scheduled time T1 execution fails. The goroutine executing for T1 will go through 30 repetitions before +// aborting the run. In such a scenario it's possible that a future scheduled time T2 succeeds and gets executed successfully +// by the admin. i.e. admin could execute the schedules in this order T2, T1.
This is rare case though +// +// c) Case when the scheduler goes down then once it comes back up it will run catch up on all the schedules using +// the last snapshoted timestamp to time.Now() +// +// d) Case when the snapshoter fails to record the last execution at T2 but has recorded at T1, where T1 < T2 , +// then new schedules would be created from T1 -> time.Now() during catchup and the idempotency aspect of the admin +// will take care of not rescheduling the already scheduled execution from T1 -> Crash time +// +// e) Case when the scheduler is down and the old schedule gets deactivated, then during catchup the scheduler won't +// create executions for it. It doesn't matter how many activation/deactivations have happened during the downtime, +// but the scheduler will go by the recent activation state +// +// f) Similarly in case of scheduler being down and an old schedule gets activated,then during catchup the scheduler +// would run catch from updated_at timestamp till now. It doesn't matter how many activation/deactivations have +// happened during the downtime, but the scheduler will go by the recent activation state. +// +// g) Case there are multiple pod running with the scheduler , then we rely on the idempotency aspect of the executions +// which have a identifier derived from the hash of schedule time + launch plan identifier which would remain the same +// any other instance of the scheduler picks up and admin will return the AlreadyExists error. +// + +package scheduler diff --git a/flyteadmin/scheduler/executor/doc.go b/flyteadmin/scheduler/executor/doc.go new file mode 100644 index 0000000000..1dc13fa3f5 --- /dev/null +++ b/flyteadmin/scheduler/executor/doc.go @@ -0,0 +1,5 @@ +// Package executor +// This package provides an interface to talk to admin for scheduled executions. 
+// The implementation constructs a request using the schedule details and the passed in schedule time to be sent to admin +// for execution +package executor diff --git a/flyteadmin/scheduler/executor/executor.go b/flyteadmin/scheduler/executor/executor.go new file mode 100644 index 0000000000..8b0bc3ab87 --- /dev/null +++ b/flyteadmin/scheduler/executor/executor.go @@ -0,0 +1,14 @@ +package executor + +import ( + "context" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" +) + +// Executor allows the ability to create scheduled executions on admin +type Executor interface { + // Execute sends a scheduled execution request to admin + Execute(ctx context.Context, scheduledTime time.Time, s models.SchedulableEntity) error +} diff --git a/flyteadmin/scheduler/executor/executor_impl.go b/flyteadmin/scheduler/executor/executor_impl.go new file mode 100644 index 0000000000..f46dc35421 --- /dev/null +++ b/flyteadmin/scheduler/executor/executor_impl.go @@ -0,0 +1,146 @@ +package executor + +import ( + "context" + "strings" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/identifier" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/service" + "github.com/flyteorg/flytestdlib/logger" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/prometheus/client_golang/prometheus" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" +) + +// executor allows to call the admin with scheduled execution +type executor struct { + adminServiceClient service.AdminServiceClient + metrics executorMetrics +} + +type executorMetrics struct { + Scope promutils.Scope + FailedExecutionCounter prometheus.Counter + SuccessfulExecutionCounter 
prometheus.Counter +} + +func (w *executor) Execute(ctx context.Context, scheduledTime time.Time, s models.SchedulableEntity) error { + + literalsInputMap := map[string]*core.Literal{} + // Only add kickoff time input arg for cron based schedules + if len(s.CronExpression) > 0 { + literalsInputMap[s.KickoffTimeInputArg] = &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_Primitive{ + Primitive: &core.Primitive{ + Value: &core.Primitive_Datetime{ + Datetime: timestamppb.New(scheduledTime), + }, + }, + }, + }, + }, + } + } + + // Making the identifier deterministic using the hash of the identifier and scheduled time + executionIdentifier, err := identifier.GetExecutionIdentifier(ctx, core.Identifier{ + Project: s.Project, + Domain: s.Domain, + Name: s.Name, + Version: s.Version, + }, scheduledTime) + + if err != nil { + logger.Error(ctx, "failed to generate execution identifier for schedule %+v due to %v", s, err) + return err + } + + executionRequest := &admin.ExecutionCreateRequest{ + Project: s.Project, + Domain: s.Domain, + Name: "f" + strings.ReplaceAll(executionIdentifier.String(), "-", "")[:19], + Spec: &admin.ExecutionSpec{ + LaunchPlan: &core.Identifier{ + ResourceType: core.ResourceType_LAUNCH_PLAN, + Project: s.Project, + Domain: s.Domain, + Name: s.Name, + Version: s.Version, + }, + Metadata: &admin.ExecutionMetadata{ + Mode: admin.ExecutionMetadata_SCHEDULED, + ScheduledAt: timestamppb.New(scheduledTime), + }, + // No dynamic notifications are configured either. + }, + // No additional inputs beyond the to-be-filled-out kickoff time arg are specified. 
+ Inputs: &core.LiteralMap{ + Literals: literalsInputMap, + }, + } + if !*s.Active { + // no longer active + logger.Debugf(ctx, "schedule %+v is no longer active", s) + return nil + } + + // Do maximum of 30 retries on failures with constant backoff factor + opts := wait.Backoff{Duration: 3000, Factor: 2.0, Steps: 30} + err = retry.OnError(opts, + func(err error) bool { + // For idempotent behavior ignore the AlreadyExists error which happens if we try to schedule a launchplan + // for execution at the same time which is already available in admin. + // This is possible since idempotency guarantees are using the schedule time and the identifier + if grpcError := status.Code(err); grpcError == codes.AlreadyExists { + logger.Debugf(ctx, "duplicate schedule %+v already exists for schedule", s) + return false + } + w.metrics.FailedExecutionCounter.Inc() + logger.Error(ctx, "failed to create execution create request %+v due to %v", executionRequest, err) + // TODO: Handle the case when admin launch plan state is archived but the schedule is active. 
+ // After this bug is fixed in admin https://github.com/flyteorg/flyte/issues/1354 + return true + }, + func() error { + _, execErr := w.adminServiceClient.CreateExecution(context.Background(), executionRequest) + return execErr + }, + ) + if err != nil && status.Code(err) != codes.AlreadyExists { + logger.Error(ctx, "failed to create execution create request %+v due to %v after all retries", executionRequest, err) + return err + } + w.metrics.SuccessfulExecutionCounter.Inc() + logger.Infof(ctx, "successfully fired the request for schedule %+v for time %v", s, scheduledTime) + return nil +} + +func New(scope promutils.Scope, + adminServiceClient service.AdminServiceClient) Executor { + + return &executor{ + adminServiceClient: adminServiceClient, + metrics: getExecutorMetrics(scope), + } +} + +func getExecutorMetrics(scope promutils.Scope) executorMetrics { + return executorMetrics{ + Scope: scope, + FailedExecutionCounter: scope.MustNewCounter("failed_execution_counter", + "count of unsuccessful attempts to fire execution for a schedules"), + SuccessfulExecutionCounter: scope.MustNewCounter("successful_execution_counter", + "count of successful attempts to fire execution for a schedules"), + } +} diff --git a/flyteadmin/scheduler/executor/executor_impl_test.go b/flyteadmin/scheduler/executor/executor_impl_test.go new file mode 100644 index 0000000000..a1f653296c --- /dev/null +++ b/flyteadmin/scheduler/executor/executor_impl_test.go @@ -0,0 +1,84 @@ +package executor + +import ( + "context" + "testing" + "time" + + "github.com/flyteorg/flyteadmin/pkg/errors" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + adminMocks "github.com/flyteorg/flyteidl/clients/go/admin/mocks" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "google.golang.org/grpc/codes" +) + +var ( + mockAdminClient *adminMocks.AdminServiceClient +) + 
+func setupExecutor(scope string) Executor { + mockAdminClient = new(adminMocks.AdminServiceClient) + return New(promutils.NewScope(scope), mockAdminClient) +} + +func TestExecutor(t *testing.T) { + executor := setupExecutor("testExecutor1") + active := true + schedule := models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "cron_schedule", + Version: "v1", + }, + CronExpression: "*/1 * * * *", + KickoffTimeInputArg: "kickoff_time", + Active: &active, + } + mockAdminClient.OnCreateExecutionMatch(context.Background(), mock.Anything).Return(&admin.ExecutionCreateResponse{}, nil) + err := executor.Execute(context.Background(), time.Now(), schedule) + assert.Nil(t, err) +} + +func TestExecutorAlreadyExists(t *testing.T) { + executor := setupExecutor("testExecutor2") + active := true + schedule := models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "cron_schedule", + Version: "v1", + }, + CronExpression: "*/1 * * * *", + KickoffTimeInputArg: "kickoff_time", + Active: &active, + } + mockAdminClient.OnCreateExecutionMatch(mock.Anything, mock.Anything).Return(nil, + errors.NewFlyteAdminErrorf(codes.AlreadyExists, "Already exists")) + err := executor.Execute(context.Background(), time.Now(), schedule) + assert.Nil(t, err) +} + +func TestExecutorInactiveSchedule(t *testing.T) { + executor := setupExecutor("testExecutor3") + active := false + schedule := models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "cron_schedule", + Version: "v1", + }, + CronExpression: "*/1 * * * *", + KickoffTimeInputArg: "kickoff_time", + Active: &active, + } + mockAdminClient.OnCreateExecutionMatch(context.Background(), mock.Anything).Return(&admin.ExecutionCreateResponse{}, nil) + err := executor.Execute(context.Background(), time.Now(), schedule) + assert.Nil(t, err) +} 
diff --git a/flyteadmin/scheduler/identifier/doc.go b/flyteadmin/scheduler/identifier/doc.go new file mode 100644 index 0000000000..53217e09f7 --- /dev/null +++ b/flyteadmin/scheduler/identifier/doc.go @@ -0,0 +1,3 @@ +// Package identifier +// This package provides utility functions for creating a unique schedule name and execution name +package identifier diff --git a/flyteadmin/scheduler/identifier/identifier.go b/flyteadmin/scheduler/identifier/identifier.go new file mode 100644 index 0000000000..7f6b9a2492 --- /dev/null +++ b/flyteadmin/scheduler/identifier/identifier.go @@ -0,0 +1,71 @@ +package identifier + +import ( + "context" + "encoding/binary" + "fmt" + "hash/fnv" + "strconv" + "time" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flytestdlib/logger" + + "github.com/google/uuid" +) + +// Utility functions used by the flyte native scheduler + +const ( + scheduleNameInputsFormat = "%s:%s:%s:%s" + executionIDInputsFormat = scheduleNameInputsFormat + ":%d" +) + +// GetScheduleName generate the schedule name to be used as unique identification string within the scheduler +func GetScheduleName(ctx context.Context, s models.SchedulableEntity) string { + return strconv.FormatUint(hashIdentifier(ctx, core.Identifier{ + Project: s.Project, + Domain: s.Domain, + Name: s.Name, + Version: s.Version, + }), 10) +} + +// GetExecutionIdentifier returns UUID using the hashed value of the schedule identifier and the scheduledTime +func GetExecutionIdentifier(ctx context.Context, identifier core.Identifier, scheduledTime time.Time) (uuid.UUID, error) { + hashValue := hashScheduledTimeStamp(ctx, identifier, scheduledTime) + b := make([]byte, 16) + binary.LittleEndian.PutUint64(b, hashValue) + return uuid.FromBytes(b) +} + +// hashIdentifier returns the hash of the identifier +func hashIdentifier(ctx context.Context, identifier core.Identifier) uint64 { + h := fnv.New64() + _, 
err := h.Write([]byte(fmt.Sprintf(scheduleNameInputsFormat, + identifier.Project, identifier.Domain, identifier.Name, identifier.Version))) + if err != nil { + // This shouldn't occur. + logger.Errorf(ctx, + "failed to hash launch plan identifier: %+v to get schedule name with err: %v", identifier, err) + return 0 + } + logger.Debugf(ctx, "Returning hash for [%+v]: %d", identifier, h.Sum64()) + return h.Sum64() +} + +// hashScheduledTimeStamp return the hash of the identifier and the scheduledTime +func hashScheduledTimeStamp(ctx context.Context, identifier core.Identifier, scheduledTime time.Time) uint64 { + h := fnv.New64() + _, err := h.Write([]byte(fmt.Sprintf(executionIDInputsFormat, + identifier.Project, identifier.Domain, identifier.Name, identifier.Version, scheduledTime.Unix()))) + if err != nil { + // This shouldn't occur. + logger.Errorf(ctx, + "failed to hash launch plan identifier: %+v with scheduled time %v to get execution identifier with err: %v", identifier, scheduledTime, err) + return 0 + } + logger.Debugf(ctx, "Returning hash for [%+v] %v: %d", identifier, scheduledTime, h.Sum64()) + return h.Sum64() +} diff --git a/flyteadmin/scheduler/repositories/doc.go b/flyteadmin/scheduler/repositories/doc.go new file mode 100644 index 0000000000..304d23f0a9 --- /dev/null +++ b/flyteadmin/scheduler/repositories/doc.go @@ -0,0 +1,4 @@ +// Package repositories +// This package provides the interfaces and implementations to save and retrieve schedules and snapshots from the DB. +// Along with activating and deactivating the schedules. 
+package repositories diff --git a/flyteadmin/scheduler/repositories/factory.go b/flyteadmin/scheduler/repositories/factory.go new file mode 100644 index 0000000000..6d86ac62d8 --- /dev/null +++ b/flyteadmin/scheduler/repositories/factory.go @@ -0,0 +1,44 @@ +package repositories + +import ( + "fmt" + + "github.com/flyteorg/flyteadmin/pkg/repositories/config" + "github.com/flyteorg/flyteadmin/pkg/repositories/errors" + "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" + "github.com/flyteorg/flytestdlib/promutils" +) + +type RepoConfig int32 + +const ( + POSTGRES RepoConfig = 0 +) + +var RepositoryConfigurationName = map[int32]string{ + 0: "POSTGRES", +} + +// The SchedulerRepoInterface indicates the methods that each Repository must support. +// A Repository indicates a Database which is collection of Tables/models. +// The goal is allow databases to be Plugged in easily. +// This interface contains only scheduler specific models and tables. + +type SchedulerRepoInterface interface { + SchedulableEntityRepo() interfaces.SchedulableEntityRepoInterface + ScheduleEntitiesSnapshotRepo() interfaces.ScheduleEntitiesSnapShotRepoInterface +} + +func GetRepository(repoType RepoConfig, dbConfig config.DbConfig, scope promutils.Scope) SchedulerRepoInterface { + switch repoType { + case POSTGRES: + postgresScope := scope.NewSubScope("postgres") + db := config.OpenDbConnection(config.NewPostgresConfigProvider(dbConfig, postgresScope)) + return NewPostgresRepo( + db, + errors.NewPostgresErrorTransformer(postgresScope.NewSubScope("errors")), + postgresScope.NewSubScope("repositories")) + default: + panic(fmt.Sprintf("Invalid repoType %v", repoType)) + } +} diff --git a/flyteadmin/scheduler/repositories/gormimpl/metrics.go b/flyteadmin/scheduler/repositories/gormimpl/metrics.go new file mode 100644 index 0000000000..f00225b4b8 --- /dev/null +++ b/flyteadmin/scheduler/repositories/gormimpl/metrics.go @@ -0,0 +1,37 @@ +package gormimpl + +import ( + "time" + + 
"github.com/flyteorg/flytestdlib/promutils" +) + +// Common metrics emitted by gormimpl repos. +type gormMetrics struct { + Scope promutils.Scope + CreateDuration promutils.StopWatch + GetDuration promutils.StopWatch + UpdateDuration promutils.StopWatch + ListDuration promutils.StopWatch + ListIdentifiersDuration promutils.StopWatch + DeleteDuration promutils.StopWatch + ExistsDuration promutils.StopWatch +} + +func newMetrics(scope promutils.Scope) gormMetrics { + return gormMetrics{ + Scope: scope, + CreateDuration: scope.MustNewStopWatch( + "create", "time taken to create a new entry", time.Millisecond), + GetDuration: scope.MustNewStopWatch( + "get", "time taken to get an entry", time.Millisecond), + UpdateDuration: scope.MustNewStopWatch( + "update", "time taken to update an entry", time.Millisecond), + ListDuration: scope.MustNewStopWatch( + "list", "time taken to list entries", time.Millisecond), + ListIdentifiersDuration: scope.MustNewStopWatch( + "list_identifiers", "time taken to list identifier entries", time.Millisecond), + DeleteDuration: scope.MustNewStopWatch("delete", "time taken to delete an individual entry", time.Millisecond), + ExistsDuration: scope.MustNewStopWatch("exists", "time taken to determine whether an individual entry exists", time.Millisecond), + } +} diff --git a/flyteadmin/scheduler/repositories/gormimpl/schedulable_entity_repo.go b/flyteadmin/scheduler/repositories/gormimpl/schedulable_entity_repo.go new file mode 100644 index 0000000000..9db11d45d8 --- /dev/null +++ b/flyteadmin/scheduler/repositories/gormimpl/schedulable_entity_repo.go @@ -0,0 +1,148 @@ +package gormimpl + +import ( + "context" + "fmt" + + "github.com/flyteorg/flyteadmin/pkg/repositories/errors" + "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/jinzhu/gorm" +) + 
+// SchedulableEntityRepo Implementation of SchedulableEntityRepoInterface. +type SchedulableEntityRepo struct { + db *gorm.DB + errorTransformer errors.ErrorTransformer + metrics gormMetrics +} + +func (r *SchedulableEntityRepo) Create(ctx context.Context, input models.SchedulableEntity) error { + timer := r.metrics.GetDuration.Start() + var record models.SchedulableEntity + tx := r.db.FirstOrCreate(&record, input) + timer.Stop() + if tx.Error != nil { + return r.errorTransformer.ToFlyteAdminError(tx.Error) + } + return nil +} + +func (r *SchedulableEntityRepo) Activate(ctx context.Context, input models.SchedulableEntity) error { + var schedulableEntity models.SchedulableEntity + timer := r.metrics.GetDuration.Start() + // Find the existence of a scheduled entity + tx := r.db.Where(&models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: input.Project, + Domain: input.Domain, + Name: input.Name, + Version: input.Version, + }, + }).Take(&schedulableEntity) + timer.Stop() + + if tx.Error != nil { + if tx.RecordNotFound() { + // Not found and hence create one + return r.Create(ctx, input) + } + return r.errorTransformer.ToFlyteAdminError(tx.Error) + } + + // Activate the already existing schedule + return activateOrDeactivate(r, input.SchedulableEntityKey, true) +} + +func (r *SchedulableEntityRepo) Deactivate(ctx context.Context, ID models.SchedulableEntityKey) error { + // Activate the schedule + return activateOrDeactivate(r, ID, false) +} + +func (r *SchedulableEntityRepo) GetAll(ctx context.Context) ([]models.SchedulableEntity, error) { + var schedulableEntities []models.SchedulableEntity + timer := r.metrics.GetDuration.Start() + + tx := r.db.Find(&schedulableEntities) + + timer.Stop() + + if tx.Error != nil { + if tx.RecordNotFound() { + return nil, + fmt.Errorf("no active schedulable entities found") + } + return nil, r.errorTransformer.ToFlyteAdminError(tx.Error) + } + + return schedulableEntities, nil +} + +func (r 
*SchedulableEntityRepo) Get(ctx context.Context, ID models.SchedulableEntityKey) (models.SchedulableEntity, error) { + var schedulableEntity models.SchedulableEntity + timer := r.metrics.GetDuration.Start() + tx := r.db.Where(&models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: ID.Project, + Domain: ID.Domain, + Name: ID.Name, + Version: ID.Version, + }, + }).Take(&schedulableEntity) + timer.Stop() + + if tx.Error != nil { + if tx.RecordNotFound() { + return models.SchedulableEntity{}, + errors.GetMissingEntityError("schedulable entity", &core.Identifier{ + Project: ID.Project, + Domain: ID.Domain, + Name: ID.Name, + Version: ID.Version, + }) + } + return models.SchedulableEntity{}, r.errorTransformer.ToFlyteAdminError(tx.Error) + } + + return schedulableEntity, nil +} + +// Helper function to activate and deactivate a schedule +func activateOrDeactivate(r *SchedulableEntityRepo, ID models.SchedulableEntityKey, activate bool) error { + timer := r.metrics.GetDuration.Start() + tx := r.db.Model(&models.SchedulableEntity{}).Where(&models.SchedulableEntity{ + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: ID.Project, + Domain: ID.Domain, + Name: ID.Name, + Version: ID.Version, + }, + }).Update("active", activate) + timer.Stop() + if tx.Error != nil { + if tx.RecordNotFound() { + return errors.GetMissingEntityError("schedulable entity", &core.Identifier{ + Project: ID.Project, + Domain: ID.Domain, + Name: ID.Name, + Version: ID.Version, + }) + } + return r.errorTransformer.ToFlyteAdminError(tx.Error) + } + return nil +} + +// NewSchedulableEntityRepo Returns an instance of SchedulableEntityRepoInterface +func NewSchedulableEntityRepo( + db *gorm.DB, errorTransformer errors.ErrorTransformer, scope promutils.Scope) interfaces.SchedulableEntityRepoInterface { + metrics := newMetrics(scope) + return &SchedulableEntityRepo{ + db: db, + errorTransformer: errorTransformer, + metrics: metrics, + } +} diff --git 
a/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go b/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go new file mode 100644 index 0000000000..fcec55db3d --- /dev/null +++ b/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go @@ -0,0 +1,57 @@ +package gormimpl + +import ( + "context" + + "github.com/flyteorg/flyteadmin/pkg/repositories/errors" + interfaces2 "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/jinzhu/gorm" +) + +// ScheduleEntitiesSnapshotRepo Implementation of ScheduleEntitiesSnapshotRepoInterface. +type ScheduleEntitiesSnapshotRepo struct { + db *gorm.DB + errorTransformer errors.ErrorTransformer + metrics gormMetrics +} + +// TODO : always overwrite the exisiting snapshot instead of creating new rows +func (r *ScheduleEntitiesSnapshotRepo) Write(ctx context.Context, input models.ScheduleEntitiesSnapshot) error { + timer := r.metrics.GetDuration.Start() + tx := r.db.Create(&input) + timer.Stop() + if tx.Error != nil { + return r.errorTransformer.ToFlyteAdminError(tx.Error) + } + return nil +} + +func (r *ScheduleEntitiesSnapshotRepo) Read(ctx context.Context) (models.ScheduleEntitiesSnapshot, error) { + var schedulableEntitiesSnapshot models.ScheduleEntitiesSnapshot + timer := r.metrics.GetDuration.Start() + tx := r.db.Last(&schedulableEntitiesSnapshot) + timer.Stop() + + if tx.Error != nil { + if tx.RecordNotFound() { + return models.ScheduleEntitiesSnapshot{}, errors.GetSingletonMissingEntityError("schedule_entities_snapshots") + } + return models.ScheduleEntitiesSnapshot{}, r.errorTransformer.ToFlyteAdminError(tx.Error) + } + + return schedulableEntitiesSnapshot, nil +} + +// NewScheduleEntitiesSnapshotRepo Returns an instance of ScheduleEntitiesSnapshotRepoInterface +func NewScheduleEntitiesSnapshotRepo( + db *gorm.DB, 
errorTransformer errors.ErrorTransformer, scope promutils.Scope) interfaces2.ScheduleEntitiesSnapShotRepoInterface { + metrics := newMetrics(scope) + return &ScheduleEntitiesSnapshotRepo{ + db: db, + errorTransformer: errorTransformer, + metrics: metrics, + } +} diff --git a/flyteadmin/scheduler/repositories/interfaces/schedulable_entity_repo.go b/flyteadmin/scheduler/repositories/interfaces/schedulable_entity_repo.go new file mode 100644 index 0000000000..57ae77f0c5 --- /dev/null +++ b/flyteadmin/scheduler/repositories/interfaces/schedulable_entity_repo.go @@ -0,0 +1,28 @@ +package interfaces + +import ( + "context" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" +) + +//go:generate mockery -name=SchedulableEntityRepoInterface -output=../mocks -case=underscore + +// SchedulableEntityRepoInterface : An Interface for interacting with the schedulable entity in the database +type SchedulableEntityRepoInterface interface { + + // Create a schedulable entity in the database store + Create(ctx context.Context, input models.SchedulableEntity) error + + // Activate a schedulable entity in the database store. + Activate(ctx context.Context, input models.SchedulableEntity) error + + // Deactivate a schedulable entity in the database store. + Deactivate(ctx context.Context, ID models.SchedulableEntityKey) error + + // Get a schedulable entity from the database store using the schedulable entity id. 
+ Get(ctx context.Context, ID models.SchedulableEntityKey) (models.SchedulableEntity, error) + + // GetAll Gets all the active schedulable entities from the db + GetAll(ctx context.Context) ([]models.SchedulableEntity, error) +} diff --git a/flyteadmin/scheduler/repositories/interfaces/schedule_entities_snapshot_repo.go b/flyteadmin/scheduler/repositories/interfaces/schedule_entities_snapshot_repo.go new file mode 100644 index 0000000000..4efdcfe2e5 --- /dev/null +++ b/flyteadmin/scheduler/repositories/interfaces/schedule_entities_snapshot_repo.go @@ -0,0 +1,19 @@ +package interfaces + +import ( + "context" + + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" +) + +//go:generate mockery -name=ScheduleEntitiesSnapShotRepoInterface -output=../mocks -case=underscore + +// ScheduleEntitiesSnapShotRepoInterface : An Interface for interacting with the snapshot of schedulable entities in the database +type ScheduleEntitiesSnapShotRepoInterface interface { + + // Create/ Update the snapshot in the database store + Write(ctx context.Context, input models.ScheduleEntitiesSnapshot) error + + // Get the latest snapshot from the database store. + Read(ctx context.Context) (models.ScheduleEntitiesSnapshot, error) +} diff --git a/flyteadmin/scheduler/repositories/mocks/schedulable_entity_repo_interface.go b/flyteadmin/scheduler/repositories/mocks/schedulable_entity_repo_interface.go new file mode 100644 index 0000000000..5b1139518f --- /dev/null +++ b/flyteadmin/scheduler/repositories/mocks/schedulable_entity_repo_interface.go @@ -0,0 +1,192 @@ +// Code generated by mockery v1.0.1. DO NOT EDIT. 
+ +package mocks + +import ( + context "context" + + mock "github.com/stretchr/testify/mock" + + models "github.com/flyteorg/flyteadmin/scheduler/repositories/models" +) + +// SchedulableEntityRepoInterface is an autogenerated mock type for the SchedulableEntityRepoInterface type +type SchedulableEntityRepoInterface struct { + mock.Mock +} + +type SchedulableEntityRepoInterface_Activate struct { + *mock.Call +} + +func (_m SchedulableEntityRepoInterface_Activate) Return(_a0 error) *SchedulableEntityRepoInterface_Activate { + return &SchedulableEntityRepoInterface_Activate{Call: _m.Call.Return(_a0)} +} + +func (_m *SchedulableEntityRepoInterface) OnActivate(ctx context.Context, input models.SchedulableEntity) *SchedulableEntityRepoInterface_Activate { + c := _m.On("Activate", ctx, input) + return &SchedulableEntityRepoInterface_Activate{Call: c} +} + +func (_m *SchedulableEntityRepoInterface) OnActivateMatch(matchers ...interface{}) *SchedulableEntityRepoInterface_Activate { + c := _m.On("Activate", matchers...) 
+ return &SchedulableEntityRepoInterface_Activate{Call: c} +} + +// Activate provides a mock function with given fields: ctx, input +func (_m *SchedulableEntityRepoInterface) Activate(ctx context.Context, input models.SchedulableEntity) error { + ret := _m.Called(ctx, input) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, models.SchedulableEntity) error); ok { + r0 = rf(ctx, input) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +type SchedulableEntityRepoInterface_Create struct { + *mock.Call +} + +func (_m SchedulableEntityRepoInterface_Create) Return(_a0 error) *SchedulableEntityRepoInterface_Create { + return &SchedulableEntityRepoInterface_Create{Call: _m.Call.Return(_a0)} +} + +func (_m *SchedulableEntityRepoInterface) OnCreate(ctx context.Context, input models.SchedulableEntity) *SchedulableEntityRepoInterface_Create { + c := _m.On("Create", ctx, input) + return &SchedulableEntityRepoInterface_Create{Call: c} +} + +func (_m *SchedulableEntityRepoInterface) OnCreateMatch(matchers ...interface{}) *SchedulableEntityRepoInterface_Create { + c := _m.On("Create", matchers...) 
+ return &SchedulableEntityRepoInterface_Create{Call: c} +} + +// Create provides a mock function with given fields: ctx, input +func (_m *SchedulableEntityRepoInterface) Create(ctx context.Context, input models.SchedulableEntity) error { + ret := _m.Called(ctx, input) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, models.SchedulableEntity) error); ok { + r0 = rf(ctx, input) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +type SchedulableEntityRepoInterface_Deactivate struct { + *mock.Call +} + +func (_m SchedulableEntityRepoInterface_Deactivate) Return(_a0 error) *SchedulableEntityRepoInterface_Deactivate { + return &SchedulableEntityRepoInterface_Deactivate{Call: _m.Call.Return(_a0)} +} + +func (_m *SchedulableEntityRepoInterface) OnDeactivate(ctx context.Context, ID models.SchedulableEntityKey) *SchedulableEntityRepoInterface_Deactivate { + c := _m.On("Deactivate", ctx, ID) + return &SchedulableEntityRepoInterface_Deactivate{Call: c} +} + +func (_m *SchedulableEntityRepoInterface) OnDeactivateMatch(matchers ...interface{}) *SchedulableEntityRepoInterface_Deactivate { + c := _m.On("Deactivate", matchers...) 
+ return &SchedulableEntityRepoInterface_Deactivate{Call: c} +} + +// Deactivate provides a mock function with given fields: ctx, ID +func (_m *SchedulableEntityRepoInterface) Deactivate(ctx context.Context, ID models.SchedulableEntityKey) error { + ret := _m.Called(ctx, ID) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, models.SchedulableEntityKey) error); ok { + r0 = rf(ctx, ID) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +type SchedulableEntityRepoInterface_Get struct { + *mock.Call +} + +func (_m SchedulableEntityRepoInterface_Get) Return(_a0 models.SchedulableEntity, _a1 error) *SchedulableEntityRepoInterface_Get { + return &SchedulableEntityRepoInterface_Get{Call: _m.Call.Return(_a0, _a1)} +} + +func (_m *SchedulableEntityRepoInterface) OnGet(ctx context.Context, ID models.SchedulableEntityKey) *SchedulableEntityRepoInterface_Get { + c := _m.On("Get", ctx, ID) + return &SchedulableEntityRepoInterface_Get{Call: c} +} + +func (_m *SchedulableEntityRepoInterface) OnGetMatch(matchers ...interface{}) *SchedulableEntityRepoInterface_Get { + c := _m.On("Get", matchers...) 
+ return &SchedulableEntityRepoInterface_Get{Call: c} +} + +// Get provides a mock function with given fields: ctx, ID +func (_m *SchedulableEntityRepoInterface) Get(ctx context.Context, ID models.SchedulableEntityKey) (models.SchedulableEntity, error) { + ret := _m.Called(ctx, ID) + + var r0 models.SchedulableEntity + if rf, ok := ret.Get(0).(func(context.Context, models.SchedulableEntityKey) models.SchedulableEntity); ok { + r0 = rf(ctx, ID) + } else { + r0 = ret.Get(0).(models.SchedulableEntity) + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, models.SchedulableEntityKey) error); ok { + r1 = rf(ctx, ID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type SchedulableEntityRepoInterface_GetAll struct { + *mock.Call +} + +func (_m SchedulableEntityRepoInterface_GetAll) Return(_a0 []models.SchedulableEntity, _a1 error) *SchedulableEntityRepoInterface_GetAll { + return &SchedulableEntityRepoInterface_GetAll{Call: _m.Call.Return(_a0, _a1)} +} + +func (_m *SchedulableEntityRepoInterface) OnGetAll(ctx context.Context) *SchedulableEntityRepoInterface_GetAll { + c := _m.On("GetAll", ctx) + return &SchedulableEntityRepoInterface_GetAll{Call: c} +} + +func (_m *SchedulableEntityRepoInterface) OnGetAllMatch(matchers ...interface{}) *SchedulableEntityRepoInterface_GetAll { + c := _m.On("GetAll", matchers...) 
+ return &SchedulableEntityRepoInterface_GetAll{Call: c} +} + +// GetAll provides a mock function with given fields: ctx +func (_m *SchedulableEntityRepoInterface) GetAll(ctx context.Context) ([]models.SchedulableEntity, error) { + ret := _m.Called(ctx) + + var r0 []models.SchedulableEntity + if rf, ok := ret.Get(0).(func(context.Context) []models.SchedulableEntity); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]models.SchedulableEntity) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} diff --git a/flyteadmin/scheduler/repositories/mocks/schedule_entities_snap_shot_repo_interface.go b/flyteadmin/scheduler/repositories/mocks/schedule_entities_snap_shot_repo_interface.go new file mode 100644 index 0000000000..1c0aa792f7 --- /dev/null +++ b/flyteadmin/scheduler/repositories/mocks/schedule_entities_snap_shot_repo_interface.go @@ -0,0 +1,87 @@ +// Code generated by mockery v1.0.1. DO NOT EDIT. 
+ +package mocks + +import ( + context "context" + + mock "github.com/stretchr/testify/mock" + + models "github.com/flyteorg/flyteadmin/scheduler/repositories/models" +) + +// ScheduleEntitiesSnapShotRepoInterface is an autogenerated mock type for the ScheduleEntitiesSnapShotRepoInterface type +type ScheduleEntitiesSnapShotRepoInterface struct { + mock.Mock +} + +type ScheduleEntitiesSnapShotRepoInterface_Read struct { + *mock.Call +} + +func (_m ScheduleEntitiesSnapShotRepoInterface_Read) Return(_a0 models.ScheduleEntitiesSnapshot, _a1 error) *ScheduleEntitiesSnapShotRepoInterface_Read { + return &ScheduleEntitiesSnapShotRepoInterface_Read{Call: _m.Call.Return(_a0, _a1)} +} + +func (_m *ScheduleEntitiesSnapShotRepoInterface) OnRead(ctx context.Context) *ScheduleEntitiesSnapShotRepoInterface_Read { + c := _m.On("Read", ctx) + return &ScheduleEntitiesSnapShotRepoInterface_Read{Call: c} +} + +func (_m *ScheduleEntitiesSnapShotRepoInterface) OnReadMatch(matchers ...interface{}) *ScheduleEntitiesSnapShotRepoInterface_Read { + c := _m.On("Read", matchers...) 
+ return &ScheduleEntitiesSnapShotRepoInterface_Read{Call: c} +} + +// Read provides a mock function with given fields: ctx +func (_m *ScheduleEntitiesSnapShotRepoInterface) Read(ctx context.Context) (models.ScheduleEntitiesSnapshot, error) { + ret := _m.Called(ctx) + + var r0 models.ScheduleEntitiesSnapshot + if rf, ok := ret.Get(0).(func(context.Context) models.ScheduleEntitiesSnapshot); ok { + r0 = rf(ctx) + } else { + r0 = ret.Get(0).(models.ScheduleEntitiesSnapshot) + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type ScheduleEntitiesSnapShotRepoInterface_Write struct { + *mock.Call +} + +func (_m ScheduleEntitiesSnapShotRepoInterface_Write) Return(_a0 error) *ScheduleEntitiesSnapShotRepoInterface_Write { + return &ScheduleEntitiesSnapShotRepoInterface_Write{Call: _m.Call.Return(_a0)} +} + +func (_m *ScheduleEntitiesSnapShotRepoInterface) OnWrite(ctx context.Context, input models.ScheduleEntitiesSnapshot) *ScheduleEntitiesSnapShotRepoInterface_Write { + c := _m.On("Write", ctx, input) + return &ScheduleEntitiesSnapShotRepoInterface_Write{Call: c} +} + +func (_m *ScheduleEntitiesSnapShotRepoInterface) OnWriteMatch(matchers ...interface{}) *ScheduleEntitiesSnapShotRepoInterface_Write { + c := _m.On("Write", matchers...) 
// SchedulableEntity is the database model that encapsulates the metadata associated with a
// schedulable entity. It embeds the admin BaseModel and the composite key SchedulableEntityKey.
// NOTE(review): a schedule appears to be described either by CronExpression or by
// FixedRateValue together with Unit (the tests populate one or the other) — confirm against the
// scheduler core before relying on this.
type SchedulableEntity struct {
	models.BaseModel
	SchedulableEntityKey
	CronExpression      string
	FixedRateValue      uint32
	Unit                admin.FixedRateUnit
	KickoffTimeInputArg string
	Active              *bool
}

// SchedulableEntityKey is the primary key of a schedulable entity; all four fields are tagged as
// gorm primary-key columns.
type SchedulableEntityKey struct {
	Project string `gorm:"primary_key"`
	Domain  string `gorm:"primary_key"`
	Name    string `gorm:"primary_key"`
	Version string `gorm:"primary_key"`
}

// ScheduleEntitiesSnapshot is the database model used to save the snapshot of the schedulable
// entities in the db. Snapshot holds the serialized snapshot bytes.
type ScheduleEntitiesSnapshot struct {
	models.BaseModel
	Snapshot []byte `gorm:"column:snapshot" schema:"-"`
}

// ScheduleEntitiesSnapshotCollectionOutput wraps a list of ScheduleEntitiesSnapshot records.
type ScheduleEntitiesSnapshotCollectionOutput struct {
	Snapshots []ScheduleEntitiesSnapshot
}
schema:"-"` +} + +type ScheduleEntitiesSnapshotCollectionOutput struct { + Snapshots []ScheduleEntitiesSnapshot +} diff --git a/flyteadmin/scheduler/repositories/postgres_repo.go b/flyteadmin/scheduler/repositories/postgres_repo.go new file mode 100644 index 0000000000..c3000128ed --- /dev/null +++ b/flyteadmin/scheduler/repositories/postgres_repo.go @@ -0,0 +1,29 @@ +package repositories + +import ( + "github.com/flyteorg/flyteadmin/pkg/repositories/errors" + "github.com/flyteorg/flyteadmin/scheduler/repositories/gormimpl" + "github.com/flyteorg/flyteadmin/scheduler/repositories/interfaces" + "github.com/flyteorg/flytestdlib/promutils" + "github.com/jinzhu/gorm" +) + +type PostgresRepo struct { + schedulableEntityRepo interfaces.SchedulableEntityRepoInterface + scheduleEntitiesSnapshotRepo interfaces.ScheduleEntitiesSnapShotRepoInterface +} + +func (p *PostgresRepo) SchedulableEntityRepo() interfaces.SchedulableEntityRepoInterface { + return p.schedulableEntityRepo +} + +func (p *PostgresRepo) ScheduleEntitiesSnapshotRepo() interfaces.ScheduleEntitiesSnapShotRepoInterface { + return p.scheduleEntitiesSnapshotRepo +} + +func NewPostgresRepo(db *gorm.DB, errorTransformer errors.ErrorTransformer, scope promutils.Scope) SchedulerRepoInterface { + return &PostgresRepo{ + schedulableEntityRepo: gormimpl.NewSchedulableEntityRepo(db, errorTransformer, scope.NewSubScope("schedulable_entity")), + scheduleEntitiesSnapshotRepo: gormimpl.NewScheduleEntitiesSnapshotRepo(db, errorTransformer, scope.NewSubScope("schedule_entities_snapshot")), + } +} diff --git a/flyteadmin/scheduler/schedule_executor.go b/flyteadmin/scheduler/schedule_executor.go new file mode 100644 index 0000000000..f77d9ccce3 --- /dev/null +++ b/flyteadmin/scheduler/schedule_executor.go @@ -0,0 +1,115 @@ +package scheduler + +import ( + "context" + "time" + + runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + "github.com/flyteorg/flyteadmin/scheduler/core" + 
"github.com/flyteorg/flyteadmin/scheduler/executor" + "github.com/flyteorg/flyteadmin/scheduler/repositories" + "github.com/flyteorg/flyteadmin/scheduler/snapshoter" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/service" + "github.com/flyteorg/flytestdlib/futures" + "github.com/flyteorg/flytestdlib/logger" + "github.com/flyteorg/flytestdlib/promutils" + + "golang.org/x/time/rate" + "k8s.io/apimachinery/pkg/util/wait" +) + +const snapshotWriterDuration = 30 * time.Second +const scheduleUpdaterDuration = 30 * time.Second + +const snapShotVersion = 1 + +// ScheduledExecutor used for executing the schedules saved by the native flyte scheduler in the database. +type ScheduledExecutor struct { + scheduler core.Scheduler + snapshoter snapshoter.Persistence + db repositories.SchedulerRepoInterface + scope promutils.Scope + adminServiceClient service.AdminServiceClient + workflowExecutorConfig *runtimeInterfaces.FlyteWorkflowExecutorConfig +} + +func (w *ScheduledExecutor) Run(ctx context.Context) error { + logger.Infof(ctx, "Flyte native scheduler started successfully") + + defer logger.Infof(ctx, "Flyte native scheduler shutdown") + + // Read snapshot from the DB. Each snapshot is versioned and helps in maintaining backward compatibility + // Snapshot contains the lastexecution times for each schedule and is captured every 30 secs + snapShotReader := &snapshoter.VersionedSnapshot{Version: snapShotVersion} + snapshot, err := w.snapshoter.Read(ctx, snapShotReader) + + if err != nil { + logger.Errorf(ctx, "unable to read the snapshot from the db due to %v. Aborting", err) + return err + } + + // Read all the schedules from the DB + schedules, err := w.db.SchedulableEntityRepo().GetAll(ctx) + if err != nil { + logger.Errorf(ctx, "unable to read the schedules from the db due to %v. 
Aborting", err) + return err + } + logger.Infof(ctx, "Number of schedules retrieved %v", len(schedules)) + adminRateLimit := w.workflowExecutorConfig.GetAdminRateLimit() + + // Set the rate limit on the admin + rateLimiter := rate.NewLimiter(adminRateLimit.GetTps(), adminRateLimit.GetBurst()) + + // Set the executor to send executions to admin + executor := executor.New(w.scope, w.adminServiceClient) + + // Create the scheduler using GoCronScheduler implementation + // Also Bootstrap the schedules from the snapshot + bootStrapCtx, bootStrapCancel := context.WithCancel(ctx) + defer bootStrapCancel() + gcronScheduler := core.NewGoCronScheduler(bootStrapCtx, schedules, w.scope, snapshot, rateLimiter, executor) + w.scheduler = gcronScheduler + + // Start the go routine to write the update schedules periodically + updaterCtx, updaterCancel := context.WithCancel(ctx) + defer updaterCancel() + gcronUpdater := core.NewUpdater(w.db, gcronScheduler) + go wait.UntilWithContext(updaterCtx, gcronUpdater.UpdateGoCronSchedules, scheduleUpdaterDuration) + + // Catch up simulataneously on all the schedules in the scheduler + currTime := time.Now() + af := futures.NewAsyncFuture(ctx, func(ctx context.Context) (interface{}, error) { + return gcronScheduler.CatchupAll(ctx, currTime), nil + }) + isCatchupSuccess, err := af.Get(ctx) + if err != nil { + logger.Errorf(ctx, "failed to get future value for catchup due to %v", err) + return err + } + + if !isCatchupSuccess.(bool) { + logger.Errorf(ctx, "failed to catch up on all the schedules. 
Aborting") + return err + } + + snapshotRunner := core.NewSnapshotRunner(w.snapshoter, w.scheduler) + // Start the go routine to write the snapshot periodically + snapshoterCtx, snapshoterCancel := context.WithCancel(ctx) + defer snapshoterCancel() + wait.UntilWithContext(snapshoterCtx, snapshotRunner.Run, snapshotWriterDuration) + <-ctx.Done() + + return nil +} + +func NewScheduledExecutor(db repositories.SchedulerRepoInterface, + workflowExecutorConfig runtimeInterfaces.WorkflowExecutorConfig, + scope promutils.Scope, adminServiceClient service.AdminServiceClient) ScheduledExecutor { + return ScheduledExecutor{ + db: db, + scope: scope, + adminServiceClient: adminServiceClient, + workflowExecutorConfig: workflowExecutorConfig.GetFlyteWorkflowExecutorConfig(), + snapshoter: snapshoter.New(scope, db), + } +} diff --git a/flyteadmin/scheduler/schedule_executor_test.go b/flyteadmin/scheduler/schedule_executor_test.go new file mode 100644 index 0000000000..00c6e5996b --- /dev/null +++ b/flyteadmin/scheduler/schedule_executor_test.go @@ -0,0 +1,182 @@ +// +build !race + +package scheduler + +import ( + "bytes" + "context" + "fmt" + "testing" + "time" + + "github.com/flyteorg/flyteadmin/pkg/repositories" + "github.com/flyteorg/flyteadmin/pkg/repositories/mocks" + adminModels "github.com/flyteorg/flyteadmin/pkg/repositories/models" + runtimeInterfaces "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" + schedMocks "github.com/flyteorg/flyteadmin/scheduler/repositories/mocks" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flyteadmin/scheduler/snapshoter" + adminMocks "github.com/flyteorg/flyteidl/clients/go/admin/mocks" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +var schedules []models.SchedulableEntity +var db repositories.RepositoryInterface + +func setupScheduleExecutor(t 
*testing.T, s string) ScheduledExecutor { + db = mocks.NewMockRepository() + var scope = promutils.NewScope(s) + scheduleExecutorConfig := runtimeInterfaces.WorkflowExecutorConfig{ + FlyteWorkflowExecutorConfig: &runtimeInterfaces.FlyteWorkflowExecutorConfig{ + AdminRateLimit: &runtimeInterfaces.AdminRateLimit{ + Tps: 100, + Burst: 10, + }, + }, + } + var bytesArray []byte + f := bytes.NewBuffer(bytesArray) + writer := snapshoter.VersionedSnapshot{} + snapshot := &snapshoter.SnapshotV1{ + LastTimes: map[string]*time.Time{}, + } + err := writer.WriteSnapshot(f, snapshot) + assert.Nil(t, err) + mockAdminClient := new(adminMocks.AdminServiceClient) + snapshotRepo := db.ScheduleEntitiesSnapshotRepo().(*schedMocks.ScheduleEntitiesSnapShotRepoInterface) + snapshotModel := models.ScheduleEntitiesSnapshot{ + BaseModel: adminModels.BaseModel{ + ID: 17, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + }, + Snapshot: f.Bytes(), + } + snapshotRepo.OnReadMatch(mock.Anything).Return(snapshotModel, nil) + snapshotRepo.OnWriteMatch(mock.Anything, mock.Anything).Return(nil) + mockAdminClient.OnCreateExecutionMatch(context.Background(), mock.Anything). 
+ Return(&admin.ExecutionCreateResponse{}, nil) + return NewScheduledExecutor(db, scheduleExecutorConfig, + scope, mockAdminClient) +} + +func TestSuccessfulSchedulerExec(t *testing.T) { + t.Run("add cron schedule", func(t *testing.T) { + scheduleExecutor := setupScheduleExecutor(t, "cron") + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + activeV2 := true + createAt := time.Now() + schedules = append(schedules, models.SchedulableEntity{ + BaseModel: adminModels.BaseModel{ + ID: 1, + CreatedAt: createAt, + UpdatedAt: time.Now(), + }, + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "cron_schedule", + Version: "v2", + }, + CronExpression: "@every 1s", + KickoffTimeInputArg: "kickoff_time", + Active: &activeV2, + }) + + scheduleEntitiesRepo.OnGetAllMatch(mock.Anything).Return(schedules, nil) + go func() { + err := scheduleExecutor.Run(context.Background()) + assert.Nil(t, err) + }() + time.Sleep(10 * time.Second) + scheduleEntitiesRepo = db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + activeV2 = false + schedules = nil + schedules = append(schedules, models.SchedulableEntity{ + BaseModel: adminModels.BaseModel{ + ID: 1, + CreatedAt: createAt, + UpdatedAt: time.Now(), + }, + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "cron_schedule", + Version: "v2", + }, + CronExpression: "@every 1s", + KickoffTimeInputArg: "kickoff_time", + Active: &activeV2, + }) + scheduleEntitiesRepo.OnGetAllMatch(mock.Anything).Return(schedules, nil) + time.Sleep(30 * time.Second) + }) + + t.Run("add fixed rate schedule", func(t *testing.T) { + scheduleExecutor := setupScheduleExecutor(t, "fixed") + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + activeV2 := true + createAt := time.Now() + schedules = append(schedules, models.SchedulableEntity{ + BaseModel: 
adminModels.BaseModel{ + ID: 1, + CreatedAt: createAt, + UpdatedAt: time.Now(), + }, + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "fixed_rate_schedule", + Version: "v2", + }, + FixedRateValue: 1, + Unit: admin.FixedRateUnit_MINUTE, + KickoffTimeInputArg: "kickoff_time", + Active: &activeV2, + }) + scheduleEntitiesRepo.OnGetAllMatch(mock.Anything).Return(schedules, nil) + + go func() { + err := scheduleExecutor.Run(context.Background()) + assert.Nil(t, err) + }() + time.Sleep(10 * time.Second) + scheduleEntitiesRepo = db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + activeV2 = false + schedules = nil + schedules = append(schedules, models.SchedulableEntity{ + BaseModel: adminModels.BaseModel{ + ID: 1, + CreatedAt: createAt, + UpdatedAt: time.Now(), + }, + SchedulableEntityKey: models.SchedulableEntityKey{ + Project: "project", + Domain: "domain", + Name: "fixed_rate_schedule", + Version: "v2", + }, + FixedRateValue: 1, + Unit: admin.FixedRateUnit_MINUTE, + KickoffTimeInputArg: "kickoff_time", + Active: &activeV2, + }) + scheduleEntitiesRepo.OnGetAllMatch(mock.Anything).Return(schedules, nil) + time.Sleep(30 * time.Second) + }) + + t.Run("unable to read schedules", func(t *testing.T) { + scheduleExecutor := setupScheduleExecutor(t, "unable_read_schedules") + scheduleEntitiesRepo := db.SchedulableEntityRepo().(*schedMocks.SchedulableEntityRepoInterface) + scheduleEntitiesRepo.OnGetAllMatch(mock.Anything).Return(nil, fmt.Errorf("unable to read schedules")) + + go func() { + err := scheduleExecutor.Run(context.Background()) + assert.NotNil(t, err) + }() + }) +} diff --git a/flyteadmin/scheduler/snapshoter/doc.go b/flyteadmin/scheduler/snapshoter/doc.go new file mode 100644 index 0000000000..654225eaaf --- /dev/null +++ b/flyteadmin/scheduler/snapshoter/doc.go @@ -0,0 +1,5 @@ +// Package snapshoter +// This package provides the ability to snapshot all the schedules in the scheduler job 
// Persistence allows reading and saving the serialized form of the snapshot from a storage.
// Currently there is a DB implementation for it.
type Persistence interface {
	// Save saves the snapshot to the storage in a serialized form, using writer to serialize it.
	// It has no return value, so failures are not reported to the caller through this interface.
	Save(ctx context.Context, writer Writer, snapshot Snapshot)
	// Read reads the serialized snapshot from the storage and uses reader to deserialize it to
	// its in memory format.
	Read(ctx context.Context, reader Reader) (Snapshot, error)
}

// Reader provides an interface to read the snapshot and deserialize it to its in memory format.
type Reader interface {
	// ReadSnapshot reads the serialized snapshot from reader and returns its in memory form.
	ReadSnapshot(reader io.Reader) (Snapshot, error)
}
// Snapshot used by the scheduler for creating, updating and reading snapshots of the schedules.
type Snapshot interface {
	// GetLastExecutionTime returns the last execution time of the schedule given by the key,
	// or nil when the key is not present in the snapshot.
	GetLastExecutionTime(key string) *time.Time
	// UpdateLastExecutionTime sets the last execution time of the schedule given by key to lastExecTime.
	UpdateLastExecutionTime(key string, lastExecTime *time.Time)
	// Serialize returns the serialized form of all the schedules and their execution times.
	Serialize() ([]byte, error)
	// Deserialize populates the snapshot from its serialized form.
	Deserialize(snapshot []byte) error
	// GetVersion gets the version number of snapshot written
	GetVersion() int
	// IsEmpty returns true if the snapshot contains no schedules
	IsEmpty() bool
	// Create returns a new empty snapshot.
	Create() Snapshot
}

// SnapshotV1 stores the in-memory state of the schedules and their last execution timestamps.
// This map is created periodically from the jobstore of the gocron_wrapper and written to the DB.
// During bootup the serialized version of it is read from the DB and the schedules are initialized from it.
// V1 version so that in future if we add more fields for extending the functionality then this provides
// a backward compatible way to read old snapshots.
type SnapshotV1 struct {
	// LastTimes map of the schedule name to last execution timestamp
	LastTimes map[string]*time.Time
}

// GetLastExecutionTime returns the timestamp stored for key, or nil when there is none.
func (s *SnapshotV1) GetLastExecutionTime(key string) *time.Time {
	return s.LastTimes[key]
}

// UpdateLastExecutionTime stores lastExecTime for key, replacing any previous value
// unconditionally (no ordering check against the stored timestamp is performed).
func (s *SnapshotV1) UpdateLastExecutionTime(key string, lastExecTime *time.Time) {
	// A zero-value SnapshotV1 has a nil map; writing to it would panic.
	if s.LastTimes == nil {
		s.LastTimes = map[string]*time.Time{}
	}
	s.LastTimes[key] = lastExecTime
}

// Serialize gob-encodes the snapshot and returns the encoded bytes together with any encoding error.
func (s *SnapshotV1) Serialize() ([]byte, error) {
	var b bytes.Buffer
	err := gob.NewEncoder(&b).Encode(s)
	return b.Bytes(), err
}

// Deserialize gob-decodes snapshot into the receiver.
func (s *SnapshotV1) Deserialize(snapshot []byte) error {
	return gob.NewDecoder(bytes.NewBuffer(snapshot)).Decode(s)
}

// IsEmpty reports whether the snapshot holds no schedules.
func (s *SnapshotV1) IsEmpty() bool {
	return len(s.LastTimes) == 0
}

// GetVersion returns the snapshot format version, which is always 1 for SnapshotV1.
func (s *SnapshotV1) GetVersion() int {
	return 1
}

// Create returns a new, empty SnapshotV1 with an initialized map.
func (s *SnapshotV1) Create() Snapshot {
	return &SnapshotV1{
		LastTimes: map[string]*time.Time{},
	}
}
f := bytes.NewBuffer(bytesArray) + // Only write if the snapshot has contents and not equal to the previous snapshot + if !snapshot.IsEmpty() { + err := writer.WriteSnapshot(f, snapshot) + // Just log the error + if err != nil { + w.metrics.SnapshotCreationErrCounter.Inc() + logger.Errorf(ctx, "unable to write the snapshot to buffer due to %v", err) + } + err = w.db.ScheduleEntitiesSnapshotRepo().Write(ctx, models.ScheduleEntitiesSnapshot{ + Snapshot: f.Bytes(), + }) + if err != nil { + w.metrics.SnapshotSaveErrCounter.Inc() + logger.Errorf(ctx, "unable to save the snapshot to the database due to %v", err) + } + } +} + +func (w *snapshoter) Read(ctx context.Context, reader Reader) (Snapshot, error) { + scheduleEntitiesSnapShot, err := w.db.ScheduleEntitiesSnapshotRepo().Read(ctx) + var snapshot Snapshot + snapshot = &SnapshotV1{LastTimes: map[string]*time.Time{}} + // Just log the error but dont interrupt the startup of the scheduler + if err != nil { + if err.(errors.FlyteAdminError).Code() == codes.NotFound { + // This is not an error condition and hence can be ignored. 
+ return snapshot, nil + } + logger.Errorf(ctx, "unable to read the snapshot from the DB due to %v", err) + return nil, err + } + f := bytes.NewReader(scheduleEntitiesSnapShot.Snapshot) + snapshot, err = reader.ReadSnapshot(f) + // Similarly just log the error but dont interrupt the startup of the scheduler + if err != nil { + logger.Errorf(ctx, "unable to construct the snapshot struct from the file due to %v", err) + return nil, err + } + return snapshot, nil +} + +func New(scope promutils.Scope, db repositories.SchedulerRepoInterface) Persistence { + return &snapshoter{ + metrics: getSnapshoterMetrics(scope), + db: db, + } +} + +func getSnapshoterMetrics(scope promutils.Scope) Metrics { + return Metrics{ + Scope: scope, + SnapshotSaveErrCounter: scope.MustNewCounter("checkpoint_save_error_counter", + "count of unsuccessful attempts to save the created snapshot to the DB"), + SnapshotCreationErrCounter: scope.MustNewCounter("checkpoint_creation_error_counter", + "count of unsuccessful attempts to create the snapshot from the inmemory map"), + } +} diff --git a/flyteadmin/scheduler/snapshoter/snapshoter_test.go b/flyteadmin/scheduler/snapshoter/snapshoter_test.go new file mode 100644 index 0000000000..e5e5f1ccaf --- /dev/null +++ b/flyteadmin/scheduler/snapshoter/snapshoter_test.go @@ -0,0 +1,110 @@ +package snapshoter + +import ( + "bytes" + "context" + "testing" + "time" + + "github.com/flyteorg/flyteadmin/pkg/repositories" + "github.com/flyteorg/flyteadmin/pkg/repositories/errors" + "github.com/flyteorg/flyteadmin/pkg/repositories/mocks" + adminModels "github.com/flyteorg/flyteadmin/pkg/repositories/models" + schedMocks "github.com/flyteorg/flyteadmin/scheduler/repositories/mocks" + "github.com/flyteorg/flyteadmin/scheduler/repositories/models" + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/stretchr/testify/assert" +) + +var ( + db repositories.RepositoryInterface +) + +func setupSnapShoter(scope string) Persistence { + db = 
mocks.NewMockRepository() + return New(promutils.NewScope(scope), db) +} + +func TestSnapShoterRead(t *testing.T) { + + t.Run("successful read", func(t *testing.T) { + snapshoter := setupSnapShoter("TestSnapShoterReadSuccessfulRead") + var bytesArray []byte + f := bytes.NewBuffer(bytesArray) + writer := VersionedSnapshot{} + snapshot := &SnapshotV1{ + LastTimes: map[string]*time.Time{}, + } + currTime := time.Now() + snapshot.LastTimes["schedule1"] = &currTime + err := writer.WriteSnapshot(f, snapshot) + assert.Nil(t, err) + + snapshotRepo := db.ScheduleEntitiesSnapshotRepo().(*schedMocks.ScheduleEntitiesSnapShotRepoInterface) + snapshotModel := models.ScheduleEntitiesSnapshot{ + BaseModel: adminModels.BaseModel{ + ID: 17, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + }, + Snapshot: f.Bytes(), + } + snapshotRepo.OnRead(context.Background()).Return(snapshotModel, nil) + + reader := &VersionedSnapshot{} + snapshotVal, err := snapshoter.Read(context.Background(), reader) + assert.Nil(t, err) + assert.NotNil(t, snapshotVal) + }) + + t.Run("unsuccessful read ignore error", func(t *testing.T) { + snapshoter := setupSnapShoter("TestSnapShoterReadUnsuccessfulReadIgnoreError") + snapshotRepo := db.ScheduleEntitiesSnapshotRepo().(*schedMocks.ScheduleEntitiesSnapShotRepoInterface) + + snapshotRepo.OnRead(context.Background()).Return(models.ScheduleEntitiesSnapshot{}, errors.GetSingletonMissingEntityError("schedule_entities_snapshots")) + + reader := &VersionedSnapshot{} + snapshotVal, err := snapshoter.Read(context.Background(), reader) + assert.Nil(t, err) + assert.NotNil(t, snapshotVal) + }) + + t.Run("unsuccessful read dont ignore error", func(t *testing.T) { + snapshoter := setupSnapShoter("TestSnapShoterReadUnsuccessfulReadDontIgnoreError") + snapshotRepo := db.ScheduleEntitiesSnapshotRepo().(*schedMocks.ScheduleEntitiesSnapShotRepoInterface) + + snapshotRepo.OnRead(context.Background()).Return(models.ScheduleEntitiesSnapshot{}, errors.GetInvalidInputError("invalid 
input")) + + reader := &VersionedSnapshot{} + _, err := snapshoter.Read(context.Background(), reader) + assert.NotNil(t, err) + }) +} + +func TestSnapShoterSave(t *testing.T) { + snapshoter := setupSnapShoter("TestSnapShoterSave") + writer := &VersionedSnapshot{} + var bytesArray []byte + f := bytes.NewBuffer(bytesArray) + snapshot := &SnapshotV1{ + LastTimes: map[string]*time.Time{}, + } + currTime := time.Now() + snapshot.LastTimes["schedule1"] = &currTime + err := writer.WriteSnapshot(f, snapshot) + assert.Nil(t, err) + + snapshotRepo := db.ScheduleEntitiesSnapshotRepo().(*schedMocks.ScheduleEntitiesSnapShotRepoInterface) + snapshotModel := models.ScheduleEntitiesSnapshot{ + BaseModel: adminModels.BaseModel{ + ID: 0, + CreatedAt: time.Time{}, + UpdatedAt: time.Time{}, + }, + Snapshot: f.Bytes(), + } + snapshotRepo.OnWrite(context.Background(), snapshotModel).Return(nil) + + snapshoter.Save(context.Background(), writer, snapshot) +} diff --git a/flyteadmin/scheduler/snapshoter/versioned_snapshot.go b/flyteadmin/scheduler/snapshoter/versioned_snapshot.go new file mode 100644 index 0000000000..72a82da834 --- /dev/null +++ b/flyteadmin/scheduler/snapshoter/versioned_snapshot.go @@ -0,0 +1,43 @@ +package snapshoter + +import ( + "encoding/gob" + "fmt" + "io" + "time" +) + +// VersionedSnapshot stores the version and gob serialized form of the snapshot +// Provides a read and write methods to serialize and deserialize the gob format of the snapshot. 
+// Including a version provides compatibility check +type VersionedSnapshot struct { + Version int + Ser []byte +} + +func (s *VersionedSnapshot) WriteSnapshot(w io.Writer, snapshot Snapshot) error { + byteContents, err := snapshot.Serialize() + if err != nil { + return err + } + s.Version = snapshot.GetVersion() + s.Ser = byteContents + enc := gob.NewEncoder(w) + return enc.Encode(s) +} + +func (s *VersionedSnapshot) ReadSnapshot(r io.Reader) (Snapshot, error) { + err := gob.NewDecoder(r).Decode(s) + if err != nil { + return nil, err + } + if s.Version == 1 { + snapShotV1 := SnapshotV1{LastTimes: map[string]*time.Time{}} + err = snapShotV1.Deserialize(s.Ser) + if err != nil { + return nil, err + } + return &snapShotV1, nil + } + return nil, fmt.Errorf("unsupported version %v", s.Version) +} diff --git a/flyteadmin/scheduler/snapshoter/versioned_snapshot_test.go b/flyteadmin/scheduler/snapshoter/versioned_snapshot_test.go new file mode 100644 index 0000000000..048f86c4ff --- /dev/null +++ b/flyteadmin/scheduler/snapshoter/versioned_snapshot_test.go @@ -0,0 +1,50 @@ +package snapshoter + +import ( + "bytes" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestReadWriteSnapshot(t *testing.T) { + t.Run("successful read write", func(t *testing.T) { + var bytesArray []byte + f := bytes.NewBuffer(bytesArray) + writer := VersionedSnapshot{} + snapshot := &SnapshotV1{ + LastTimes: map[string]*time.Time{}, + } + currTime := time.Now() + snapshot.LastTimes["schedule1"] = &currTime + err := writer.WriteSnapshot(f, snapshot) + assert.Nil(t, err) + r := bytes.NewReader(f.Bytes()) + reader := VersionedSnapshot{} + s, err := reader.ReadSnapshot(r) + assert.Nil(t, err) + assert.Equal(t, s.IsEmpty(), false) + assert.NotNil(t, s.GetLastExecutionTime("schedule1")) + }) + + t.Run("successful write unsuccessful read", func(t *testing.T) { + var bytesArray []byte + f := bytes.NewBuffer(bytesArray) + writer := VersionedSnapshot{} + snapshot := &SnapshotV1{ + 
LastTimes: map[string]*time.Time{}, + } + currTime := time.Now() + snapshot.LastTimes["schedule1"] = &currTime + err := writer.WriteSnapshot(f, snapshot) + assert.Nil(t, err) + + bytesArray = f.Bytes() + bytesArray[len(bytesArray)-1] = 1 + r := bytes.NewReader(f.Bytes()) + reader := VersionedSnapshot{} + _, err = reader.ReadSnapshot(r) + assert.NotNil(t, err) + }) +} diff --git a/flyteadmin/scheduler/snapshoter/writer.go b/flyteadmin/scheduler/snapshoter/writer.go new file mode 100644 index 0000000000..f45243af1e --- /dev/null +++ b/flyteadmin/scheduler/snapshoter/writer.go @@ -0,0 +1,9 @@ +package snapshoter + +import "io" + +// Writer provides an interface to write the serialized form of the snapshot to a writer +type Writer interface { + // WriteSnapshot writes the serialized form of the snapshot to the writer + WriteSnapshot(writer io.Writer, snapshot Snapshot) error +}