diff --git a/.github/workflows/codecov-config/codecov.yml b/.github/workflows/codecov-config/codecov.yml deleted file mode 100644 index 8901728c4..000000000 --- a/.github/workflows/codecov-config/codecov.yml +++ /dev/null @@ -1,5 +0,0 @@ -coverage: - status: - patch: - default: - threshold: 0.03% \ No newline at end of file diff --git a/.github/workflows/merlin.yml b/.github/workflows/merlin.yml index 422df7312..420649e69 100644 --- a/.github/workflows/merlin.yml +++ b/.github/workflows/merlin.yml @@ -137,13 +137,6 @@ jobs: E2E_USE_GOOGLE_OAUTH: false working-directory: ./python/sdk run: make unit-test - - uses: codecov/codecov-action@v4 - with: - flags: sdk-test-${{ matrix.python-version }} - name: sdk-test-${{ matrix.python-version }} - token: ${{ secrets.CODECOV_TOKEN }} - working-directory: ./python/sdk - codecov_yml_path: ../../.github/workflows/codecov-config/codecov.yml lint-api: runs-on: ubuntu-latest @@ -188,13 +181,6 @@ jobs: POSTGRES_USER: ${{ secrets.DB_USERNAME }} POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }} run: make it-test-api-ci - - uses: codecov/codecov-action@v4 - with: - flags: api-test - name: api-test - token: ${{ secrets.CODECOV_TOKEN }} - working-directory: ./api - codecov_yml_path: ../.github/workflows/codecov-config/codecov.yml test-observation-publisher: runs-on: ubuntu-latest diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index 26c5b501b..99d58e28b 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -6,7 +6,6 @@ import ( "net/http" "time" - gcs "cloud.google.com/go/storage" "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/clientset/versioned" "github.com/caraml-dev/merlin/webhook" "github.com/caraml-dev/mlp/api/pkg/artifact" @@ -105,55 +104,65 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB } var artifactService artifact.Service - if cfg.ImageBuilderConfig.ArtifactServiceType == "gcs" { - gcsClient, err := gcs.NewClient(context.Background()) - if err != nil { - log.Panicf("%s,failed initializing gcs for mlflow delete package", err.Error()) - } - artifactService = artifact.NewGcsArtifactClient(gcsClient) - } else if cfg.ImageBuilderConfig.ArtifactServiceType == "nop" { + if cfg.MlflowConfig.ArtifactServiceType == "gcs" { + artifactService, err = artifact.NewGcsArtifactClient() + } else if cfg.MlflowConfig.ArtifactServiceType == "s3" { + artifactService, err = artifact.NewS3ArtifactClient() + } else if cfg.MlflowConfig.ArtifactServiceType == "nop" { artifactService = artifact.NewNopArtifactClient() } else { - log.Panicf("invalid artifact service type %s", cfg.ImageBuilderConfig.ArtifactServiceType) + log.Panicf("invalid artifact service type %s", cfg.MlflowConfig.ArtifactServiceType) + } + if err != nil { + log.Panicf("%s,failed initializing mlflow artifact service", err.Error()) + } + + if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcr" && + cfg.ImageBuilderConfig.KanikoPushRegistryType != "docker" { + log.Panicf("invalid kaniko push registry type %s", cfg.ImageBuilderConfig.KanikoPushRegistryType) } webServiceConfig := imagebuilder.Config{ - BaseImage: cfg.ImageBuilderConfig.BaseImage, - BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, - DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, - BuildTimeoutDuration: timeout, - KanikoImage: cfg.ImageBuilderConfig.KanikoImage, - KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, - KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, - DefaultResources: cfg.ImageBuilderConfig.DefaultResources, - Tolerations: cfg.ImageBuilderConfig.Tolerations, - NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, - MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, - SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, - ClusterName: cfg.ImageBuilderConfig.ClusterName, - GcpProject: cfg.ImageBuilderConfig.GcpProject, - Environment: cfg.Environment, - SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, + BaseImage: cfg.ImageBuilderConfig.BaseImage, + BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, + DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, + BuildTimeoutDuration: timeout, + KanikoImage: cfg.ImageBuilderConfig.KanikoImage, + KanikoPushRegistryType: cfg.ImageBuilderConfig.KanikoPushRegistryType, + KanikoDockerCredentialSecretName: cfg.ImageBuilderConfig.KanikoDockerCredentialSecretName, + KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, + KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, + DefaultResources: cfg.ImageBuilderConfig.DefaultResources, + Tolerations: cfg.ImageBuilderConfig.Tolerations, + NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, + MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, + SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, + ClusterName: cfg.ImageBuilderConfig.ClusterName, + GcpProject: cfg.ImageBuilderConfig.GcpProject, + Environment: cfg.Environment, + SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, } webserviceBuilder = imagebuilder.NewModelServiceImageBuilder(kubeClient, webServiceConfig, artifactService) predJobConfig := imagebuilder.Config{ - BaseImage: cfg.ImageBuilderConfig.PredictionJobBaseImage, - BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, - DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, - BuildTimeoutDuration: timeout, - KanikoImage: cfg.ImageBuilderConfig.KanikoImage, - KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, - KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, - DefaultResources: cfg.ImageBuilderConfig.DefaultResources, - Tolerations: cfg.ImageBuilderConfig.Tolerations, - NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, - MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, - SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, - ClusterName: cfg.ImageBuilderConfig.ClusterName, - GcpProject: cfg.ImageBuilderConfig.GcpProject, - Environment: cfg.Environment, - SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, + BaseImage: cfg.ImageBuilderConfig.PredictionJobBaseImage, + BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, + DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, + BuildTimeoutDuration: timeout, + KanikoImage: cfg.ImageBuilderConfig.KanikoImage, + KanikoPushRegistryType: cfg.ImageBuilderConfig.KanikoPushRegistryType, + KanikoDockerCredentialSecretName: cfg.ImageBuilderConfig.KanikoDockerCredentialSecretName, + KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, + KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, + DefaultResources: cfg.ImageBuilderConfig.DefaultResources, + Tolerations: cfg.ImageBuilderConfig.Tolerations, + NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, + MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, + SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, + ClusterName: cfg.ImageBuilderConfig.ClusterName, + GcpProject: cfg.ImageBuilderConfig.GcpProject, + Environment: cfg.Environment, + SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, } predJobBuilder = imagebuilder.NewPredictionJobImageBuilder(kubeClient, predJobConfig, artifactService) diff --git a/api/config/config.go b/api/config/config.go index 4119c93a4..15dc76784 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -104,7 +104,7 @@ type BaseImageConfig struct { ImageName string `validate:"required" json:"imageName"` // Dockerfile Path within the build context DockerfilePath string `validate:"required" json:"dockerfilePath"` - // GCS URL Containing build context + // GCS/S3 URL Containing build context BuildContextURI string `validate:"required" json:"buildContextURI"` // Path to sub folder which is intended to build instead of using root folder BuildContextSubPath string `json:"buildContextSubPath"` @@ -209,18 +209,19 @@ type ClusterConfig struct { } type ImageBuilderConfig struct { - ClusterName string `validate:"required"` - GcpProject string - ArtifactServiceType string - BaseImage BaseImageConfig `validate:"required"` - PredictionJobBaseImage BaseImageConfig `validate:"required"` - BuildNamespace string `validate:"required" default:"mlp"` - DockerRegistry string `validate:"required"` - BuildTimeout string `validate:"required" default:"10m"` - KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` - KanikoServiceAccount string - KanikoAdditionalArgs []string - DefaultResources ResourceRequestsLimits `validate:"required"` + ClusterName string `validate:"required"` + GcpProject string + BaseImage BaseImageConfig `validate:"required"` + PredictionJobBaseImage BaseImageConfig `validate:"required"` + BuildNamespace string `validate:"required" default:"mlp"` + DockerRegistry string `validate:"required"` + BuildTimeout string `validate:"required" default:"10m"` + KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` + KanikoServiceAccount string + KanikoPushRegistryType string `validate:"required,oneof=docker gcr" default:"docker"` + KanikoDockerCredentialSecretName string + KanikoAdditionalArgs []string + DefaultResources ResourceRequestsLimits `validate:"required"` // How long to keep the image building job resource in the Kubernetes cluster. Default: 2 days (48 hours). Retention time.Duration `validate:"required" default:"48h"` Tolerations Tolerations @@ -453,8 +454,13 @@ type JaegerConfig struct { } type MlflowConfig struct { - TrackingURL string `validate:"required"` - ArtifactServiceType string `validate:"required"` + TrackingURL string `validate:"required_if=ArtifactServiceType gcs ArtifactServiceType s3"` + // Note that the Kaniko image builder needs to be configured correctly to have the necessary credentials to download + // the artifacts from the blob storage tool depending on the artifact service type selected (gcs/s3). For gcs, the + // credentials can be provided via a k8s service account or a secret but for s3, the credentials can be provided via + // additional arguments in the config KanikoAdditionalArgs e.g. + // --build-arg=[AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY/AWS_DEFAULT_REGION/AWS_ENDPOINT_URL]=xxx + ArtifactServiceType string `validate:"required,oneof=nop gcs s3"` } func (cfg *Config) Validate() error { diff --git a/api/config/config_test.go b/api/config/config_test.go index 265be1470..8e1abadfe 100644 --- a/api/config/config_test.go +++ b/api/config/config_test.go @@ -387,9 +387,8 @@ func TestLoad(t *testing.T) { EnvironmentConfigs: []*EnvironmentConfig{}, }, ImageBuilderConfig: ImageBuilderConfig{ - ClusterName: "test-cluster", - GcpProject: "test-project", - ArtifactServiceType: "gcs", + ClusterName: "test-cluster", + GcpProject: "test-project", BaseImage: BaseImageConfig{ ImageName: "ghcr.io/caraml-dev/merlin/merlin-pyfunc-base:0.0.0", DockerfilePath: "pyfunc-server/docker/Dockerfile", @@ -403,12 +402,13 @@ func TestLoad(t *testing.T) { BuildContextSubPath: "python", MainAppPath: "/home/spark/merlin-spark-app/main.py", }, - BuildNamespace: "caraml", - DockerRegistry: "test-docker.pkg.dev/test/caraml-registry", - BuildTimeout: "30m", - KanikoImage: "gcr.io/kaniko-project/executor:v1.21.0", - KanikoServiceAccount: "kaniko-merlin", - KanikoAdditionalArgs: []string{"--test=true", "--no-logs=false"}, + BuildNamespace: "caraml", + DockerRegistry: "test-docker.pkg.dev/test/caraml-registry", + BuildTimeout: "30m", + KanikoImage: "gcr.io/kaniko-project/executor:v1.21.0", + KanikoServiceAccount: "kaniko-merlin", + KanikoPushRegistryType: "docker", + KanikoAdditionalArgs: []string{"--test=true", "--no-logs=false"}, DefaultResources: ResourceRequestsLimits{ Requests: Resource{ CPU: "1", diff --git a/api/go.mod b/api/go.mod index 1f9c67111..deb3aedff 100644 --- a/api/go.mod +++ b/api/go.mod @@ -4,7 +4,6 @@ go 1.22 require ( cloud.google.com/go/bigtable v1.11.0 - cloud.google.com/go/storage v1.39.0 github.com/GoogleCloudPlatform/spark-on-k8s-operator v0.0.0-20221025152940-c261df66a006 github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5 github.com/antihax/optional v1.0.0 @@ -12,7 +11,7 @@ require ( github.com/bboughton/gcp-helpers v0.1.0 github.com/buger/jsonparser v1.1.1 github.com/caraml-dev/merlin-pyspark-app v0.0.3 - github.com/caraml-dev/mlp v1.13.2-rc2 + github.com/caraml-dev/mlp v1.13.2 github.com/caraml-dev/protopath v0.1.0 github.com/caraml-dev/universal-prediction-interface v1.0.0 github.com/cenkalti/backoff/v4 v4.2.1 @@ -246,11 +245,31 @@ require ( ) require ( + cloud.google.com/go/storage v1.39.0 // indirect github.com/avast/retry-go/v4 v4.6.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect + github.com/aws/aws-sdk-go-v2/config v1.17.8 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.12.21 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect + github.com/aws/smithy-go v1.20.4 // indirect golang.org/x/time v0.5.0 // indirect k8s.io/klog/v2 v2.120.1 // indirect k8s.io/kube-openapi v0.0.0-20231113174909-778a5567bc1e // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + ) replace ( diff --git a/api/go.sum b/api/go.sum index f608b5513..f05153cb9 100644 --- a/api/go.sum +++ b/api/go.sum @@ -142,6 +142,47 @@ github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRS github.com/aws/aws-sdk-go v1.17.7/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.50.0 h1:HBtrLeO+QyDKnc3t1+5DR1RxodOHCGr8ZcrHudpv7jI= github.com/aws/aws-sdk-go v1.50.0/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= +github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k= +github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048 h1:wXvkIvYQ3EPVO5MhCoEv2u5LDwfWp+kLTQMIGyyvi/0= +github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4/go.mod h1:/MQxMqci8tlqDH+pjmoLu1i0tbWCUP1hhyMRuFxpQCw= +github.com/aws/aws-sdk-go-v2/config v1.17.8 h1:b9LGqNnOdg9vR4Q43tBTVWk4J6F+W774MSchvKJsqnE= +github.com/aws/aws-sdk-go-v2/config v1.17.8/go.mod h1:UkCI3kb0sCdvtjiXYiU4Zx5h07BOpgBTtkPu/49r+kA= +github.com/aws/aws-sdk-go-v2/credentials v1.12.21 h1:4tjlyCD0hRGNQivh5dN8hbP30qQhMLBE/FgQR1vHHWM= +github.com/aws/aws-sdk-go-v2/credentials v1.12.21/go.mod h1:O+4XyAt4e+oBAoIwNUYkRg3CVMscaIJdmZBOcPgJ8D8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 h1:r08j4sbZu/RVi+BNxkBJwPMUYY3P8mgSDuKkZ/ZN1lE= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17/go.mod h1:yIkQcCDYNsZfXpd5UX2Cy+sWA1jPgIhGTw9cOBzfVnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 h1:wj5Rwc05hvUSvKuOF29IYb9QrCLjU+rHAy/x/o0DK2c= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24/go.mod h1:jULHjqqjDlbyTa7pfM7WICATnOv+iOhjletM3N0Xbu8= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17 h1:Roo69qTpfu8OlJ2Tb7pAYVuF0CpuUMB0IYWwYP/4DZM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17/go.mod h1:NcWPxQzGM1USQggaTVwz6VpqMZPX1CvDJLDh6jnOCa4= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 h1:KypMCbLPPHEmf9DgMGw51jMj77VfGPAN2Kv4cfhlfgI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4/go.mod h1:Vz1JQXliGcQktFTN/LN6uGppAIRoLBR2bMvIMP0gOjc= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 h1:FLMkfEiRjhgeDTCjjLoc3URo/TBkgeQbocA78lfkzSI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19/go.mod h1:Vx+GucNSsdhaxs3aZIKfSUjKVGsxN25nX2SRcdhuw08= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17/go.mod h1:4nYOrY41Lrbk2170/BGkcJKBhws9Pfn8MG3aGqjjeFI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 h1:rfprUlsdzgl7ZL2KlXiUAoJnI/VxfHCvDFr2QDFj6u4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19/go.mod h1:SCWkEdRq8/7EK60NcvvQ6NXKuTcchAD4ROAsC37VEZE= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 h1:u+EfGmksnJc/x5tq3A+OD7LrMbSSR/5TrKLvkdy/fhY= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17/go.mod h1:VaMx6302JHax2vHJWgRo+5n9zvbacs3bLU/23DNQrTY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 h1:Kp6PWAlXwP1UvIflkIP6MFZYBNDCa4mFCGtxrpICVOg= +github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2/go.mod h1:5FmD/Dqq57gP+XwaUnd5WFPipAuzrf0HmupX27Gvjvc= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 h1:pwvCchFUEnlceKIgPUouBJwK81aCkQ8UDMORfeFtW10= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23/go.mod h1:/w0eg9IhFGjGyyncHIQrXtU8wvNsTJOP0R6PPj0wf80= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 h1:OwhhKc1P9ElfWbMKPIbMMZBV6hzJlL2JKD76wNNVzgQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6/go.mod h1:csZuQY65DAdFBt1oIjO5hhBR49kQqop4+lcuCjf2arA= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 h1:9pPi0PsFNAGILFfPCk8Y0iyEBGc6lu6OQ97U7hmdesg= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19/go.mod h1:h4J3oPZQbxLhzGnk+j9dfYHi5qIOVJ5kczZd658/ydM= +github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= +github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= +github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/bboughton/gcp-helpers v0.1.0 h1:oCl8Hu1twCcyafW3UU4F+opi2fwrma0NlviZwnDFq5Q= @@ -161,8 +202,8 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dR github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= -github.com/caraml-dev/mlp v1.13.2-rc2 h1:Zmyoy3OTPv2fU+42rxMwUt9erS9J6QA0nlZQy/xCPtk= -github.com/caraml-dev/mlp v1.13.2-rc2/go.mod h1:jKfnUEpCcARv/aJF6qH7vT7VMKICDVOq/pDFvj6V3vQ= +github.com/caraml-dev/mlp v1.13.2 h1:N3lk+ToQ281duZImQLTQ28uJtmoc9Zkxx1CR94rS15U= +github.com/caraml-dev/mlp v1.13.2/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/caraml-dev/protopath v0.1.0 h1:hjJ/U9RGD6QZ0Ee9SIYbVmwPugps4S5EpL6R+5ZrBe0= github.com/caraml-dev/protopath v0.1.0/go.mod h1:hVA2HkTrMYv+Q57gtrzu9/P7EXlNtBUcTz43z6EE010= github.com/caraml-dev/universal-prediction-interface v1.0.0 h1:3Z6adv1XZnBVRzFIeCu3mPcPnJrdB5IByYfdD9K/atI= diff --git a/api/pkg/imagebuilder/config.go b/api/pkg/imagebuilder/config.go index c0a52063b..e4cd99675 100644 --- a/api/pkg/imagebuilder/config.go +++ b/api/pkg/imagebuilder/config.go @@ -36,6 +36,10 @@ type Config struct { BuildTimeoutDuration time.Duration // Kaniko docker image KanikoImage string + // Kaniko push registry type + KanikoPushRegistryType string + // Kaniko docker credential secret name + KanikoDockerCredentialSecretName string // Kaniko kubernetes service account KanikoServiceAccount string // Kaniko additional args diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index f80136371..78156e297 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -24,7 +24,6 @@ import ( "strings" "time" - "cloud.google.com/go/storage" "github.com/caraml-dev/mlp/api/pkg/artifact" backoff "github.com/cenkalti/backoff/v4" "github.com/google/go-containerregistry/pkg/authn" @@ -105,12 +104,19 @@ const ( // jobDeletionTickDurationMilliseconds is the interval at which the API server checks if a job has been deleted jobDeletionTickDurationMilliseconds = 100 + dockerRegistryPushRegistryType = "docker" + googleCloudRegistryPushRegistryType = "gcr" + googleCloudStorageArtifactServiceType = "gcs" + gacEnvKey = "GOOGLE_APPLICATION_CREDENTIALS" saFilePath = "/secret/kaniko-secret.json" - baseImageEnvKey = "BASE_IMAGE" - modelDependenciesUrlEnvKey = "MODEL_DEPENDENCIES_URL" - modelArtifactsUrlEnvKey = "MODEL_ARTIFACTS_URL" + dockerCredentialConfigPath = "/kaniko/.docker" + + baseImageEnvKey = "BASE_IMAGE" + modelArtifactStorageTypeKey = "MLFLOW_ARTIFACT_STORAGE_TYPE" + modelDependenciesUrlEnvKey = "MODEL_DEPENDENCIES_URL" + modelArtifactsUrlEnvKey = "MODEL_ARTIFACTS_URL" modelDependenciesPath = "/merlin/model_dependencies" ) @@ -159,7 +165,7 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio return "", err } - condaEnvUrl := fmt.Sprintf("gs://%s/%s/model/conda.yaml", artifactURL.Bucket, artifactURL.Object) + condaEnvUrl := fmt.Sprintf("%s://%s/%s/model/conda.yaml", c.artifactService.GetURLScheme(), artifactURL.Bucket, artifactURL.Object) condaEnv, err := c.artifactService.ReadArtifact(ctx, condaEnvUrl) if err != nil { @@ -167,17 +173,17 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio } hash := sha256.New() - hash.Write([]byte(condaEnv)) + hash.Write(condaEnv) hashEnv := hash.Sum(nil) - hashedDependenciesUrl := fmt.Sprintf("gs://%s%s/%x", artifactURL.Bucket, modelDependenciesPath, hashEnv) + hashedDependenciesUrl := fmt.Sprintf("%s://%s%s/%x", c.artifactService.GetURLScheme(), artifactURL.Bucket, modelDependenciesPath, hashEnv) _, err = c.artifactService.ReadArtifact(ctx, hashedDependenciesUrl) if err == nil { return hashedDependenciesUrl, nil } - if !errors.Is(err, storage.ErrObjectNotExist) { + if !errors.Is(err, artifact.ErrObjectNotExist) { return "", err } @@ -402,6 +408,8 @@ func getGCPSubDomains() []string { // https://github.com/google/go-containerregistry/blob/master/cmd/crane/README.md // https://github.com/google/go-containerregistry/blob/master/pkg/v1/google/README.md func (c *imageBuilder) imageRefExists(imageName, imageTag string) (bool, error) { + // The DefaultKeychain will use credentials as described in the Docker config file whose location is specified by + // the DOCKER_CONFIG environment variable, if set. var keychain authn.Keychain keychain = authn.DefaultKeychain for _, domain := range getGCPSubDomains() { @@ -615,6 +623,7 @@ func (c *imageBuilder) createKanikoJobSpec( fmt.Sprintf("--dockerfile=%s", baseImageTag.DockerfilePath), fmt.Sprintf("--context=%s", baseImageTag.BuildContextURI), fmt.Sprintf("--build-arg=%s=%s", baseImageEnvKey, baseImageTag.ImageName), + fmt.Sprintf("--build-arg=%s=%s", modelArtifactStorageTypeKey, c.artifactService.GetType()), fmt.Sprintf("--build-arg=%s=%s", modelDependenciesUrlEnvKey, modelDependenciesUrl), fmt.Sprintf("--build-arg=%s=%s/model", modelArtifactsUrlEnvKey, version.ArtifactURI), fmt.Sprintf("--destination=%s", imageRef), @@ -627,37 +636,14 @@ func (c *imageBuilder) createKanikoJobSpec( kanikoArgs = append(kanikoArgs, c.config.KanikoAdditionalArgs...) activeDeadlineSeconds := int64(c.config.BuildTimeoutDuration / time.Second) - var volume []v1.Volume - var volumeMount []v1.VolumeMount + var volumes []v1.Volume + var volumeMounts []v1.VolumeMount var envVar []v1.EnvVar - // If kaniko service account is not set, use kaniko secret - if c.config.KanikoServiceAccount == "" { - kanikoArgs = append(kanikoArgs, - fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) - volume = []v1.Volume{ - { - Name: kanikoSecretName, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: kanikoSecretName, - }, - }, - }, - } - volumeMount = []v1.VolumeMount{ - { - Name: kanikoSecretName, - MountPath: "/secret", - }, - } - envVar = []v1.EnvVar{ - { - Name: gacEnvKey, - Value: saFilePath, - }, - } - } + // Configure additional credentials for specific image registries and artifact services + kanikoArgs = c.configureKanikoArgsToAddCredentials(kanikoArgs) + volumes, volumeMounts = c.configureVolumesAndVolumeMountsToAddCredentials(volumes, volumeMounts) + envVar = c.configureEnvVarsToAddCredentials(envVar) var resourceRequirements RequestLimitResources cpuRequest := resource.MustParse(c.config.DefaultResources.Requests.CPU) @@ -713,13 +699,13 @@ func (c *imageBuilder) createKanikoJobSpec( Name: containerName, Image: c.config.KanikoImage, Args: kanikoArgs, - VolumeMounts: volumeMount, + VolumeMounts: volumeMounts, Env: envVar, Resources: resourceRequirements.Build(), TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, }, }, - Volumes: volume, + Volumes: volumes, Tolerations: c.config.Tolerations, NodeSelector: c.config.NodeSelectors, }, @@ -732,6 +718,68 @@ func (c *imageBuilder) createKanikoJobSpec( return job, nil } +func (c *imageBuilder) configureKanikoArgsToAddCredentials(kanikoArgs []string) []string { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { + if c.config.KanikoServiceAccount == "" { + kanikoArgs = append(kanikoArgs, + fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) + } + } + return kanikoArgs +} + +func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials( + volumes []v1.Volume, + volumeMounts []v1.VolumeMount, +) ([]v1.Volume, []v1.VolumeMount) { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { + if c.config.KanikoServiceAccount == "" { + volumes = append(volumes, v1.Volume{ + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: kanikoSecretName, + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: kanikoSecretName, + MountPath: "/secret", + }) + } + } + if c.config.KanikoPushRegistryType == dockerRegistryPushRegistryType { + volumes = append(volumes, v1.Volume{ + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: c.config.KanikoDockerCredentialSecretName, + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: kanikoSecretName, + MountPath: dockerCredentialConfigPath, + }) + } + return volumes, volumeMounts +} + +func (c *imageBuilder) configureEnvVarsToAddCredentials(envVar []v1.EnvVar) []v1.EnvVar { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { + if c.config.KanikoServiceAccount == "" { + envVar = append(envVar, v1.EnvVar{ + Name: gacEnvKey, + Value: saFilePath, + }) + } + } + return envVar +} + func (c *imageBuilder) GetImageBuildingJobStatus(ctx context.Context, project mlp.Project, model *models.Model, version *models.Version) (status models.ImageBuildingJobStatus) { status.State = models.ImageBuildingJobStateUnknown diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index 1706ba56c..5675a4aa2 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -25,7 +25,6 @@ import ( "testing" "time" - "cloud.google.com/go/storage" "github.com/caraml-dev/merlin/cluster/labeller" cfg "github.com/caraml-dev/merlin/config" "github.com/caraml-dev/merlin/mlp" @@ -50,13 +49,13 @@ import ( ) const ( - testEnvironmentName = "dev" - testOrchestratorName = "merlin" - testProjectName = "test-project" - testModelName = "mymodel" - testArtifactURI = "gs://bucket-name/mlflow/11/68eb8538374c4053b3ecad99a44170bd/artifacts" - testCondaEnvUrl = testArtifactURI + "/model/conda.yaml" - testCondaEnvContent = `dependencies: + testEnvironmentName = "dev" + testOrchestratorName = "merlin" + testProjectName = "test-project" + testModelName = "mymodel" + testArtifactURISuffix = "://bucket-name/mlflow/11/68eb8538374c4053b3ecad99a44170bd/artifacts" + testCondaEnvUrlSuffix = testArtifactURISuffix + "/model/conda.yaml" + testCondaEnvContent = `dependencies: - python=3.8.* - pip: - mlflow @@ -65,9 +64,10 @@ const ( - scikit-learn - xgboost` - testBuildContextURL = "gs://bucket/build.tar.gz" - testBuildNamespace = "mynamespace" - testDockerRegistry = "ghcr.io" + testGCSBuildContextURL = "gs://bucket/build.tar.gz" + testS3BuildContextURL = "s3://bucket/build.tar.gz" + testBuildNamespace = "mynamespace" + testDockerRegistry = "ghcr.io" ) var ( @@ -92,9 +92,18 @@ var ( Name: testModelName, } - modelVersion = &models.Version{ + modelVersionWithGCSArtifact = &models.Version{ ID: models.ID(1), - ArtifactURI: testArtifactURI, + ArtifactURI: fmt.Sprintf("gs%s", testArtifactURISuffix), + PythonVersion: "3.10.*", + Labels: models.KV{ + "test": "true", + }, + } + + modelVersionWithS3Artifact = &models.Version{ + ID: models.ID(1), + ArtifactURI: fmt.Sprintf("s3%s", testArtifactURISuffix), PythonVersion: "3.10.*", Labels: models.KV{ "test": "true", @@ -114,11 +123,11 @@ var ( defaultSupportedPythonVersions = []string{"3.8.*", "3.9.*", "3.10.*"} - config = Config{ + configWithGCRPushRegistry = Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -127,6 +136,7 @@ var ( ClusterName: "my-cluster", GcpProject: "test-project", Environment: testEnvironmentName, + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, @@ -153,11 +163,52 @@ var ( }, MaximumRetry: jobBackOffLimit, } + configWithDockerPushRegistry = Config{ + BuildNamespace: testBuildNamespace, + BaseImage: cfg.BaseImageConfig{ + ImageName: "gojek/base-image:1", + BuildContextURI: testS3BuildContextURL, + BuildContextSubPath: "python/pyfunc-server", + DockerfilePath: "./Dockerfile", + }, + DockerRegistry: testDockerRegistry, + BuildTimeoutDuration: timeout, + ClusterName: "my-cluster", + GcpProject: "test-project", + Environment: testEnvironmentName, + KanikoPushRegistryType: dockerRegistryPushRegistryType, + KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoAdditionalArgs: defaultKanikoAdditionalArgs, + KanikoDockerCredentialSecretName: "docker-secret", + SupportedPythonVersions: defaultSupportedPythonVersions, + DefaultResources: cfg.ResourceRequestsLimits{ + Requests: cfg.Resource{ + CPU: "500m", + Memory: "1Gi", + }, + Limits: cfg.Resource{ + CPU: "500m", + Memory: "1Gi", + }, + }, + Tolerations: []v1.Toleration{ + { + Key: "image-build-job", + Value: "true", + Operator: v1.TolerationOpEqual, + Effect: v1.TaintEffectNoSchedule, + }, + }, + NodeSelectors: map[string]string{ + "cloud.google.com/gke-nodepool": "image-building-job-node-pool", + }, + MaximumRetry: jobBackOffLimit, + } configWithSa = Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -264,7 +315,7 @@ func TestBuildImage(t *testing.T) { hash := sha256.New() hash.Write([]byte(testCondaEnvContent)) hashEnv := hash.Sum(nil) - modelDependenciesURL := fmt.Sprintf("gs://%s/merlin/model_dependencies/%x", testArtifactGsutilURL.Bucket, hashEnv) + modelDependenciesURLSuffix := fmt.Sprintf("://%s/merlin/model_dependencies/%x", testArtifactGsutilURL.Bucket, hashEnv) type args struct { project mlp.Project @@ -275,30 +326,32 @@ func TestBuildImage(t *testing.T) { } tests := []struct { - name string - args args - existingJob *batchv1.Job - wantCreateJob *batchv1.Job - wantDeleteJobName string - wantImageRef string - config Config + name string + args args + existingJob *batchv1.Job + wantCreateJob *batchv1.Job + wantDeleteJobName string + wantImageRef string + config Config + artifactServiceType string + artifactServiceURLScheme string }{ { - name: "success: no existing job", + name: "success: gcs artifact storage + gcr push registry; no existing job", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -319,7 +372,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -337,13 +390,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -392,28 +446,139 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, use K8s Service account", + name: "success: s3 artifact storage + docker push registry; no existing job", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithS3Artifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithS3Artifact.ID), + Namespace: configWithDockerPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": configWithDockerPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, + "sample": "true", + "test": "true", + }, + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false", + }, + }, + Spec: batchv1.JobSpec{ + Completions: &jobCompletions, + BackoffLimit: &jobBackOffLimit, + TTLSecondsAfterFinished: &jobTTLSecondAfterComplete, + ActiveDeadlineSeconds: &timeoutInSecond, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": configWithDockerPushRegistry.Environment, + "gojek.com/orchestrator": testOrchestratorName, + "gojek.com/stream": project.Stream, + "gojek.com/team": project.Team, + "sample": "true", + "test": "true", + }, + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false", + }, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + { + Name: containerName, + Image: "gcr.io/kaniko-project/executor:v1.1.0", + Args: []string{ + fmt.Sprintf("--dockerfile=%s", configWithDockerPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithDockerPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithDockerPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "s3"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=s3%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=s3%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithDockerPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithS3Artifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithDockerPushRegistry.BaseImage.BuildContextSubPath), + "--cache=true", + "--compressed-caching=false", + "--snapshot-mode=redo", + "--use-new-run", + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: kanikoSecretName, + MountPath: "/kaniko/.docker", + }, + }, + Resources: defaultResourceRequests.Build(), + TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, + }, + }, + Volumes: []v1.Volume{ + { + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: configWithDockerPushRegistry.KanikoDockerCredentialSecretName, + }, + }, + }, + }, + Tolerations: []v1.Toleration{ + { + Key: "image-build-job", + Operator: v1.TolerationOpEqual, + Value: "true", + Effect: v1.TaintEffectNoSchedule, + }, + }, + NodeSelector: map[string]string{ + "cloud.google.com/gke-nodepool": "image-building-job-node-pool", + }, + }, + }, + }, + Status: batchv1.JobStatus{}, + }, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithDockerPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithS3Artifact.ID), + config: configWithDockerPushRegistry, + artifactServiceType: "s3", + artifactServiceURLScheme: "s3", + }, + { + name: "success: gcs artifact storage + gcr push registry; no existing job, use K8s Service account", + args: args{ + project: project, + model: model, + version: modelVersionWithGCSArtifact, + }, + existingJob: nil, + wantCreateJob: &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, + Labels: map[string]string{ + "gojek.com/app": model.Name, + "gojek.com/orchestrator": testOrchestratorName, + "gojek.com/stream": project.Stream, + "gojek.com/team": project.Team, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/component": "image-builder", "sample": "true", "test": "true", @@ -434,7 +599,7 @@ func TestBuildImage(t *testing.T) { "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/component": "image-builder", "sample": "true", "test": "true", @@ -450,13 +615,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -483,26 +649,28 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: configWithSa, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithSa, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, tolerations is not set", + name: "success: gcs artifact storage + gcr push registry; no existing job, tolerations is not set", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -523,7 +691,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -541,13 +709,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -589,12 +758,12 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -604,31 +773,34 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, + DefaultResources: configWithGCRPushRegistry.DefaultResources, NodeSelectors: map[string]string{ "cloud.google.com/gke-nodepool": "image-building-job-node-pool", }, MaximumRetry: jobBackOffLimit, }, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, node selectors is not set", + name: "success: gcs artifact storage + gcr push registry; no existing job, node selectors is not set", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -649,7 +821,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -667,13 +839,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -720,12 +893,12 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -735,9 +908,10 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, + DefaultResources: configWithGCRPushRegistry.DefaultResources, Tolerations: []v1.Toleration{ { Key: "image-build-job", @@ -748,23 +922,25 @@ func TestBuildImage(t *testing.T) { }, MaximumRetry: jobBackOffLimit, }, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, not using context sub path", + name: "success: gcs artifact storage + gcr push registry; no existing job, not using context sub path", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -785,7 +961,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -803,12 +979,13 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -858,43 +1035,46 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ - BuildNamespace: config.BuildNamespace, + BuildNamespace: configWithGCRPushRegistry.BuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, DockerfilePath: "./Dockerfile", }, - DockerRegistry: config.DockerRegistry, - BuildTimeoutDuration: config.BuildTimeoutDuration, - ClusterName: config.ClusterName, - GcpProject: config.GcpProject, - Environment: config.Environment, - KanikoImage: config.KanikoImage, + DockerRegistry: configWithGCRPushRegistry.DockerRegistry, + BuildTimeoutDuration: configWithGCRPushRegistry.BuildTimeoutDuration, + ClusterName: configWithGCRPushRegistry.ClusterName, + GcpProject: configWithGCRPushRegistry.GcpProject, + Environment: configWithGCRPushRegistry.Environment, + KanikoImage: configWithGCRPushRegistry.KanikoImage, + KanikoPushRegistryType: configWithGCRPushRegistry.KanikoPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, - MaximumRetry: config.MaximumRetry, - NodeSelectors: config.NodeSelectors, - Tolerations: config.Tolerations, + DefaultResources: configWithGCRPushRegistry.DefaultResources, + MaximumRetry: configWithGCRPushRegistry.MaximumRetry, + NodeSelectors: configWithGCRPushRegistry.NodeSelectors, + Tolerations: configWithGCRPushRegistry.Tolerations, }, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: existing job is running", + name: "success: gcs artifact storage + gcr push registry; existing job is running", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -915,7 +1095,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -933,13 +1113,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -988,25 +1169,27 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantCreateJob: nil, - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantCreateJob: nil, + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: existing job already successful", + name: "success: gcs artifact storage + gcr push registry; existing job already successful", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1027,7 +1210,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1045,13 +1228,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1102,26 +1286,28 @@ func TestBuildImage(t *testing.T) { Succeeded: 1, }, }, - wantCreateJob: nil, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantCreateJob: nil, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: existing job failed", + name: "success: gcs artifact storage + gcr push registry; existing job failed", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1142,7 +1328,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1160,13 +1346,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1219,12 +1406,12 @@ func TestBuildImage(t *testing.T) { }, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1245,7 +1432,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1263,13 +1450,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1318,16 +1506,18 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, { - name: "success: with custom resource request", + name: "success: gcs artifact storage + gcr push registry; with custom resource request", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, resourceRequest: &models.ResourceRequest{ CPURequest: resource.MustParse("2"), MemoryRequest: resource.MustParse("4Gi"), @@ -1336,12 +1526,12 @@ func TestBuildImage(t *testing.T) { existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1362,7 +1552,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1380,13 +1570,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1435,9 +1626,11 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: googleCloudStorageArtifactServiceType, + artifactServiceURLScheme: "gs", }, } @@ -1516,9 +1709,15 @@ func TestBuildImage(t *testing.T) { imageBuilderCfg := tt.config artifactServiceMock := &mocks.Service{} - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ParseURL", fmt.Sprintf("%s%s", + tt.artifactServiceURLScheme, testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return(tt.artifactServiceURLScheme) + artifactServiceMock.On("GetURLScheme").Return(tt.artifactServiceURLScheme) + artifactServiceMock.On("GetType").Return(tt.artifactServiceType) + artifactServiceMock.On("ReadArtifact", mock.Anything, + fmt.Sprintf("%s%s", tt.artifactServiceURLScheme, testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ReadArtifact", mock.Anything, + fmt.Sprintf("%s%s", tt.artifactServiceURLScheme, modelDependenciesURLSuffix)).Return([]byte(testCondaEnvContent), nil) c := NewModelServiceImageBuilder(kubeClient, imageBuilderCfg, artifactServiceMock) @@ -1558,7 +1757,7 @@ func TestGetContainers(t *testing.T) { } modelVersion := &models.Version{ ID: models.ID(1), - ArtifactURI: testArtifactURI, + ArtifactURI: testArtifactURISuffix, } type args struct { @@ -1621,7 +1820,7 @@ func TestGetContainers(t *testing.T) { artifaceServiceMock := &mocks.Service{} - c := NewModelServiceImageBuilder(kubeClient, config, artifaceServiceMock) + c := NewModelServiceImageBuilder(kubeClient, configWithGCRPushRegistry, artifaceServiceMock) containers, err := c.GetContainers(context.Background(), tt.args.project, tt.args.model, tt.args.version) if !tt.wantError { @@ -1823,11 +2022,13 @@ func Test_imageBuilder_getHashedModelDependenciesUrl(t *testing.T) { name: "hash dependencies is already exist", args: args{ ctx: context.Background(), - version: modelVersion, + version: modelVersionWithGCSArtifact, }, artifactServiceMock: func(artifactServiceMock *mocks.Service) { - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ParseURL", fmt.Sprintf("gs%s", testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("ReadArtifact", mock.Anything, fmt.Sprintf("gs%s", testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return([]byte(testCondaEnvContent), nil) }, want: modelDependenciesURL, @@ -1837,12 +2038,14 @@ func Test_imageBuilder_getHashedModelDependenciesUrl(t *testing.T) { name: "hash dependencies is not exist yet", args: args{ ctx: context.Background(), - version: modelVersion, + version: modelVersionWithGCSArtifact, }, artifactServiceMock: func(artifactServiceMock *mocks.Service) { - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return(nil, storage.ErrObjectNotExist) + artifactServiceMock.On("ParseURL", fmt.Sprintf("gs%s", testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("ReadArtifact", mock.Anything, fmt.Sprintf("gs%s", testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return(nil, artifact.ErrObjectNotExist) artifactServiceMock.On("WriteArtifact", mock.Anything, modelDependenciesURL, []byte(testCondaEnvContent)).Return(nil) }, want: modelDependenciesURL, @@ -1895,12 +2098,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1919,12 +2122,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1943,12 +2146,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1968,9 +2171,9 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { mockListPods: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s-1", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s-1", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Labels: map[string]string{ - "job-name": fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + "job-name": fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), }, }, Status: v1.PodStatus{ @@ -2021,7 +2224,7 @@ CondaEnvException: Pip failed`, args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{} diff --git a/config.yaml b/config.yaml index 4d34b1db4..4125753c3 100644 --- a/config.yaml +++ b/config.yaml @@ -51,6 +51,7 @@ ImageBuilderConfig: command: gke-gcloud-auth-plugin interactiveMode: IfAvailable provideClusterInfo: true + KanikoPushRegistryType: docker KanikoAdditionalArgs: - "--cache=true" - "--compressed-caching=false" diff --git a/python/batch-predictor/docker/app.Dockerfile b/python/batch-predictor/docker/app.Dockerfile index d1e518683..39d33a8b4 100644 --- a/python/batch-predictor/docker/app.Dockerfile +++ b/python/batch-predictor/docker/app.Dockerfile @@ -15,15 +15,36 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} +ARG MLFLOW_ARTIFACT_STORAGE_TYPE + ARG GOOGLE_APPLICATION_CREDENTIALS -RUN if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; \ - then gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ - else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + +ARG AWS_ACCESS_KEY_ID +ARG AWS_SECRET_ACCESS_KEY +ARG AWS_DEFAULT_REGION +ARG AWS_ENDPOINT_URL + +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then \ + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ + else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + fi \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + echo "S3 credentials used"; \ + else \ + echo "No credentials are used"; \ fi # Download and install user model dependencies ARG MODEL_DEPENDENCIES_URL -RUN gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + S3_KEY=${MODEL_DEPENDENCIES_URL##*s3://}; \ + aws s3api get-object --bucket ${S3_KEY%%/*} --key ${S3_KEY#*/} conda.yaml; \ + else \ + echo "No credentials are used"; \ + fi ARG MERLIN_DEP_CONSTRAINT RUN process_conda_env.sh conda.yaml "merlin-batch-predictor" "${MERLIN_DEP_CONSTRAINT}" @@ -31,7 +52,13 @@ RUN conda env create --name merlin-model --file conda.yaml # Download and dry-run user model artifacts and code ARG MODEL_ARTIFACTS_URL -RUN gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil -m cp -r ${MODEL_ARTIFACTS_URL} .; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + aws s3 cp ${MODEL_ARTIFACTS_URL} model --recursive; \ + else \ + echo "No credentials are used"; \ + fi RUN /bin/bash -c ". activate merlin-model && merlin-batch-predictor --dry-run-model ${HOME}/model" ENTRYPOINT ["merlin_entrypoint.sh"] diff --git a/python/batch-predictor/docker/base.Dockerfile b/python/batch-predictor/docker/base.Dockerfile index 07237e67a..f5b5ad47e 100644 --- a/python/batch-predictor/docker/base.Dockerfile +++ b/python/batch-predictor/docker/base.Dockerfile @@ -23,6 +23,8 @@ ENV PATH /opt/conda/bin:$PATH ENV SPARK_OPERATOR_VERSION=v1beta2-1.3.7-3.1.1 ENV SPARK_BQ_CONNECTOR_VERSION=0.27.0 +RUN apt-get update && apt-get install unzip + # Setup dependencies for Google Cloud Storage access. RUN rm $SPARK_HOME/jars/guava-14.0.1.jar ADD https://repo1.maven.org/maven2/com/google/guava/guava/23.0/guava-23.0.jar \ @@ -59,6 +61,9 @@ ENV PATH=$PATH:/google-cloud-sdk/bin ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - -C / +# Install aws CLI +RUN wget -q https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && ./aws/install + # Configure non-root user ENV USER spark ENV UID 185 diff --git a/python/pyfunc-server/docker/Dockerfile b/python/pyfunc-server/docker/Dockerfile index 42d40f90d..ae362256f 100644 --- a/python/pyfunc-server/docker/Dockerfile +++ b/python/pyfunc-server/docker/Dockerfile @@ -13,17 +13,39 @@ # limitations under the License. ARG BASE_IMAGE + FROM ${BASE_IMAGE} +ARG MLFLOW_ARTIFACT_STORAGE_TYPE + ARG GOOGLE_APPLICATION_CREDENTIALS -RUN if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; \ - then gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ - else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + +ARG AWS_ACCESS_KEY_ID +ARG AWS_SECRET_ACCESS_KEY +ARG AWS_DEFAULT_REGION +ARG AWS_ENDPOINT_URL + +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then \ + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ + else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + fi \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + echo "S3 credentials used"; \ + else \ + echo "No credentials are used"; \ fi # Download and install user model dependencies ARG MODEL_DEPENDENCIES_URL -RUN gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + S3_KEY=${MODEL_DEPENDENCIES_URL##*s3://}; \ + aws s3api get-object --bucket ${S3_KEY%%/*} --key ${S3_KEY#*/} conda.yaml; \ + else \ + echo "No credentials are used"; \ + fi ARG MERLIN_DEP_CONSTRAINT RUN process_conda_env.sh conda.yaml "merlin-pyfunc-server" "${MERLIN_DEP_CONSTRAINT}" @@ -31,7 +53,13 @@ RUN conda env create --name merlin-model --file conda.yaml # Download and dry-run user model artifacts and code ARG MODEL_ARTIFACTS_URL -RUN gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil -m cp -r ${MODEL_ARTIFACTS_URL} .; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + aws s3 cp ${MODEL_ARTIFACTS_URL} model --recursive; \ + else \ + echo "No credentials are used"; \ + fi RUN /bin/bash -c ". activate merlin-model && merlin-pyfunc-server --model_dir model --dry_run" CMD ["run.sh"] diff --git a/python/pyfunc-server/docker/base.Dockerfile b/python/pyfunc-server/docker/base.Dockerfile index 83fdddbc2..dd0041b2d 100644 --- a/python/pyfunc-server/docker/base.Dockerfile +++ b/python/pyfunc-server/docker/base.Dockerfile @@ -14,8 +14,13 @@ FROM condaforge/miniforge3:23.11.0-0 +RUN apt-get update && apt-get install unzip + ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - +# Install aws CLI +RUN wget -q https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && ./aws/install + ENV PATH=$PATH:/google-cloud-sdk/bin ENV GRPC_HEALTH_PROBE_VERSION=v0.4.4 diff --git a/python/sdk/merlin/model.py b/python/sdk/merlin/model.py index 175cc4e70..2150305a0 100644 --- a/python/sdk/merlin/model.py +++ b/python/sdk/merlin/model.py @@ -58,7 +58,7 @@ from merlin.transformer import Transformer from merlin.util import ( autostr, - download_files_from_gcs, + download_files_from_blob_storage, extract_optional_value_with_default, guess_mlp_ui_url, valid_name_check, @@ -956,7 +956,7 @@ def download_artifact(self, destination_path): if artifact_uri is None or artifact_uri == "": raise Exception("There is no artifact uri for this model version") - download_files_from_gcs(artifact_uri, destination_path) + download_files_from_blob_storage(artifact_uri, destination_path) def log_artifacts(self, local_dir, artifact_path=None): """ diff --git a/python/sdk/merlin/util.py b/python/sdk/merlin/util.py index 2b01e36be..0efea1c52 100644 --- a/python/sdk/merlin/util.py +++ b/python/sdk/merlin/util.py @@ -14,6 +14,7 @@ import re import os +import boto3 from urllib.parse import urlparse from google.cloud import storage from os.path import dirname @@ -66,34 +67,57 @@ def valid_name_check(input_name: str) -> bool: return matching_group == input_name -def get_bucket_name(gcs_uri: str) -> str: - parsed_result = urlparse(gcs_uri) +def get_blob_storage_scheme(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) + return parsed_result.scheme + + +def get_bucket_name(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) return parsed_result.netloc -def get_gcs_path(gcs_uri: str) -> str: - parsed_result = urlparse(gcs_uri) +def get_artifact_path(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) return parsed_result.path.strip("/") -def download_files_from_gcs(gcs_uri: str, destination_path: str): +def download_files_from_blob_storage(artifact_uri: str, destination_path: str): makedirs(destination_path, exist_ok=True) - client = storage.Client() - bucket_name = get_bucket_name(gcs_uri) - path = get_gcs_path(gcs_uri) - - bucket = client.get_bucket(bucket_name) - blobs = bucket.list_blobs(prefix=path) - for blob in blobs: - # Get only the path after .../artifacts/model - # E.g. - # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb - # we only want to extract 1/saved_model.pb - artifact_path = os.path.join(*blob.name.split("/")[5:]) - dir = os.path.join(destination_path, dirname(artifact_path)) - makedirs(dir, exist_ok=True) - blob.download_to_filename(os.path.join(destination_path, artifact_path)) + storage_schema = get_blob_storage_scheme(artifact_uri) + bucket_name = get_bucket_name(artifact_uri) + path = get_artifact_path(artifact_uri) + + if storage_schema == "gs": + client = storage.Client() + bucket = client.get_bucket(bucket_name) + blobs = bucket.list_blobs(prefix=path) + for blob in blobs: + # Get only the path after .../artifacts/model + # E.g. + # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb + # we only want to extract 1/saved_model.pb + artifact_path = os.path.join(*blob.name.split("/")[5:]) + dir = os.path.join(destination_path, dirname(artifact_path)) + makedirs(dir, exist_ok=True) + blob.download_to_filename(os.path.join(destination_path, artifact_path)) + elif storage_schema == "s3": + client = boto3.client("s3") + bucket = client.list_objects_v2(Prefix=path, Bucket=bucket_name)["Contents"] + for s3_object in bucket: + # we do this because the list_objects_v2 method lists all subdirectories in addition to files + if not s3_object['Key'].endswith('/'): + # Get only the path after .../artifacts/model + # E.g. + # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb + # we only want to extract 1/saved_model.pb + object_paths = s3_object['Key'].split("/")[5:] + if len(object_paths) != 0: + artifact_path = os.path.join(*object_paths) + os.makedirs(os.path.join(destination_path, dirname(artifact_path)), exist_ok=True) + client.download_file(bucket_name, s3_object['Key'], os.path.join(destination_path, artifact_path)) + def extract_optional_value_with_default(opt: Optional[Any], default: Any) -> Any: if opt is not None: diff --git a/python/sdk/test/utils_unit_test.py b/python/sdk/test/utils_unit_test.py index ba3843f1f..712bbc272 100644 --- a/python/sdk/test/utils_unit_test.py +++ b/python/sdk/test/utils_unit_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from merlin.util import guess_mlp_ui_url, valid_name_check, get_bucket_name, get_gcs_path +from merlin.util import guess_mlp_ui_url, valid_name_check, get_blob_storage_scheme, get_bucket_name, get_artifact_path import pytest @@ -34,19 +34,28 @@ def test_name_check(): assert result is True +@pytest.mark.unit +def test_get_blob_storage_scheme(): + gcs_artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert get_blob_storage_scheme(gcs_artifact_uri) == 'gs' + + s3_artifact_uri = 's3://some-bucket/mlflow/81/ddd' + assert get_blob_storage_scheme(s3_artifact_uri) == 's3' + + @pytest.mark.unit def test_get_bucket_name(): - gcs_uri = 'gs://some-bucket/mlflow/81/ddd' - assert get_bucket_name(gcs_uri) == 'some-bucket' + artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert get_bucket_name(artifact_uri) == 'some-bucket' @pytest.mark.unit -def test_get_gcs_path(): - gcs_uri = 'gs://some-bucket/mlflow/81/ddd' - assert get_gcs_path(gcs_uri) == 'mlflow/81/ddd' +def test_get_artifact_path(): + artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert get_artifact_path(artifact_uri) == 'mlflow/81/ddd' double_slash_uri_path = 'gs://some-bucket//mlflow/81/ddd' - assert get_gcs_path(double_slash_uri_path) == 'mlflow/81/ddd' + assert get_artifact_path(double_slash_uri_path) == 'mlflow/81/ddd' @pytest.mark.parametrize("mlp_api_url,expected", [