-
Notifications
You must be signed in to change notification settings - Fork 53
Support Optional Spark features #118
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3,17 +3,17 @@ package spark | |||||
import ( | ||||||
"context" | ||||||
"fmt" | ||||||
"time" | ||||||
|
||||||
"github.com/lyft/flyteplugins/go/tasks/pluginmachinery" | ||||||
"github.com/lyft/flyteplugins/go/tasks/pluginmachinery/flytek8s" | ||||||
"github.com/lyft/flyteplugins/go/tasks/pluginmachinery/flytek8s/config" | ||||||
|
||||||
"github.com/lyft/flyteplugins/go/tasks/errors" | ||||||
"github.com/lyft/flyteplugins/go/tasks/logs" | ||||||
pluginsCore "github.com/lyft/flyteplugins/go/tasks/pluginmachinery/core" | ||||||
|
||||||
"github.com/lyft/flyteplugins/go/tasks/pluginmachinery/k8s" | ||||||
"github.com/lyft/flyteplugins/go/tasks/pluginmachinery/utils" | ||||||
|
||||||
"k8s.io/client-go/kubernetes/scheme" | ||||||
|
||||||
sparkOp "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1" | ||||||
|
@@ -22,7 +22,9 @@ import ( | |||||
"github.com/lyft/flyteidl/gen/pb-go/flyteidl/plugins" | ||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
|
||||||
"github.com/lyft/flyteplugins/go/tasks/errors" | ||||||
"regexp" | ||||||
"strings" | ||||||
"time" | ||||||
|
||||||
pluginsConfig "github.com/lyft/flyteplugins/go/tasks/config" | ||||||
) | ||||||
|
@@ -31,12 +33,20 @@ const KindSparkApplication = "SparkApplication" | |||||
// Identifiers for the Spark driver and history UIs exposed for a job.
const (
	sparkDriverUI  = "sparkDriverUI"
	sparkHistoryUI = "sparkHistoryUI"
)
|
||||||
var featureRegex = regexp.MustCompile(`^spark.((lyft)|(flyte)).(.+).enabled$`) | ||||||
|
||||||
var sparkTaskType = "spark" | ||||||
|
||||||
// Spark-specific configs | ||||||
type Config struct { | ||||||
DefaultSparkConfig map[string]string `json:"spark-config-default" pflag:",Key value pairs of default spark configuration that should be applied to every SparkJob"` | ||||||
SparkHistoryServerURL string `json:"spark-history-server-url" pflag:",URL for SparkHistory Server that each job will publish the execution history to."` | ||||||
Features []Feature `json:"features" pflag:",List of optional features supported."` | ||||||
} | ||||||
|
||||||
type Feature struct { | ||||||
Name string `json:"name"` | ||||||
SparkConfig map[string]string `json:"spark-config"` | ||||||
} | ||||||
|
||||||
var ( | ||||||
|
@@ -139,7 +149,12 @@ func (sparkResourceHandler) BuildResource(ctx context.Context, taskCtx pluginsCo | |||||
} | ||||||
|
||||||
for k, v := range sparkJob.GetSparkConf() { | ||||||
sparkConfig[k] = v | ||||||
// Add optional features if present. | ||||||
if featureRegex.MatchString(k) { | ||||||
addConfig(sparkConfig, k, v) | ||||||
} else { | ||||||
sparkConfig[k] = v | ||||||
} | ||||||
} | ||||||
|
||||||
// Set pod limits. | ||||||
|
@@ -184,6 +199,27 @@ func (sparkResourceHandler) BuildResource(ctx context.Context, taskCtx pluginsCo | |||||
return j, nil | ||||||
} | ||||||
|
||||||
func addConfig(sparkConfig map[string]string, key string, value string) { | ||||||
|
||||||
if strings.TrimSpace(value) != "true" { | ||||||
return | ||||||
} | ||||||
|
||||||
matches := featureRegex.FindAllStringSubmatch(key, -1) | ||||||
if len(matches) == 0 { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
return | ||||||
} | ||||||
featureName := matches[0][len(matches[0])-1] | ||||||
|
||||||
for _, feature := range GetSparkConfig().Features { | ||||||
if feature.Name == featureName { | ||||||
for k, v := range feature.SparkConfig { | ||||||
sparkConfig[k] = v | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So if multiple features override the same spark configs, the last one wins... is that ok/intentional/undesired (so we can add validation)? whichever it's, I would add docs on the config to explain the behavior There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally they should be unique so this can be a nested map here instead of list of features. In future thou, I can see this being extended to check for values as well i.e. feature name + value can be unique so hence a struct makes better sense. For now, I think using the first matching config makes sense (instead of the last). Added that with comments. |
||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
// Convert SparkJob ApplicationType to Operator CRD ApplicationType | ||||||
func getApplicationType(applicationType plugins.SparkApplication_Type) sparkOp.SparkApplicationType { | ||||||
switch applicationType { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: to lower?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done