Skip to content

Commit

Permalink
Set SchedulerPluginsControl as the default implementation
Browse files (browse the repository at this point in the history)
Signed-off-by: Syulin7 <[email protected]>
  • Loading branch information
Syulin7 committed Mar 14, 2023
1 parent d46247c commit e08164d
Show file tree
Hide file tree
Showing 13 changed files with 19 additions and 16 deletions.
7 changes: 4 additions & 3 deletions cmd/training-operator.v1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ func main() {
flag.StringVar(&leaderElectionID, "leader-election-id", "1ca428e5.training-operator.kubeflow.org", "The ID for leader election.")
flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=tfjob --enable-scheme=pytorchjob, case insensitive."+
" Now supporting TFJob, PyTorchJob, MXNetJob, XGBoostJob, PaddleJob. By default, all supported schemes will be enabled.")
- flag.StringVar(&gangSchedulerName, "gang-scheduler-name", "none", "The scheduler to gang-schedule kubeflow jobs, defaults to none")
+ flag.StringVar(&gangSchedulerName, "gang-scheduler-name", "", "The scheduler to gang-schedule kubeflow jobs."+
+ " Now supporting volcano, default-scheduler, scheduler-plugins, koord-scheduler.")
flag.StringVar(&namespace, "namespace", os.Getenv(commonutil.EnvKubeflowNamespace), "The namespace to monitor kubeflow jobs. If unset, it monitors all namespaces cluster-wide."+
"If set, it only monitors kubeflow jobs in the given namespace.")
flag.IntVar(&monitoringPort, "monitoring-port", 9443, "Endpoint port for displaying monitoring metrics. "+
Expand Down Expand Up @@ -121,8 +122,8 @@ func main() {
cfg := mgr.GetConfig()
volcanoClientSet := volcanoclient.NewForConfigOrDie(cfg)
gangSchedulingSetupFunc = common.GenVolcanoSetupFunc(volcanoClientSet)
- } else if strings.EqualFold(gangSchedulerName, string(common.GangSchedulerSchedulerPlugins)) {
- gangSchedulingSetupFunc = common.GenSchedulerPluginsSetupFunc(mgr.GetClient())
+ } else if gangSchedulerName != "" {
+ gangSchedulingSetupFunc = common.GenSchedulerPluginsSetupFunc(mgr.GetClient(), gangSchedulerName)
}

// TODO: We need a general manager. all rest reconciler addsToManager
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,5 @@ require (
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
)

+ replace github.com/kubeflow/common v0.4.6 => github.com/Syulin7/common v0.0.0-20230314024246-f6d94fbd12fa
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+ github.com/Syulin7/common v0.0.0-20230314024246-f6d94fbd12fa h1:XFgorzovrtSmkaIjhmO0ncF4Sg25JV/dpENgk4tV0Bs=
+ github.com/Syulin7/common v0.0.0-20230314024246-f6d94fbd12fa/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
Expand Down Expand Up @@ -327,8 +329,6 @@ github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
- github.com/kubeflow/common v0.4.6 h1:yzJf/HEdS6ginD0GlVkgbOFie0Sp66VdGjXidAGZIlk=
- github.com/kubeflow/common v0.4.6/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY=
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
Expand Down
2 changes: 1 addition & 1 deletion hack/python-sdk/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@
"format": "int32"
},
"cleanPodPolicy": {
- "description": "CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running.",
+ "description": "CleanPodPolicy defines the policy to kill pods after the job completes. Default to None.",
"type": "string"
},
"schedulingPolicy": {
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_mpijobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7374,7 +7374,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_mxjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7374,7 +7374,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_paddlejobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7881,7 +7881,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_pytorchjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7910,7 +7910,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_tfjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion manifests/base/crds/kubeflow.org_xgboostjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ spec:
type: integer
cleanPodPolicy:
description: CleanPodPolicy defines the policy to kill pods after
- the job completes. Default to Running.
+ the job completes. Default to None.
type: string
schedulingPolicy:
description: SchedulingPolicy defines the policy related to scheduling,
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/kubeflow.org/v1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion sdk/python/docs/V1RunPolicy.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**active_deadline_seconds** | **int** | Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. | [optional]
**backoff_limit** | **int** | Optional number of retries before marking this job failed. | [optional]
- **clean_pod_policy** | **str** | CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. | [optional]
+ **clean_pod_policy** | **str** | CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. | [optional]
**scheduling_policy** | [**V1SchedulingPolicy**](V1SchedulingPolicy.md) | | [optional]
**ttl_seconds_after_finished** | **int** | TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. | [optional]

Expand Down
4 changes: 2 additions & 2 deletions sdk/python/kubeflow/training/models/v1_run_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def backoff_limit(self, backoff_limit):
def clean_pod_policy(self):
"""Gets the clean_pod_policy of this V1RunPolicy. # noqa: E501

- CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. # noqa: E501
+ CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. # noqa: E501

:return: The clean_pod_policy of this V1RunPolicy. # noqa: E501
:rtype: str
Expand All @@ -133,7 +133,7 @@ def clean_pod_policy(self):
def clean_pod_policy(self, clean_pod_policy):
"""Sets the clean_pod_policy of this V1RunPolicy.

- CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. # noqa: E501
+ CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. # noqa: E501

:param clean_pod_policy: The clean_pod_policy of this V1RunPolicy. # noqa: E501
:type: str
Expand Down

0 comments on commit e08164d

Please sign in to comment.