From 4683cdbb9f4e32b1ca139e64264b1b6b4f8ea275 Mon Sep 17 00:00:00 2001
From: Xuzheng Chang
Date: Fri, 29 Dec 2023 16:58:33 +0800
Subject: [PATCH] add ignored CSI provisioners when computing CSI resources

Signed-off-by: Xuzheng Chang
---
 cmd/scheduler/app/options/options.go          |  6 ++++++
 pkg/scheduler/cache/cache.go                  | 21 ++++++++++++++++---
 pkg/scheduler/cache/event_handlers.go         |  9 ++++++++
 .../plugins/predicates/predicates_test.go     |  2 +-
 pkg/scheduler/scheduler.go                    |  2 +-
 5 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/cmd/scheduler/app/options/options.go b/cmd/scheduler/app/options/options.go
index 411920031c..d3c6cf819e 100644
--- a/cmd/scheduler/app/options/options.go
+++ b/cmd/scheduler/app/options/options.go
@@ -79,6 +79,11 @@ type ServerOption struct {
 	NodeSelector      []string
 	EnableCacheDumper bool
 	NodeWorkerThreads uint32
+
+	// IgnoredCSIProvisioners contains a list of provisioners. PVCs provisioned by any of them are not counted
+	// in a pod's PVC resource request or in node.Allocatable, because such provisioners never populate the
+	// spec.drivers field of the CSINode resource; they are usually host-path CSI controllers such as rancher.io/local-path and hostpath.csi.k8s.io.
+	IgnoredCSIProvisioners []string
 }
 
 type DecryptFunc func(c *ServerOption) error
@@ -134,6 +139,7 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
 	fs.StringSliceVar(&s.NodeSelector, "node-selector", nil, "volcano only work with the labeled node, like: --node-selector=volcano.sh/role:train --node-selector=volcano.sh/role:serving")
 	fs.BoolVar(&s.EnableCacheDumper, "cache-dumper", true, "Enable the cache dumper, it's true by default")
 	fs.Uint32Var(&s.NodeWorkerThreads, "node-worker-threads", defaultNodeWorkers, "The number of threads syncing node operations.")
+	fs.StringSliceVar(&s.IgnoredCSIProvisioners, "ignored-provisioners", nil, "The provisioners that will be ignored during pod PVC request computation and preemption.")
 }
 
 // CheckOptionOrDie check lock-object-namespace when LeaderElection is enabled.
diff --git a/pkg/scheduler/cache/cache.go b/pkg/scheduler/cache/cache.go
index d9f0f7892e..61eeb3d9c6 100644
--- a/pkg/scheduler/cache/cache.go
+++ b/pkg/scheduler/cache/cache.go
@@ -74,6 +74,9 @@ const (
 	defaultMetricsInternal = 30 * time.Second
 )
 
+// defaultIgnoredProvisioners contains provisioners that will be ignored during pod PVC request computation and preemption.
+var defaultIgnoredProvisioners = []string{"rancher.io/local-path", "hostpath.csi.k8s.io"}
+
 func init() {
 	schemeBuilder := runtime.SchemeBuilder{
 		v1.AddToScheme,
@@ -83,8 +86,8 @@ func init() {
 }
 
 // New returns a Cache implementation.
-func New(config *rest.Config, schedulerNames []string, defaultQueue string, nodeSelectors []string, nodeWorkers uint32) Cache {
-	return newSchedulerCache(config, schedulerNames, defaultQueue, nodeSelectors, nodeWorkers)
+func New(config *rest.Config, schedulerNames []string, defaultQueue string, nodeSelectors []string, nodeWorkers uint32, ignoredProvisioners []string) Cache {
+	return newSchedulerCache(config, schedulerNames, defaultQueue, nodeSelectors, nodeWorkers, ignoredProvisioners)
 }
 
 // SchedulerCache cache for the kube batch
@@ -148,6 +151,11 @@ type SchedulerCache struct {
 	imageStates map[string]*imageState
 
 	nodeWorkers uint32
+
+	// IgnoredCSIProvisioners contains a list of provisioners. PVCs provisioned by any of them are not counted
+	// in a pod's PVC resource request or in node.Allocatable, because such provisioners never populate the
+	// spec.drivers field of the CSINode resource; they are usually host-path CSI controllers such as rancher.io/local-path and hostpath.csi.k8s.io.
+	IgnoredCSIProvisioners sets.Set[string]
 }
 
 type imageState struct {
@@ -390,7 +398,7 @@ func (pgb *podgroupBinder) Bind(job *schedulingapi.JobInfo, cluster string) (*sc
 	return job, nil
 }
 
-func newSchedulerCache(config *rest.Config, schedulerNames []string, defaultQueue string, nodeSelectors []string, nodeWorkers uint32) *SchedulerCache {
+func newSchedulerCache(config *rest.Config, schedulerNames []string, defaultQueue string, nodeSelectors []string, nodeWorkers uint32, ignoredProvisioners []string) *SchedulerCache {
 	kubeClient, err := kubernetes.NewForConfig(config)
 	if err != nil {
 		panic(fmt.Sprintf("failed init kubeClient, with err: %v", err))
@@ -453,6 +461,13 @@ func newSchedulerCache(config *rest.Config, schedulerNames []string, defaultQueu
 		NodeList:    []string{},
 		nodeWorkers: nodeWorkers,
 	}
+
+	ignoredProvisionersSet := sets.New[string]()
+	for _, provisioner := range append(ignoredProvisioners, defaultIgnoredProvisioners...) {
+		ignoredProvisionersSet.Insert(provisioner)
+	}
+	sc.IgnoredCSIProvisioners = ignoredProvisionersSet
+
 	if len(nodeSelectors) > 0 {
 		for _, nodeSelectorLabel := range nodeSelectors {
 			nodeSelectorLabelLen := len(nodeSelectorLabel)
diff --git a/pkg/scheduler/cache/event_handlers.go b/pkg/scheduler/cache/event_handlers.go
index 6f76cd91e7..75c4d1bb2f 100644
--- a/pkg/scheduler/cache/event_handlers.go
+++ b/pkg/scheduler/cache/event_handlers.go
@@ -127,7 +127,12 @@ func (sc *SchedulerCache) getPodCSIVolumes(pod *v1.Pod) (map[v1.ResourceName]int
 			return volumes, err
 		}
 	}
+
 	driverName := sc.getCSIDriverInfo(pvc)
+	if sc.isIgnoredProvisioner(driverName) {
+		klog.V(5).InfoS("Provisioner is ignored, skipping PVC count for pod", "driverName", driverName)
+		continue
+	}
 	if driverName == "" {
 		klog.V(5).InfoS("Could not find a CSI driver name for pvc(%s/%s), not counting volume", pvc.Namespace, pvc.Name)
 		continue
@@ -147,6 +152,10 @@ func (sc *SchedulerCache) getPodCSIVolumes(pod *v1.Pod) (map[v1.ResourceName]int
 	return volumes, nil
 }
 
+func (sc *SchedulerCache) isIgnoredProvisioner(driverName string) bool {
+	return sc.IgnoredCSIProvisioners.Has(driverName)
+}
+
 func (sc *SchedulerCache) getCSIDriverInfo(pvc *v1.PersistentVolumeClaim) string {
 	pvName := pvc.Spec.VolumeName
 
diff --git a/pkg/scheduler/plugins/predicates/predicates_test.go b/pkg/scheduler/plugins/predicates/predicates_test.go
index 02efd1bc71..1cbb1e77b1 100644
--- a/pkg/scheduler/plugins/predicates/predicates_test.go
+++ b/pkg/scheduler/plugins/predicates/predicates_test.go
@@ -70,7 +70,7 @@ func TestEventHandler(t *testing.T) {
 		return
 	}
 
-	sc := cache.New(config, option.SchedulerNames, option.DefaultQueue, option.NodeSelector, option.NodeWorkerThreads)
+	sc := cache.New(config, option.SchedulerNames, option.DefaultQueue, option.NodeSelector, option.NodeWorkerThreads, nil)
 	schedulerCache := sc.(*cache.SchedulerCache)
 
 	// pending pods
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index ee2258eae9..e8d508a7b9 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -64,7 +64,7 @@ func NewScheduler(config *rest.Config, opt *options.ServerOption) (*Scheduler, e
 		}
 	}
 
-	cache := schedcache.New(config, opt.SchedulerNames, opt.DefaultQueue, opt.NodeSelector, opt.NodeWorkerThreads)
+	cache := schedcache.New(config, opt.SchedulerNames, opt.DefaultQueue, opt.NodeSelector, opt.NodeWorkerThreads, opt.IgnoredCSIProvisioners)
 	scheduler := &Scheduler{
 		schedulerConf: opt.SchedulerConf,
 		fileWatcher:   watcher,
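
Note for reviewers (not part of the patch): below is a minimal, self-contained sketch of the semantics this change introduces, mirroring newSchedulerCache and isIgnoredProvisioner. Provisioners passed via --ignored-provisioners are merged with the built-in defaults into a set, and the driver name resolved for a PVC is checked against that set by exact match. The provisioner name example.com/my-hostpath is hypothetical, used only for illustration.

	package main

	import (
		"fmt"

		"k8s.io/apimachinery/pkg/util/sets"
	)

	// Mirrors defaultIgnoredProvisioners in pkg/scheduler/cache/cache.go.
	var defaultIgnoredProvisioners = []string{"rancher.io/local-path", "hostpath.csi.k8s.io"}

	// buildIgnoredSet merges user-supplied provisioners with the defaults,
	// the same way newSchedulerCache builds sc.IgnoredCSIProvisioners;
	// duplicates simply collapse into the set.
	func buildIgnoredSet(userProvided []string) sets.Set[string] {
		s := sets.New[string]()
		for _, p := range append(userProvided, defaultIgnoredProvisioners...) {
			s.Insert(p)
		}
		return s
	}

	func main() {
		// As if the scheduler were started with
		// --ignored-provisioners=example.com/my-hostpath (hypothetical value).
		ignored := buildIgnoredSet([]string{"example.com/my-hostpath"})

		for _, driver := range []string{"rancher.io/local-path", "ebs.csi.aws.com"} {
			// Equivalent to SchedulerCache.isIgnoredProvisioner: an exact
			// driver-name lookup, so PVCs from matching provisioners are
			// skipped when counting a pod's CSI volumes.
			fmt.Printf("driver %q ignored: %v\n", driver, ignored.Has(driver))
		}
	}

Because the flag is registered with StringSliceVar, it accepts a comma-separated list and may also be repeated. Note that the two default provisioners are ignored even when the flag is unset, since the defaults are unconditionally appended.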