Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PodTemplates support as NodeInfoProcessor in Cluster-Autoscaler #3964

2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.9.2
version: 9.10.0
1 change: 1 addition & 0 deletions charts/cluster-autoscaler/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ rules:
- ""
resources:
- pods
- podtemplates
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are more rbac configs in various cloudprovider/ subdirectories (many providers have example yaml in there).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have updated all the ClusterRoles to add the "podtemplates" resource.

- services
- replicationcontrollers
- persistentvolumeclaims
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ rules:
resources: ["nodes"]
verbs: ["watch","list","get","update"]
- apiGroups: [""]
resources: ["pods","services","replicationcontrollers","persistentvolumeclaims","persistentvolumes"]
resources: ["pods","podtemplates","services","replicationcontrollers","persistentvolumeclaims","persistentvolumes"]
verbs: ["watch","list","get"]
- apiGroups: ["extensions"]
resources: ["replicasets","daemonsets"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ rules:
- persistentvolumeclaims
- persistentvolumes
- pods
- podtemplates
- replicationcontrollers
- services
verbs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rules:
- apiGroups: [""]
resources:
- "pods"
- "podtemplates"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
Expand Down
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,6 @@ type AutoscalingOptions struct {
DaemonSetEvictionForOccupiedNodes bool
// User agent to use for HTTP calls.
UserAgent string
// NodeInfoProcessorPodTemplates enables or disables the PodTemplate NodeInfoProcessor.
NodeInfoProcessorPodTemplates bool
}
1 change: 0 additions & 1 deletion cluster-autoscaler/core/scale_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ type mockAutoprovisioningNodeGroupListProcessor struct {

func (p *mockAutoprovisioningNodeGroupListProcessor) Process(context *context.AutoscalingContext, nodeGroups []cloudprovider.NodeGroup, nodeInfos map[string]*schedulerframework.NodeInfo,
unschedulablePods []*apiv1.Pod) ([]cloudprovider.NodeGroup, map[string]*schedulerframework.NodeInfo, error) {

machines, err := context.CloudProvider.GetAvailableMachineTypes()
assert.NoError(p.t, err)

Expand Down
2 changes: 0 additions & 2 deletions cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ func NewStaticAutoscaler(
expanderStrategy expander.Strategy,
estimatorBuilder estimator.EstimatorBuilder,
backoff backoff.Backoff) *StaticAutoscaler {

processorCallbacks := newStaticAutoscalerProcessorCallbacks()
autoscalingContext := context.NewAutoscalingContext(
opts,
Expand Down Expand Up @@ -701,7 +700,6 @@ func (a *StaticAutoscaler) filterOutYoungPods(allUnschedulablePods []*apiv1.Pod,
oldUnschedulablePods = append(oldUnschedulablePods, pod)
} else {
klog.V(3).Infof("Pod %s is %.3f seconds old, too new to consider unschedulable", pod.Name, podAge.Seconds())

}
}
return oldUnschedulablePods
Expand Down
22 changes: 16 additions & 6 deletions cluster-autoscaler/core/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ import (
klog "k8s.io/klog/v2"
)

const (
// TemplateNodeForNamePrefix is the leading part of every generated template node name;
// sanitizeTemplateNode() joins it with the node source, the node group and a random suffix.
TemplateNodeForNamePrefix = "template-node-for"
// TemplateNodeForNameFromTemplatePrefix is the node-source sub-prefix used when the template
// is generated from a cloud-provider node template.
TemplateNodeForNameFromTemplatePrefix = "template"
// TemplateNodeForNameFromCopyPrefix is the node-source sub-prefix used when the template
// is generated from a copy of another existing node.
TemplateNodeForNameFromCopyPrefix = "copy"
)

// GetNodeInfosForGroups finds NodeInfos for all node groups used to manage the given nodes. It also returns a node group to sample node mapping.
func GetNodeInfosForGroups(nodes []*apiv1.Node, nodeInfoCache map[string]*schedulerframework.NodeInfo, cloudProvider cloudprovider.CloudProvider, listers kube_util.ListerRegistry,
// TODO(mwielgus): This returns map keyed by url, while most code (including scheduler) uses node.Name for a key.
Expand Down Expand Up @@ -67,7 +76,7 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, nodeInfoCache map[string]*schedu
if err != nil {
return false, "", err
}
sanitizedNodeInfo, err := sanitizeNodeInfo(nodeInfo, id, ignoredTaints)
sanitizedNodeInfo, err := sanitizeNodeInfo(nodeInfo, TemplateNodeForNameFromCopyPrefix, id, ignoredTaints)
if err != nil {
return false, "", err
}
Expand Down Expand Up @@ -179,9 +188,10 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap
for _, podInfo := range baseNodeInfo.Pods {
pods = append(pods, podInfo.Pod)
}

fullNodeInfo := schedulerframework.NewNodeInfo(pods...)
fullNodeInfo.SetNode(baseNodeInfo.Node())
sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, id, ignoredTaints)
sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, TemplateNodeForNameFromTemplatePrefix, id, ignoredTaints)
if typedErr != nil {
return nil, typedErr
}
Expand Down Expand Up @@ -226,9 +236,9 @@ func deepCopyNodeInfo(nodeInfo *schedulerframework.NodeInfo) (*schedulerframewor
return newNodeInfo, nil
}

func sanitizeNodeInfo(nodeInfo *schedulerframework.NodeInfo, nodeGroupName string, ignoredTaints taints.TaintKeySet) (*schedulerframework.NodeInfo, errors.AutoscalerError) {
func sanitizeNodeInfo(nodeInfo *schedulerframework.NodeInfo, nodeSource string, nodeGroupName string, ignoredTaints taints.TaintKeySet) (*schedulerframework.NodeInfo, errors.AutoscalerError) {
// Sanitize node name.
sanitizedNode, err := sanitizeTemplateNode(nodeInfo.Node(), nodeGroupName, ignoredTaints)
sanitizedNode, err := sanitizeTemplateNode(nodeInfo.Node(), nodeSource, nodeGroupName, ignoredTaints)
if err != nil {
return nil, err
}
Expand All @@ -247,9 +257,9 @@ func sanitizeNodeInfo(nodeInfo *schedulerframework.NodeInfo, nodeGroupName strin
return sanitizedNodeInfo, nil
}

func sanitizeTemplateNode(node *apiv1.Node, nodeGroup string, ignoredTaints taints.TaintKeySet) (*apiv1.Node, errors.AutoscalerError) {
func sanitizeTemplateNode(node *apiv1.Node, nodeSource string, nodeGroup string, ignoredTaints taints.TaintKeySet) (*apiv1.Node, errors.AutoscalerError) {
newNode := node.DeepCopy()
nodeName := fmt.Sprintf("template-node-for-%s-%d", nodeGroup, rand.Int63())
nodeName := fmt.Sprintf("%s-%s-%s-%d", TemplateNodeForNamePrefix, nodeSource, nodeGroup, rand.Int63())
newNode.Labels = make(map[string]string, len(node.Labels))
for k, v := range node.Labels {
if k != apiv1.LabelHostname {
Expand Down
5 changes: 2 additions & 3 deletions cluster-autoscaler/core/utils/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ func TestSanitizeNodeInfo(t *testing.T) {
nodeInfo := schedulerframework.NewNodeInfo(pod)
nodeInfo.SetNode(node)

res, err := sanitizeNodeInfo(nodeInfo, "test-group", nil)
res, err := sanitizeNodeInfo(nodeInfo, "template", "test-group", nil)
assert.NoError(t, err)
assert.Equal(t, 1, len(res.Pods))
}
Expand All @@ -238,7 +238,7 @@ func TestSanitizeLabels(t *testing.T) {
apiv1.LabelHostname: "abc",
"x": "y",
}
node, err := sanitizeTemplateNode(oldNode, "bzium", nil)
node, err := sanitizeTemplateNode(oldNode, "copy", "bzium", nil)
assert.NoError(t, err)
assert.NotEqual(t, node.Labels[apiv1.LabelHostname], "abc", nil)
assert.Equal(t, node.Labels["x"], "y")
Expand Down Expand Up @@ -277,7 +277,6 @@ func TestGetNodeResource(t *testing.T) {

memory = getNodeResource(nodeWithNegativeCapacity, apiv1.ResourceMemory)
assert.Equal(t, int64(0), memory)

}

func TestGetNodeCoresAndMemory(t *testing.T) {
Expand Down
7 changes: 7 additions & 0 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/metrics"
ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfos/podtemplates"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
Expand Down Expand Up @@ -180,6 +181,7 @@ var (
daemonSetEvictionForEmptyNodes = flag.Bool("daemonset-eviction-for-empty-nodes", false, "DaemonSet pods will be gracefully terminated from empty nodes")
daemonSetEvictionForOccupiedNodes = flag.Bool("daemonset-eviction-for-occupied-nodes", true, "DaemonSet pods will be gracefully terminated from non-empty nodes")
userAgent = flag.String("user-agent", "cluster-autoscaler", "User agent used for HTTP calls.")
nodeInfoProcessorPodTemplates = flag.Bool("node-info-processor-podtemplate", false, "Enable PodTemplate NodeInfoProcessor to consider specific PodTemplate as DaemonSet")
)

func createAutoscalingOptions() config.AutoscalingOptions {
Expand Down Expand Up @@ -257,6 +259,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
DaemonSetEvictionForEmptyNodes: *daemonSetEvictionForEmptyNodes,
DaemonSetEvictionForOccupiedNodes: *daemonSetEvictionForOccupiedNodes,
UserAgent: *userAgent,
NodeInfoProcessorPodTemplates: *nodeInfoProcessorPodTemplates,
}
}

Expand Down Expand Up @@ -331,6 +334,10 @@ func buildAutoscaler() (core.Autoscaler, error) {
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
}

if autoscalingOptions.NodeInfoProcessorPodTemplates {
opts.Processors.NodeInfoProcessor = podtemplates.NewNodeInfoWithPodTemplateProcessor(&opts)
}

// These metrics should be published only once.
metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled)
metrics.UpdateMaxNodesCount(autoscalingOptions.MaxNodesTotal)
Expand Down
Loading