CA: don't error out in HintingSimulator if a hinted Node is gone
A hinted Node can legitimately disappear from the cluster snapshot,
e.g. when it was a fake upcoming Node and the real one has since
appeared. HintingSimulator shouldn't treat that as an error.

The regression was introduced in the recent
PredicateChecker->PredicateSnapshot refactor. Previously,
PredicateChecker.CheckPredicates() would return an error if the hinted
Node was gone, and HintingSimulator treated that error the same as
failing predicates: it simply moved on to the non-hinting logic. After
the refactor, HintingSimulator explicitly errors out if it can't
retrieve the hinted Node from the snapshot, so the behavior changed.
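
To make the intended flow concrete, here is a minimal Go sketch of the
hint-then-fallback behavior. This is not the actual cluster-autoscaler
code; the snapshot type and helper names below are simplified stand-ins
invented for illustration:

package main

import "fmt"

// snapshot is a toy stand-in for the real ClusterSnapshot; it only
// records which Nodes currently exist.
type snapshot struct {
	nodes map[string]bool
}

func (s *snapshot) getNode(name string) error {
	if !s.nodes[name] {
		return fmt.Errorf("node %q not found", name)
	}
	return nil
}

// tryHint mirrors the fixed behavior: a missing hinted Node is not an
// error, it just means the hint is stale and the caller should fall
// back to the non-hinting logic.
func tryHint(s *snapshot, hintedNode string) (string, error) {
	if err := s.getNode(hintedNode); err != nil {
		// The hinted Node is gone (e.g. a fake upcoming Node was
		// replaced by the real one) - no hinted placement, no error.
		return "", nil
	}
	// Predicate checks against the hinted Node would happen here.
	return hintedNode, nil
}

// schedule tries the hint first and then falls back to scanning all
// acceptable Nodes, which is roughly what the hinting simulator does
// at a high level.
func schedule(s *snapshot, hintedNode string, allNodes []string) string {
	if name, _ := tryHint(s, hintedNode); name != "" {
		return name
	}
	for _, n := range allNodes {
		if s.nodes[n] {
			return n
		}
	}
	return ""
}

func main() {
	s := &snapshot{nodes: map[string]bool{"n2": true}}
	// The hint points at a Node that no longer exists, so the fallback
	// picks n2 instead of erroring out.
	fmt.Println(schedule(s, "non-existing-node", []string{"n1", "n2"}))
}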

I checked the other CheckPredicates()/SchedulePod() call sites, and
this is the only one where ignoring a missing Node makes sense. At the
other call sites, the Node is added to the snapshot just before the
call, so its absence there should cause an error.
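
For contrast, here is a sketch of the pattern at those other call sites
(again with simplified, hypothetical names, not the real API): the Node
is added to the snapshot right before scheduling onto it, so a lookup
failure there can only mean a bug and should be propagated:

package main

import "fmt"

// snapshot is the same toy stand-in as in the sketch above.
type snapshot struct {
	nodes map[string]bool
}

func (s *snapshot) addNode(name string) { s.nodes[name] = true }

// schedulePod fails if the target Node isn't in the snapshot. Unlike
// the hinting path, callers of this pattern have just added the Node
// themselves, so a miss here indicates a real bug.
func schedulePod(s *snapshot, node string) error {
	if !s.nodes[node] {
		return fmt.Errorf("node %q not in snapshot", node)
	}
	// Predicate checks / pod placement would happen here.
	return nil
}

// simulateScaleUp adds a candidate Node and immediately schedules onto
// it; any error from schedulePod is surfaced rather than swallowed.
func simulateScaleUp(s *snapshot, newNode string) error {
	s.addNode(newNode)
	return schedulePod(s, newNode)
}

func main() {
	s := &snapshot{nodes: map[string]bool{}}
	fmt.Println(simulateScaleUp(s, "n3")) // <nil>
}
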
towca committed Dec 12, 2024
1 parent 756db6a commit 4e283e3
Showing 2 changed files with 29 additions and 1 deletion.
3 changes: 2 additions & 1 deletion cluster-autoscaler/simulator/scheduling/hinting_simulator.go
@@ -91,7 +91,8 @@ func (s *HintingSimulator) tryScheduleUsingHints(clusterSnapshot clustersnapshot
 
 	nodeInfo, err := clusterSnapshot.GetNodeInfo(hintedNode)
 	if err != nil {
-		return "", err
+		// The hinted Node is no longer in the cluster. No need to error out, we can just look for another one.
+		return "", nil
 	}
 	if !isNodeAcceptable(nodeInfo) {
 		return "", nil
27 changes: 27 additions & 0 deletions cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go
@@ -35,6 +35,7 @@ func TestTrySchedulePods(t *testing.T) {
 		nodes           []*apiv1.Node
 		pods            []*apiv1.Pod
 		newPods         []*apiv1.Pod
+		hints           map[*apiv1.Pod]string
 		acceptableNodes func(*framework.NodeInfo) bool
 		wantStatuses    []Status
 		wantErr         bool
@@ -58,6 +59,27 @@
 				{Pod: BuildTestPod("p3", 500, 500000), NodeName: "n1"},
 			},
 		},
+
+		{
+			desc: "hinted Node no longer in the cluster doesn't cause an error",
+			nodes: []*apiv1.Node{
+				buildReadyNode("n1", 1000, 2000000),
+				buildReadyNode("n2", 1000, 2000000),
+			},
+			pods: []*apiv1.Pod{
+				buildScheduledPod("p1", 300, 500000, "n1"),
+			},
+			newPods: []*apiv1.Pod{
+				BuildTestPod("p2", 800, 500000),
+				BuildTestPod("p3", 500, 500000),
+			},
+			hints:           map[*apiv1.Pod]string{BuildTestPod("p2", 800, 500000): "non-existing-node"},
+			acceptableNodes: ScheduleAnywhere,
+			wantStatuses: []Status{
+				{Pod: BuildTestPod("p2", 800, 500000), NodeName: "n2"},
+				{Pod: BuildTestPod("p3", 500, 500000), NodeName: "n1"},
+			},
+		},
 		{
 			desc: "three new pods, two nodes, no fit",
 			nodes: []*apiv1.Node{
@@ -136,6 +158,11 @@
 			clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t)
 			clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, tc.nodes, tc.pods)
 			s := NewHintingSimulator()
+
+			for pod, nodeName := range tc.hints {
+				s.hints.Set(HintKeyFromPod(pod), nodeName)
+			}
+
 			statuses, _, err := s.TrySchedulePods(clusterSnapshot, tc.newPods, tc.acceptableNodes, false)
 			if tc.wantErr {
 				assert.Error(t, err)
