diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go index bd6d3a49818..a8f12f2d5bc 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go @@ -33,7 +33,8 @@ import ( ) const ( - nodeProviderIDIndex = "clusterapi-nodeProviderIDIndex" + machineProviderIDIndex = "clusterapi-machineProviderIDIndex" + nodeProviderIDIndex = "clusterapi-nodeProviderIDIndex" ) // machineController watches for Nodes, Machines, MachineSets and @@ -52,9 +53,22 @@ type machineController struct { type machineSetFilterFunc func(machineSet *v1alpha1.MachineSet) error -func indexNodeByNodeProviderID(obj interface{}) ([]string, error) { +func indexMachineByProviderID(obj interface{}) ([]string, error) { + if machine, ok := obj.(*v1alpha1.Machine); ok { + if machine.Spec.ProviderID != nil && *machine.Spec.ProviderID != "" { + return []string{*machine.Spec.ProviderID}, nil + } + return []string{}, nil + } + return []string{}, nil +} + +func indexNodeByProviderID(obj interface{}) ([]string, error) { if node, ok := obj.(*corev1.Node); ok { - return []string{node.Spec.ProviderID}, nil + if node.Spec.ProviderID != "" { + return []string{node.Spec.ProviderID}, nil + } + return []string{}, nil } return []string{}, nil } @@ -143,36 +157,43 @@ func (c *machineController) run(stopCh <-chan struct{}) error { return nil } -// findMachineByNodeProviderID find associated machine using -// node.Spec.ProviderID as the key. Returns nil if either the Node by -// node.Spec.ProviderID cannot be found or if the node has no machine -// annotation. A DeepCopy() of the object is returned on success. -func (c *machineController) findMachineByNodeProviderID(node *corev1.Node) (*v1alpha1.Machine, error) { - objs, err := c.nodeInformer.GetIndexer().ByIndex(nodeProviderIDIndex, node.Spec.ProviderID) +// findMachineByProviderID finds machine matching providerID. A +// DeepCopy() of the object is returned on success. +func (c *machineController) findMachineByProviderID(providerID string) (*v1alpha1.Machine, error) { + objs, err := c.machineInformer.Informer().GetIndexer().ByIndex(machineProviderIDIndex, providerID) if err != nil { return nil, err } switch n := len(objs); { - case n == 0: - return nil, nil case n > 1: return nil, fmt.Errorf("internal error; expected len==1, got %v", n) + case n == 1: + machine, ok := objs[0].(*v1alpha1.Machine) + if !ok { + return nil, fmt.Errorf("internal error; unexpected type %T", machine) + } + if machine != nil { + return machine.DeepCopy(), nil + } } - node, ok := objs[0].(*corev1.Node) - if !ok { - return nil, fmt.Errorf("internal error; unexpected type %T", node) + // If the machine object has no providerID--maybe actuator + // does not set this value (e.g., OpenStack)--then first + // lookup the node using ProviderID. If that is successful + // then the machine can be found using the annotation (should + // it exist). + node, err := c.findNodeByProviderID(providerID) + if err != nil { + return nil, err } - - if machineName, found := node.Annotations[machineAnnotationKey]; found { - return c.findMachine(machineName) + if node == nil { + return nil, nil } - - return nil, nil + return c.findMachine(node.Annotations[machineAnnotationKey]) } -// findNodeByNodeName find the Node object keyed by node.Name. Returns +// findNodeByNodeName finds the Node object keyed by name.. Returns // nil if it cannot be found. A DeepCopy() of the object is returned // on success. func (c *machineController) findNodeByNodeName(name string) (*corev1.Node, error) { @@ -235,12 +256,16 @@ func newMachineController( nodeInformer := kubeInformerFactory.Core().V1().Nodes().Informer() nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{}) - indexerFuncs := cache.Indexers{ - nodeProviderIDIndex: indexNodeByNodeProviderID, + if err := machineInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{ + machineProviderIDIndex: indexMachineByProviderID, + }); err != nil { + return nil, fmt.Errorf("cannot add machine indexer: %v", err) } - if err := nodeInformer.GetIndexer().AddIndexers(indexerFuncs); err != nil { - return nil, fmt.Errorf("cannot add indexers: %v", err) + if err := nodeInformer.GetIndexer().AddIndexers(cache.Indexers{ + nodeProviderIDIndex: indexNodeByProviderID, + }); err != nil { + return nil, fmt.Errorf("cannot add node indexer: %v", err) } return &machineController{ @@ -263,6 +288,18 @@ func (c *machineController) machineSetNodeNames(machineSet *v1alpha1.MachineSet) var nodes []string for _, machine := range machines { + if machine.Spec.ProviderID != nil && *machine.Spec.ProviderID != "" { + // Prefer machine<=>node mapping using ProviderID + node, err := c.findNodeByProviderID(*machine.Spec.ProviderID) + if err != nil { + return nil, err + } + if node != nil { + nodes = append(nodes, node.Spec.ProviderID) + continue + } + } + if machine.Status.NodeRef == nil { glog.V(4).Infof("Status.NodeRef of machine %q is currently nil", machine.Name) continue @@ -362,7 +399,7 @@ func (c *machineController) nodeGroups() ([]*nodegroup, error) { } func (c *machineController) nodeGroupForNode(node *corev1.Node) (*nodegroup, error) { - machine, err := c.findMachineByNodeProviderID(node) + machine, err := c.findMachineByProviderID(node.Spec.ProviderID) if err != nil { return nil, err } @@ -415,3 +452,27 @@ func (c *machineController) nodeGroupForNode(node *corev1.Node) (*nodegroup, err glog.V(4).Infof("node %q is in nodegroup %q", node.Name, machineSet.Name) return nodegroup, nil } + +// findNodeByProviderID find the Node object keyed by provideID. +// Returns nil if it cannot be found. A DeepCopy() of the object is +// returned on success. +func (c *machineController) findNodeByProviderID(providerID string) (*corev1.Node, error) { + objs, err := c.nodeInformer.GetIndexer().ByIndex(nodeProviderIDIndex, providerID) + if err != nil { + return nil, err + } + + switch n := len(objs); { + case n == 0: + return nil, nil + case n > 1: + return nil, fmt.Errorf("internal error; expected len==1, got %v", n) + } + + node, ok := objs[0].(*corev1.Node) + if !ok { + return nil, fmt.Errorf("internal error; unexpected type %T", node) + } + + return node.DeepCopy(), nil +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go index 75e29bef6ad..fa6d9189ada 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" fakekube "k8s.io/client-go/kubernetes/fake" + "k8s.io/utils/pointer" "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" fakeclusterapi "k8s.io/autoscaler/cluster-autoscaler/client/clusterapi/clientset/versioned/fake" ) @@ -222,6 +223,9 @@ func makeLinkedNodeAndMachine(i int, namespace string, owner v1.OwnerReference) UID: owner.UID, }}, }, + Spec: v1alpha1.MachineSpec{ + ProviderID: pointer.StringPtr(fmt.Sprintf("%s-%s-nodeid-%d", namespace, owner.Name, i)), + }, Status: v1alpha1.MachineStatus{ NodeRef: &apiv1.ObjectReference{ Kind: node.Kind, @@ -396,7 +400,7 @@ func TestControllerFindMachineOwner(t *testing.T) { } } -func TestControllerFindMachineByNodeProviderID(t *testing.T) { +func TestControllerFindMachineByProviderID(t *testing.T) { testConfig := createMachineSetTestConfig(testNamespace, 1, 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", @@ -405,9 +409,18 @@ func TestControllerFindMachineByNodeProviderID(t *testing.T) { controller, stop := mustCreateTestController(t, testConfig) defer stop() - // Test #1: Verify node can be found because it has a - // ProviderID value and a machine annotation. - machine, err := controller.findMachineByNodeProviderID(testConfig.nodes[0]) + // Remove all the "machine" annotation values on all the + // nodes. We want to force findMachineByProviderID() to only + // be successful by searching on provider ID. + for _, node := range testConfig.nodes { + delete(node.Annotations, machineAnnotationKey) + if err := controller.nodeInformer.GetStore().Update(node); err != nil { + t.Fatalf("unexpected error updating node, got %v", err) + } + } + + // Test #1: Verify underlying machine provider ID matches + machine, err := controller.findMachineByProviderID(testConfig.nodes[0].Spec.ProviderID) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -418,29 +431,18 @@ func TestControllerFindMachineByNodeProviderID(t *testing.T) { t.Fatalf("expected machines to be equal - expected %+v, got %+v", testConfig.machines[0], machine) } - // Test #2: Verify node is not found if it has a non-existent ProviderID - node := testConfig.nodes[0].DeepCopy() - node.Spec.ProviderID = "" - nonExistentMachine, err := controller.findMachineByNodeProviderID(node) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if nonExistentMachine != nil { - t.Fatal("expected find to fail") - } - - // Test #3: Verify node is not found if the stored object has - // no "machine" annotation - node = testConfig.nodes[0].DeepCopy() - delete(node.Annotations, machineAnnotationKey) - if err := controller.nodeInformer.GetStore().Update(node); err != nil { - t.Fatalf("unexpected error updating node, got %v", err) + // Test #2: Verify machine is not found if it has a + // non-existent or different provider ID. + machine = testConfig.machines[0].DeepCopy() + machine.Spec.ProviderID = pointer.StringPtr("does-not-match") + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) } - nonExistentMachine, err = controller.findMachineByNodeProviderID(testConfig.nodes[0]) + machine, err = controller.findMachineByProviderID(testConfig.nodes[0].Spec.ProviderID) if err != nil { t.Fatalf("unexpected error: %v", err) } - if nonExistentMachine != nil { + if machine != nil { t.Fatal("expected find to fail") } } @@ -791,3 +793,191 @@ func TestControllerNodeGroupsNodeCount(t *testing.T) { } }) } + +func TestControllerFindMachineFromNodeAnnotation(t *testing.T) { + testConfig := createMachineSetTestConfig(testNamespace, 1, 1, map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }) + + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + // Remove all the provider ID values on all the machines. We + // want to force findMachineByProviderID() to fallback to + // searching using the annotation on the node object. + for _, machine := range testConfig.machines { + machine.Spec.ProviderID = nil + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) + } + } + + // Test #1: Verify machine can be found from node annotation + machine, err := controller.findMachineByProviderID(testConfig.nodes[0].Spec.ProviderID) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if machine == nil { + t.Fatal("expected to find machine") + } + if !reflect.DeepEqual(machine, testConfig.machines[0]) { + t.Fatalf("expected machines to be equal - expected %+v, got %+v", testConfig.machines[0], machine) + } + + // Test #2: Verify machine is not found if it has no + // corresponding machine annotation. + node := testConfig.nodes[0].DeepCopy() + delete(node.Annotations, machineAnnotationKey) + if err := controller.nodeInformer.GetStore().Update(node); err != nil { + t.Fatalf("unexpected error updating node, got %v", err) + } + machine, err = controller.findMachineByProviderID(testConfig.nodes[0].Spec.ProviderID) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if machine != nil { + t.Fatal("expected find to fail") + } +} + +func TestControllerMachineSetNodeNamesWithoutLinkage(t *testing.T) { + testConfig := createMachineSetTestConfig(testNamespace, 1, 3, map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }) + + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + // Remove all linkage between node and machine. + for _, machine := range testConfig.machines { + machine.Spec.ProviderID = nil + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) + } + } + for _, machine := range testConfig.machines { + machine.Status.NodeRef = nil + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) + } + } + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + ng := nodegroups[0] + nodeNames, err := ng.Nodes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // We removed all linkage - so we should get 0 nodes back. + if len(nodeNames) != 0 { + t.Fatalf("expected len=0, got len=%v", len(nodeNames)) + } +} + +func TestControllerMachineSetNodeNamesUsingProviderID(t *testing.T) { + testConfig := createMachineSetTestConfig(testNamespace, 3, 3, map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }) + + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + // Remove Status.NodeRef.Name on all the machines. We want to + // force machineSetNodeNames() to only consider the provider + // ID for lookups. + for _, machine := range testConfig.machines { + machine.Status.NodeRef = nil + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) + } + } + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + ng := nodegroups[0] + nodeNames, err := ng.Nodes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(nodeNames) != len(testConfig.nodes) { + t.Fatalf("expected len=%v, got len=%v", len(testConfig.nodes), len(nodeNames)) + } + + sort.Slice(nodeNames, func(i, j int) bool { + return nodeNames[i].Id < nodeNames[j].Id + }) + + for i := range testConfig.nodes { + if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID { + t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id) + } + } +} + +func TestControllerMachineSetNodeNamesUsingStatusNodeRefName(t *testing.T) { + testConfig := createMachineSetTestConfig(testNamespace, 3, 3, map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }) + + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + // Remove all the provider ID values on all the machines. We + // want to force machineSetNodeNames() to fallback to + // searching using Status.NodeRef.Name. + for _, machine := range testConfig.machines { + machine.Spec.ProviderID = nil + if err := controller.machineInformer.Informer().GetStore().Update(machine); err != nil { + t.Fatalf("unexpected error updating machine, got %v", err) + } + } + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + nodeNames, err := nodegroups[0].Nodes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(nodeNames) != len(testConfig.nodes) { + t.Fatalf("expected len=%v, got len=%v", len(testConfig.nodes), len(nodeNames)) + } + + sort.Slice(nodeNames, func(i, j int) bool { + return nodeNames[i].Id < nodeNames[j].Id + }) + + for i := range testConfig.nodes { + if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID { + t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id) + } + } +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index b512f5bbfa5..00c263361f2 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -100,7 +100,7 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error { return fmt.Errorf("node %q doesn't belong to node group %q", node.Spec.ProviderID, ng.Id()) } - machine, err := ng.machineController.findMachineByNodeProviderID(node) + machine, err := ng.machineController.findMachineByProviderID(node.Spec.ProviderID) if err != nil { return err }