Skip to content

Commit

Permalink
Merge pull request #4274 from kinvolk/imran/cloud-provider-packet-fix
Browse files Browse the repository at this point in the history
Cloud provider[Packet] fixes
  • Loading branch information
k8s-ci-robot authored and ipochi committed Aug 27, 2021
1 parent a4b1049 commit a53d048
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 4 deletions.
29 changes: 29 additions & 0 deletions cluster-autoscaler/cloudprovider/packet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,35 @@ affinity:
- t1.small.x86
```

## CCM and Controller node labels

### CCM
By default, autoscaler assumes that you have an older deprecated version of `packet-ccm` installed in your
cluster. If however, that is not the case and you've migrated to the new `cloud-provider-equinix-metal` CCM,
then this must be told to autoscaler. This can be done via setting an environment variable in the deployment:
```
env:
- name: INSTALLED_CCM
value: cloud-provider-equinix-metal
```
**NOTE**: As a prerequisite, ensure that all worker nodes in your cluster have the prefix `equinixmetal://` in
the Node spec `.spec.providerID`. If there are any existing worker nodes with prefix `packet://`, then drain
the node, remove the node and restart the kubelet on that worker node to re-register the node in the cluster,
this would ensure that `cloud-provider-equinix-metal` CCM sets the uuid with prefix `equinixmetal://` to the
field `.spec.ProviderID`.

### Controller node labels

Autoscaler assumes that control plane nodes in your cluster are identified by the label
`node-role.kubernetes.io/master`. If for some reason, this assumption is not true in your case, then set the
envirnment variable in the deployment:

```
env:
- name: PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL
value: <label>
```

## Notes

The autoscaler will not remove nodes which have non-default kube-system pods.
Expand Down
13 changes: 12 additions & 1 deletion cluster-autoscaler/cloudprovider/packet/packet_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ const (
ProviderName = "packet"
// GPULabel is the label added to nodes with GPU resource.
GPULabel = "cloud.google.com/gke-accelerator"
// DefaultControllerNodeLabelKey is the label added to Master/Controller to identify as
// master/controller node.
DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
// ControllerNodeIdentifierEnv is the string for the environment variable.
ControllerNodeIdentifierEnv = "PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL"
)

var (
Expand Down Expand Up @@ -94,7 +99,13 @@ func (pcp *packetCloudProvider) AddNodeGroup(group packetNodeGroup) {
//
// Since only a single node group is currently supported, the first node group is always returned.
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
if _, found := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; found {
controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv)
if controllerNodeLabel == "" {
klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
controllerNodeLabel = DefaultControllerNodeLabelKey
}

if _, found := node.ObjectMeta.Labels[controllerNodeLabel]; found {
return nil, nil
}
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
Expand Down
42 changes: 39 additions & 3 deletions cluster-autoscaler/cloudprovider/packet/packet_manager_rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ import (
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)

const (
packetPrefix = "packet://"
equinixMetalPrefix = "equinixmetal://"
)

type instanceType struct {
InstanceName string
CPU int64
Expand Down Expand Up @@ -275,7 +280,12 @@ func Contains(a []string, x string) bool {
// createPacketManagerRest sets up the client and returns
// an packetManagerRest.
func createPacketManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*packetManagerRest, error) {
var cfg ConfigFile
// Initialize ConfigFile instance
cfg := ConfigFile{
DefaultNodegroupdef: ConfigNodepool{},
Nodegroupdef: map[string]*ConfigNodepool{},
}

if configReader != nil {
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
klog.Errorf("Couldn't read config: %v", err)
Expand Down Expand Up @@ -395,7 +405,11 @@ func (mgr *packetManagerRest) NodeGroupForNode(labels map[string]string, nodeId
if nodegroup, ok := labels["pool"]; ok {
return nodegroup, nil
}
device, err := mgr.getPacketDevice(strings.TrimPrefix(nodeId, "packet://"))

trimmedNodeId := strings.TrimPrefix(nodeId, packetPrefix)
trimmedNodeId = strings.TrimPrefix(trimmedNodeId, equinixMetalPrefix)

device, err := mgr.getPacketDevice(trimmedNodeId)
if err != nil {
return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err)
}
Expand Down Expand Up @@ -540,9 +554,31 @@ func (mgr *packetManagerRest) getNodes(nodegroup string) ([]string, error) {
// Get node ProviderIDs by getting device IDs from Packet
devices, err := mgr.listPacketDevices()
nodes := []string{}

// This bit of code along with the switch statement, checks if the CCM installed on the cluster is
// `packet-ccm` or `cloud-provider-equinix-metal`. The reason its important to check because depending
// on the CCM installed, the prefix in providerID of K8s Node spec differs from either `packet://` or
// `equinixmetal://`. This is now needed as `packet-ccm` is now deprecated and renamed in favor of
// `cloud-provider-equinix-metal`.
// This code checks if the INSTALLED_CCM env var is set or not. If set to `cloud-provider-equinix-metal`,
// the prefix is set to `equinixmetal://` and any other case the prefix is `packet://`.
// At a later point in time, there would be a need to make `equinixmetal://` prefix as the default or do away
// with `packet://` prefix entirely. This should happen presumably when the packet code in autoscaler is
// renamed from packet to equinixmetal.
prefix := packetPrefix

switch installedCCM := os.Getenv("INSTALLED_CCM"); installedCCM {
case "packet-ccm":
prefix = packetPrefix
case "cloud-provider-equinix-metal":
prefix = equinixMetalPrefix
default:
klog.V(3).Info("Unrecognized value: expected INSTALLED_CCM to be either `packet-ccm` or `cloud-provider-equinix-metal`, using default: `packet-ccm`")
}

for _, d := range devices.Devices {
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
nodes = append(nodes, fmt.Sprintf("packet://%s", d.ID))
nodes = append(nodes, fmt.Sprintf("%s%s", prefix, d.ID))
}
}
return nodes, err
Expand Down

0 comments on commit a53d048

Please sign in to comment.