diff --git a/cloud/const.go b/cloud/const.go
index 8234ee9b7..278f7e217 100644
--- a/cloud/const.go
+++ b/cloud/const.go
@@ -22,4 +22,7 @@ const (
 	// See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
 	// for annotation formatting rules.
 	CustomDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-gcp-mig-custom-data-hash"
+
+	// ClusterAPIImagePrefix is the prefix for the image name used by the Cluster API provider for GCP.
+	ClusterAPIImagePrefix = "capi-ubuntu-1804-k8s-"
 )
diff --git a/cloud/gcperrors/errors.go b/cloud/gcperrors/errors.go
index 77a480051..807608e0b 100644
--- a/cloud/gcperrors/errors.go
+++ b/cloud/gcperrors/errors.go
@@ -19,6 +19,7 @@ package gcperrors

 import (
 	"net/http"
+	"strings"

 	"google.golang.org/api/googleapi"
 )
@@ -43,3 +44,39 @@ func IgnoreNotFound(err error) error {

 	return err
 }
+
+// IsAlreadyDeleted reports whether err is a Google API error indicating that the resource is already being deleted.
+func IsAlreadyDeleted(err error) bool {
+	ae, ok := err.(*googleapi.Error)
+	if !ok || len(ae.Errors) == 0 {
+		return false
+	}
+
+	return strings.Contains(ae.Errors[0].Message, "Instance is already being deleted.")
+}
+
+// IsMemberNotFound reports whether err is a Google API error indicating that the member is not found.
+func IsMemberNotFound(err error) bool {
+	ae, ok := err.(*googleapi.Error)
+	if !ok || len(ae.Errors) == 0 {
+		return false
+	}
+
+	return strings.Contains(ae.Errors[0].Message, "is not a member of")
+}
+
+// PrintGCPError returns the message from a Google API error, falling back to err.Error() for other error types.
+func PrintGCPError(err error) string {
+	if err == nil {
+		return ""
+	}
+	ae, ok := err.(*googleapi.Error)
+	if !ok {
+		return err.Error()
+	}
+	msg := ae.Message
+	if len(ae.Errors) > 0 {
+		msg += " " + ae.Errors[0].Message + " " + ae.Errors[0].Reason
+	}
+	return msg
+}
diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go
index 15b0dc141..3e9691e0c 100644
--- a/cloud/scope/machine.go
+++ b/cloud/scope/machine.go
@@ -222,7 +222,7 @@ func (m *MachineScope) InstanceImageSpec() *compute.AttachedDisk {
 	if m.Machine.Spec.Version != nil {
 		version = *m.Machine.Spec.Version
 	}
-	image := "capi-ubuntu-1804-k8s-" + strings.ReplaceAll(semver.MajorMinor(version), ".", "-")
+	image := cloud.ClusterAPIImagePrefix + strings.ReplaceAll(semver.MajorMinor(version), ".", "-")
 	sourceImage := path.Join("projects", m.ClusterGetter.Project(), "global", "images", "family", image)
 	if m.GCPMachine.Spec.Image != nil {
 		sourceImage = *m.GCPMachine.Spec.Image
diff --git a/cloud/scope/machinepool.go b/cloud/scope/machinepool.go
index 69fca7bbf..57932c745 100644
--- a/cloud/scope/machinepool.go
+++ b/cloud/scope/machinepool.go
@@ -24,24 +24,29 @@ import (
 	"os"
 	"path"
 	"sort"
+	"strconv"
 	"strings"

 	"github.com/pkg/errors"
 	"golang.org/x/mod/semver"
 	"google.golang.org/api/compute/v1"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/utils/ptr"
 	infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
 	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+	machinepool "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope/strategies/machinepool_deployments"
 	infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	capierrors "sigs.k8s.io/cluster-api/errors"
 	clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/cluster-api/util/labels/format"
 	"sigs.k8s.io/cluster-api/util/patch"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/log" ) @@ -58,10 +63,11 @@ type ( Client client.Client PatchHelper *patch.Helper CapiMachinePoolPatchHelper *patch.Helper - - ClusterGetter cloud.ClusterGetter - MachinePool *clusterv1exp.MachinePool - GCPMachinePool *infrav1exp.GCPMachinePool + ClusterGetter cloud.ClusterGetter + MachinePool *clusterv1exp.MachinePool + GCPMachinePool *infrav1exp.GCPMachinePool + migState *compute.InstanceGroupManager + migInstances []*compute.ManagedInstance } ) @@ -97,6 +103,16 @@ func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, erro }, nil } +// SetMIGState updates the machine pool scope with the current state of the MIG. +func (m *MachinePoolScope) SetMIGState(migState *compute.InstanceGroupManager) { + m.migState = migState +} + +// SetMIGInstances updates the machine pool scope with the current state of the MIG instances. +func (m *MachinePoolScope) SetMIGInstances(migInstances []*compute.ManagedInstance) { + m.migInstances = migInstances +} + // SetReady sets the GCPMachinePool Ready Status to true. func (m *MachinePoolScope) SetReady() { m.GCPMachinePool.Status.Ready = true @@ -133,8 +149,19 @@ func (m *MachinePoolScope) PatchObject(ctx context.Context) error { // Close closes the current scope persisting the cluster configuration and status. func (m *MachinePoolScope) Close(ctx context.Context) error { + if m.migState != nil && m.migInstances != nil { + if err := m.applyGCPMachinePoolMachines(ctx); err != nil { + return errors.Wrap(err, "failed to apply GCPMachinePoolMachines") + } + + m.setProvisioningStateAndConditions() + if err := m.updateReplicasAndProviderIDs(ctx); err != nil { + return errors.Wrap(err, "failed to update replicas and providerIDs") + } + } + if err := m.PatchObject(ctx); err != nil { - return err + return errors.Wrap(err, "failed to patch GCPMachinePool") } if err := m.PatchCAPIMachinePoolObject(ctx); err != nil { return errors.Wrap(err, "unable to patch CAPI MachinePool") @@ -143,6 +170,204 @@ func (m *MachinePoolScope) Close(ctx context.Context) error { return nil } +// updateReplicasAndProviderIDs updates the GCPMachinePool replicas and providerIDs. +func (m *MachinePoolScope) updateReplicasAndProviderIDs(ctx context.Context) error { + machines, err := m.GetMachinePoolMachines(ctx) + if err != nil { + return errors.Wrap(err, "failed to get machine pool machines") + } + + var readyReplicas int32 + providerIDs := make([]string, len(machines)) + for i, machine := range machines { + if machine.Status.Ready { + readyReplicas++ + } + providerIDs[i] = machine.Spec.ProviderID + } + + m.GCPMachinePool.Status.Replicas = readyReplicas + m.GCPMachinePool.Spec.ProviderIDList = providerIDs + m.MachinePool.Spec.ProviderIDList = providerIDs + m.MachinePool.Status.Replicas = readyReplicas + return nil +} + +// setProvisioningStateAndConditions sets the GCPMachinePool provisioning state and conditions. 
+func (m *MachinePoolScope) setProvisioningStateAndConditions() {
+	if m.DesiredReplicas() == m.GCPMachinePool.Status.Replicas {
+		// MIG is provisioned with enough ready replicas.
+		m.SetReady()
+		conditions.MarkTrue(m.ConditionSetter(), infrav1exp.GCPMachinePoolReadyCondition)
+		conditions.MarkFalse(m.ConditionSetter(), infrav1exp.GCPMachinePoolCreatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "")
+		conditions.MarkFalse(m.ConditionSetter(), infrav1exp.GCPMachinePoolUpdatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "")
+		return
+	}
+
+	// MIG is still provisioning or scaling toward the desired replica count.
+	m.SetNotReady()
+	conditions.MarkFalse(m.ConditionSetter(), infrav1exp.GCPMachinePoolReadyCondition, infrav1exp.GCPMachinePoolCreatingReason, clusterv1.ConditionSeverityInfo, "")
+	conditions.MarkTrue(m.ConditionSetter(), infrav1exp.GCPMachinePoolUpdatingCondition)
+}
+
+func (m *MachinePoolScope) applyGCPMachinePoolMachines(ctx context.Context) error {
+	log := log.FromContext(ctx)
+
+	if m.migState == nil {
+		return nil
+	}
+
+	gmpms, err := m.GetMachinePoolMachines(ctx)
+	if err != nil {
+		return err
+	}
+
+	existingMachinesByProviderID := make(map[string]infrav1exp.GCPMachinePoolMachine, len(gmpms))
+	for _, machine := range gmpms {
+		existingMachinesByProviderID[machine.Spec.ProviderID] = machine
+	}
+
+	// create GCPMachinePoolMachines for instances that exist in GCP but have no resource yet
+	gcpMachinesByProviderID := m.InstancesByProviderID()
+	for key, val := range gcpMachinesByProviderID {
+		if _, ok := existingMachinesByProviderID[key]; !ok {
+			log.Info("Creating GCPMachinePoolMachine", "machine", val.Name, "providerID", key)
+			if err := m.createMachine(ctx, val); err != nil {
+				return errors.Wrap(err, "failed creating GCPMachinePoolMachine")
+			}
+		}
+	}
+
+	deleted := false
+	// delete machines that no longer exist in GCP
+	for key, machine := range existingMachinesByProviderID {
+		machine := machine
+		if _, ok := gcpMachinesByProviderID[key]; !ok {
+			deleted = true
+			log.V(4).Info("deleting GCPMachinePoolMachine because it no longer exists in the MIG", "providerID", key)
+			delete(existingMachinesByProviderID, key)
+			if err := m.Client.Delete(ctx, &machine); err != nil {
+				return errors.Wrap(err, "failed deleting GCPMachinePoolMachine no longer existing in GCP")
+			}
+		}
+	}
+
+	if deleted {
+		log.Info("GCPMachinePoolMachines deleted, requeueing")
+		return nil
+	}
+
+	// when replicas are externally managed, we do not want to scale down manually since that is handled by the external scaler.
+ if m.HasReplicasExternallyManaged(ctx) { + log.Info("Replicas are externally managed, skipping scaling down") + return nil + } + + deleteSelector := m.getDeploymentStrategy() + if deleteSelector == nil { + log.V(4).Info("can not select GCPMachinePoolMachines to delete because no deployment strategy is specified") + return nil + } + + // select machines to delete to lower the replica count + toDelete, err := deleteSelector.SelectMachinesToDelete(ctx, m.DesiredReplicas(), existingMachinesByProviderID) + if err != nil { + return errors.Wrap(err, "failed selecting GCPMachinePoolMachines to delete") + } + + for _, machine := range toDelete { + machine := machine + log.Info("deleting selected GCPMachinePoolMachine", "providerID", machine.Spec.ProviderID) + if err := m.Client.Delete(ctx, &machine); err != nil { + return errors.Wrap(err, "failed deleting GCPMachinePoolMachine to reduce replica count") + } + } + return nil +} + +func (m *MachinePoolScope) createMachine(ctx context.Context, managedInstance compute.ManagedInstance) error { + gmpm := infrav1exp.GCPMachinePoolMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedInstance.Name, + Namespace: m.GCPMachinePool.Namespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: infrav1exp.GroupVersion.String(), + Kind: "GCPMachinePool", + Name: m.GCPMachinePool.Name, + BlockOwnerDeletion: ptr.To(true), + UID: m.GCPMachinePool.UID, + }, + }, + Labels: map[string]string{ + m.ClusterGetter.Name(): string(infrav1.ResourceLifecycleOwned), + clusterv1.ClusterNameLabel: m.ClusterGetter.Name(), + infrav1exp.MachinePoolNameLabel: m.GCPMachinePool.Name, + clusterv1.MachinePoolNameLabel: format.MustFormatValue(m.MachinePool.Name), + }, + }, + Spec: infrav1exp.GCPMachinePoolMachineSpec{ + ProviderID: m.ProviderIDInstance(&managedInstance), + InstanceID: strconv.FormatUint(managedInstance.Id, 10), + }, + } + + controllerutil.AddFinalizer(&gmpm, infrav1exp.GCPMachinePoolMachineFinalizer) + if err := m.Client.Create(ctx, &gmpm); err != nil { + return errors.Wrapf(err, "failed creating GCPMachinePoolMachine %s in GCPMachinePool %s", managedInstance.Name, m.GCPMachinePool.Name) + } + + return nil +} + +func (m *MachinePoolScope) getDeploymentStrategy() machinepool.TypedDeleteSelector { + if m.GCPMachinePool == nil { + return nil + } + + return machinepool.NewMachinePoolDeploymentStrategy(m.GCPMachinePool.Spec.Strategy) +} + +// GetMachinePoolMachines returns the list of GCPMachinePoolMachines associated with this GCPMachinePool. +func (m *MachinePoolScope) GetMachinePoolMachines(ctx context.Context) ([]infrav1exp.GCPMachinePoolMachine, error) { + labels := m.getMachinePoolMachineLabels() + gmpml := &infrav1exp.GCPMachinePoolMachineList{} + if err := m.Client.List(ctx, gmpml, client.InNamespace(m.GCPMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { + return nil, errors.Wrap(err, "failed to list GCPMachinePoolMachines") + } + + return gmpml.Items, nil +} + +// DesiredReplicas returns the replica count on machine pool or 0 if machine pool replicas is nil. +func (m MachinePoolScope) DesiredReplicas() int32 { + return ptr.Deref(m.MachinePool.Spec.Replicas, 0) +} + +// InstancesByProviderID returns a map of GCPMachinePoolMachine instances by providerID. 
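+// Only instances that are RUNNING with no current action pending, or that are still
+// PROVISIONING, are included; instances in any other state are ignored.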
+func (m *MachinePoolScope) InstancesByProviderID() map[string]compute.ManagedInstance { + instances := make(map[string]compute.ManagedInstance, len(m.migInstances)) + for _, instance := range m.migInstances { + if instance.InstanceStatus == "RUNNING" && instance.CurrentAction == "NONE" || instance.InstanceStatus == "PROVISIONING" { + instances[m.ProviderIDInstance(instance)] = *instance + } + } + return instances +} + +func (m *MachinePoolScope) getMachinePoolMachineLabels() map[string]string { + return map[string]string{ + clusterv1.ClusterNameLabel: m.ClusterGetter.Name(), + infrav1exp.MachinePoolNameLabel: m.GCPMachinePool.Name, + clusterv1.MachinePoolNameLabel: format.MustFormatValue(m.MachinePool.Name), + m.ClusterGetter.Name(): string(infrav1.ResourceLifecycleOwned), + } +} + // InstanceGroupTemplateBuilder returns a GCP instance template. func (m *MachinePoolScope) InstanceGroupTemplateBuilder(bootstrapData string) *compute.InstanceTemplate { instanceTemplate := &compute.InstanceTemplate{ @@ -224,7 +449,7 @@ func (m *MachinePoolScope) InstanceAdditionalDiskSpec() []*compute.AttachedDisk AutoDelete: true, InitializeParams: &compute.AttachedDiskInitializeParams{ DiskSizeGb: ptr.Deref(disk.Size, 30), - DiskType: string(*disk.DeviceType), + DiskType: *disk.DeviceType, }, } if strings.HasSuffix(additionalDisk.InitializeParams.DiskType, string(infrav1.LocalSsdDiskType)) { @@ -248,7 +473,7 @@ func (m *MachinePoolScope) InstanceImageSpec() *compute.AttachedDisk { if m.MachinePool.Spec.Template.Spec.Version != nil { version = *m.MachinePool.Spec.Template.Spec.Version } - image := "capi-ubuntu-1804-k8s-" + strings.ReplaceAll(semver.MajorMinor(version), ".", "-") + image := cloud.ClusterAPIImagePrefix + strings.ReplaceAll(semver.MajorMinor(version), ".", "-") sourceImage := path.Join("projects", m.ClusterGetter.Project(), "global", "images", "family", image) if m.GCPMachinePool.Spec.Image != nil { sourceImage = *m.GCPMachinePool.Spec.Image @@ -317,7 +542,6 @@ func (m *MachinePoolScope) InstanceGroupBuilder(instanceTemplateName string) *co Name: m.GCPMachinePool.Name, BaseInstanceName: m.GCPMachinePool.Name, InstanceTemplate: path.Join("projects", m.ClusterGetter.Project(), "global", "instanceTemplates", instanceTemplateName), - TargetSize: int64(*m.MachinePool.Spec.Replicas), } } @@ -372,6 +596,18 @@ func (m *MachinePoolScope) GetInstanceTemplateHash(instance *compute.InstanceTem return fmt.Sprintf("%08x", shortHash), nil } +// NeedsRequeue returns true if the machine pool needs to be requeued. +func (m *MachinePoolScope) NeedsRequeue() bool { + numberOfRunningInstances := 0 + for _, instance := range m.migInstances { + if instance.InstanceStatus == "RUNNING" { + numberOfRunningInstances++ + } + } + + return numberOfRunningInstances != int(m.DesiredReplicas()) +} + // SetAnnotation sets a key value annotation on the GCPMachinePool. func (m *MachinePoolScope) SetAnnotation(key, value string) { if m.GCPMachinePool.Annotations == nil { @@ -380,16 +616,21 @@ func (m *MachinePoolScope) SetAnnotation(key, value string) { m.GCPMachinePool.Annotations[key] = value } -// Namespace returns the GCPMachine namespace. +// Namespace returns the GCPMachinePool namespace. func (m *MachinePoolScope) Namespace() string { return m.MachinePool.Namespace } -// Name returns the GCPMachine name. +// Name returns the GCPMachinePool name. func (m *MachinePoolScope) Name() string { return m.GCPMachinePool.Name } +// ProviderIDInstance returns the GCPMachinePool providerID for a managed instance. 
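+// The returned providerID has the form "gce://<project>/<zone>/<instance-name>", which is
+// the format the GCE cloud provider sets on a node's spec.providerID.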
+func (m *MachinePoolScope) ProviderIDInstance(managedInstance *compute.ManagedInstance) string {
+	return fmt.Sprintf("gce://%s/%s/%s", m.Project(), m.GCPMachinePool.Spec.Zone, managedInstance.Name)
+}
+
 // HasReplicasExternallyManaged returns true if the machine pool has replicas externally managed.
 func (m *MachinePoolScope) HasReplicasExternallyManaged(_ context.Context) bool {
 	return annotations.ReplicasManagedByExternalAutoscaler(m.MachinePool)
@@ -408,7 +649,7 @@ func (m *MachinePoolScope) UpdateCAPIMachinePoolReplicas(_ context.Context, repl
 	m.MachinePool.Spec.Replicas = replicas
 }

-// ReconcileReplicas ensures MachinePool replicas match MIG capacity if replicas are externally managed by an autoscaler.
+// ReconcileReplicas ensures MachinePool replicas match MIG capacity unless replicas are externally managed by an autoscaler.
 func (m *MachinePoolScope) ReconcileReplicas(ctx context.Context, mig *compute.InstanceGroupManager) error {
 	log := log.FromContext(ctx)
diff --git a/cloud/scope/machinepoolmachine.go b/cloud/scope/machinepoolmachine.go
new file mode 100644
index 000000000..ec2e264ef
--- /dev/null
+++ b/cloud/scope/machinepoolmachine.go
@@ -0,0 +1,476 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scope
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"os"
+	"path"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+	"golang.org/x/mod/semver"
+	"google.golang.org/api/compute/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/client-go/kubernetes"
+	kubedrain "k8s.io/kubectl/pkg/drain"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+	infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/controllers/noderefutil"
+	"sigs.k8s.io/cluster-api/controllers/remote"
+	capierrors "sigs.k8s.io/cluster-api/errors"
+	clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/cluster-api/util/patch"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+const (
+	// MachinePoolMachineScopeName is the sourceName, or more specifically the UserAgent, of the client used in cordon and drain.
+	MachinePoolMachineScopeName = "gcpmachinepoolmachine-scope"
+)
+
+type (
+	// MachinePoolMachineScopeParams defines the input parameters used to create a new MachinePoolMachineScope.
+	MachinePoolMachineScopeParams struct {
+		Client                client.Client
+		ClusterGetter         cloud.ClusterGetter
+		MachinePool           *clusterv1exp.MachinePool
+		GCPMachinePool        *infrav1exp.GCPMachinePool
+		GCPMachinePoolMachine *infrav1exp.GCPMachinePoolMachine
+	}
+	// MachinePoolMachineScope defines a scope around a single machine in a machine pool and its cluster.
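+	// It is consumed by the instance group instances service to reconcile one MIG instance at a time.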
+	MachinePoolMachineScope struct {
+		Client                     client.Client
+		PatchHelper                *patch.Helper
+		CapiMachinePoolPatchHelper *patch.Helper
+
+		ClusterGetter         cloud.ClusterGetter
+		MachinePool           *clusterv1exp.MachinePool
+		GCPMachinePool        *infrav1exp.GCPMachinePool
+		GCPMachinePoolMachine *infrav1exp.GCPMachinePoolMachine
+	}
+)
+
+// PatchObject persists the machine pool machine configuration and status.
+func (m *MachinePoolMachineScope) PatchObject(ctx context.Context) error {
+	return m.PatchHelper.Patch(ctx, m.GCPMachinePoolMachine)
+}
+
+// SetReady sets the GCPMachinePoolMachine Ready Status to true.
+func (m *MachinePoolMachineScope) SetReady() {
+	m.GCPMachinePoolMachine.Status.Ready = true
+}
+
+// SetNotReady sets the GCPMachinePoolMachine Ready Status to false.
+func (m *MachinePoolMachineScope) SetNotReady() {
+	m.GCPMachinePoolMachine.Status.Ready = false
+}
+
+// SetFailureMessage sets the GCPMachinePoolMachine status failure message.
+func (m *MachinePoolMachineScope) SetFailureMessage(v error) {
+	m.GCPMachinePoolMachine.Status.FailureMessage = ptr.To(v.Error())
+}
+
+// SetFailureReason sets the GCPMachinePoolMachine status failure reason.
+func (m *MachinePoolMachineScope) SetFailureReason(v capierrors.MachineStatusError) {
+	m.GCPMachinePoolMachine.Status.FailureReason = &v
+}
+
+// Close closes the current scope persisting the machine pool machine configuration and status.
+func (m *MachinePoolMachineScope) Close(ctx context.Context) error {
+	return m.PatchObject(ctx)
+}
+
+// NewMachinePoolMachineScope creates a new MachinePoolMachineScope from the supplied parameters.
+func NewMachinePoolMachineScope(params MachinePoolMachineScopeParams) (*MachinePoolMachineScope, error) {
+	if params.Client == nil {
+		return nil, errors.New("client is required when creating a MachinePoolMachineScope")
+	}
+	if params.MachinePool == nil {
+		return nil, errors.New("machine pool is required when creating a MachinePoolMachineScope")
+	}
+	if params.GCPMachinePool == nil {
+		return nil, errors.New("gcp machine pool is required when creating a MachinePoolMachineScope")
+	}
+	if params.GCPMachinePoolMachine == nil {
+		return nil, errors.New("gcp machine pool machine is required when creating a MachinePoolMachineScope")
+	}
+
+	helper, err := patch.NewHelper(params.GCPMachinePoolMachine, params.Client)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", params.GCPMachinePoolMachine.GroupVersionKind(), params.GCPMachinePoolMachine.Namespace, params.GCPMachinePoolMachine.Name)
+	}
+
+	capiMachinePoolPatchHelper, err := patch.NewHelper(params.MachinePool, params.Client)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", params.MachinePool.GroupVersionKind(), params.MachinePool.Namespace, params.MachinePool.Name)
+	}
+
+	return &MachinePoolMachineScope{
+		Client:                     params.Client,
+		ClusterGetter:              params.ClusterGetter,
+		MachinePool:                params.MachinePool,
+		GCPMachinePool:             params.GCPMachinePool,
+		GCPMachinePoolMachine:      params.GCPMachinePoolMachine,
+		PatchHelper:                helper,
+		CapiMachinePoolPatchHelper: capiMachinePoolPatchHelper,
+	}, nil
+}
+
+// UpdateNodeStatus updates the GCPMachinePoolMachine conditions and ready status. It will also update the node ref and the Kubernetes version.
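+// The returned bool reports whether a matching node was found; callers are expected to
+// requeue while it is false so the status can be completed once the node has registered.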
+func (m *MachinePoolMachineScope) UpdateNodeStatus(ctx context.Context) (bool, error) { + var node *corev1.Node + nodeRef := m.GCPMachinePoolMachine.Status.NodeRef + + // See if we can fetch a node using either the providerID or the nodeRef + node, found, err := m.GetNode(ctx) + switch { + case err != nil && apierrors.IsNotFound(err) && nodeRef != nil && nodeRef.Name != "": + // Node was not found due to 404 when finding by ObjectReference. + conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeNotFoundReason, clusterv1.ConditionSeverityError, "") + return false, nil + case err != nil: + // Failed due to an unexpected error + return false, err + case !found && m.ProviderID() == "": + // Node was not found due to not having a providerID set + conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.MachineNodeHealthyCondition, clusterv1.WaitingForNodeRefReason, clusterv1.ConditionSeverityInfo, "") + return false, nil + case !found && m.ProviderID() != "": + // Node was not found due to not finding a matching node by providerID + conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeProvisioningReason, clusterv1.ConditionSeverityInfo, "") + return false, nil + default: + // Node was found. Check if it is ready. + nodeReady := noderefutil.IsNodeReady(node) + m.GCPMachinePoolMachine.Status.Ready = nodeReady + if nodeReady { + conditions.MarkTrue(m.GCPMachinePoolMachine, clusterv1.MachineNodeHealthyCondition) + } else { + conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, clusterv1.ConditionSeverityWarning, "") + } + + m.GCPMachinePoolMachine.Status.NodeRef = &corev1.ObjectReference{ + Kind: node.Kind, + Namespace: node.Namespace, + Name: node.Name, + UID: node.UID, + APIVersion: node.APIVersion, + } + + m.GCPMachinePoolMachine.Status.Version = node.Status.NodeInfo.KubeletVersion + } + + return true, nil +} + +// GetNode returns the node for the GCPMachinePoolMachine. If the node is not found, it returns false. +func (m *MachinePoolMachineScope) GetNode(ctx context.Context) (*corev1.Node, bool, error) { + var ( + nodeRef = m.GCPMachinePoolMachine.Status.NodeRef + node *corev1.Node + err error + ) + + if nodeRef == nil || nodeRef.Name == "" { + node, err = m.GetNodeByProviderID(ctx, m.ProviderID()) + if err != nil { + return nil, false, errors.Wrap(err, "failed to get node by providerID") + } + } else { + node, err = m.GetNodeByObjectReference(ctx, *nodeRef) + if err != nil { + return nil, false, errors.Wrap(err, "failed to get node by object reference") + } + } + + if node == nil { + return nil, false, nil + } + + return node, true, nil +} + +// GetNodeByObjectReference will fetch a *corev1.Node via a node object reference. +func (m *MachinePoolMachineScope) GetNodeByObjectReference(ctx context.Context, nodeRef corev1.ObjectReference) (*corev1.Node, error) { + var node corev1.Node + err := m.Client.Get(ctx, client.ObjectKey{ + Namespace: nodeRef.Namespace, + Name: nodeRef.Name, + }, &node) + + return &node, err +} + +// GetNodeByProviderID returns a node by its providerID. If the node is not found, it returns nil. 
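+// It pages through the cluster's node list using the list continue token and matches on
+// node.Spec.ProviderID.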
+func (m *MachinePoolMachineScope) GetNodeByProviderID(ctx context.Context, providerID string) (*corev1.Node, error) {
+	nodeList := corev1.NodeList{}
+	for {
+		if err := m.Client.List(ctx, &nodeList, client.Continue(nodeList.Continue)); err != nil {
+			return nil, errors.Wrap(err, "failed to list nodes")
+		}
+
+		for _, node := range nodeList.Items {
+			if node.Spec.ProviderID == providerID {
+				return &node, nil
+			}
+		}
+
+		if nodeList.Continue == "" {
+			break
+		}
+	}
+
+	return nil, nil
+}
+
+// GetGCPClientCredentials returns the GCP client credentials.
+func (m *MachinePoolMachineScope) GetGCPClientCredentials() ([]byte, error) {
+	credsPath := os.Getenv(ConfigFileEnvVar)
+	if credsPath == "" {
+		return nil, fmt.Errorf("no ADC environment variable found for credentials (expect %s)", ConfigFileEnvVar)
+	}
+
+	byteValue, err := os.ReadFile(credsPath) //nolint:gosec // We need to read a file here
+	if err != nil {
+		return nil, fmt.Errorf("reading credentials from file %s: %w", credsPath, err)
+	}
+	return byteValue, nil
+}
+
+// Zone returns the zone for the GCPMachinePoolMachine.
+func (m *MachinePoolMachineScope) Zone() string {
+	return m.GCPMachinePool.Spec.Zone
+}
+
+// Project returns the project for the GCPMachinePoolMachine cluster.
+func (m *MachinePoolMachineScope) Project() string {
+	return m.ClusterGetter.Project()
+}
+
+// Name returns the GCPMachinePoolMachine name.
+func (m *MachinePoolMachineScope) Name() string {
+	return m.GCPMachinePoolMachine.GetName()
+}
+
+// ProviderID returns the provider ID for the GCPMachinePoolMachine.
+func (m *MachinePoolMachineScope) ProviderID() string {
+	return fmt.Sprintf("gce://%s/%s/%s", m.Project(), m.GCPMachinePool.Spec.Zone, m.Name())
+}
+
+// HasLatestModelApplied checks if the latest model is applied to the GCPMachinePoolMachine.
+func (m *MachinePoolMachineScope) HasLatestModelApplied(_ context.Context, disk *compute.Disk) (bool, error) {
+	image := ""
+
+	if m.GCPMachinePool.Spec.Image == nil {
+		version := ""
+		if m.MachinePool.Spec.Template.Spec.Version != nil {
+			version = *m.MachinePool.Spec.Template.Spec.Version
+		}
+		image = cloud.ClusterAPIImagePrefix + strings.ReplaceAll(semver.MajorMinor(version), ".", "-")
+	} else {
+		image = *m.GCPMachinePool.Spec.Image
+	}
+
+	// Get the image from the boot disk's source image URL to compare with the latest image name.
+	diskImage, err := url.Parse(disk.SourceImage)
+	if err != nil {
+		return false, err
+	}
+	instanceImage := path.Base(diskImage.Path)
+
+	// Check if the image is the latest.
+	return image == instanceImage, nil
+}
+
+// CordonAndDrainNode cordons and drains the node for the GCPMachinePoolMachine.
+func (m *MachinePoolMachineScope) CordonAndDrainNode(ctx context.Context) error {
+	log := log.FromContext(ctx)
+
+	// See if we can fetch a node using either the providerID or the nodeRef.
+	node, found, err := m.GetNode(ctx)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			return nil
+		}
+		// failed due to an unexpected error
+		return errors.Wrap(err, "failed to get node")
+	} else if !found {
+		// node was not found due to not finding a node with the ProviderID
+		return nil
+	}
+
+	// Drain node before deletion and issue a patch in order to make this operation visible to the users.
+	if m.isNodeDrainAllowed() {
+		patchHelper, err := patch.NewHelper(m.GCPMachinePoolMachine, m.Client)
+		if err != nil {
+			return errors.Wrap(err, "failed to build a patchHelper when draining node")
+		}
+
+		log.Info("Draining node before deletion", "node", node.Name)
+		// The DrainingSucceededCondition never exists before the node is drained for the first time,
+		// so its transition time can be used to record the first time draining.
+		// This `if` condition prevents the transition time from being changed more than once.
+		if conditions.Get(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil {
+			conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
+		}
+
+		if err := patchHelper.Patch(ctx, m.GCPMachinePoolMachine); err != nil {
+			return errors.Wrap(err, "failed to patch GCPMachinePoolMachine")
+		}
+
+		if err := m.drainNode(ctx, node); err != nil {
+			// Check for condition existence. If the condition exists, it may have a different severity or message, which
+			// would cause the last transition time to be updated. The last transition time is used to determine how
+			// long to wait to timeout the node drain operation. If we were to keep updating the last transition time,
+			// a drain operation may never timeout.
+			if conditions.Get(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil {
+				conditions.MarkFalse(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
+			}
+			return err
+		}
+
+		conditions.MarkTrue(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition)
+	}
+
+	return nil
+}
+
+// isNodeDrainAllowed checks to see if the node is excluded from draining or if the NodeDrainTimeout has expired.
+func (m *MachinePoolMachineScope) isNodeDrainAllowed() bool {
+	if _, exists := m.GCPMachinePoolMachine.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
+		return false
+	}
+
+	if m.nodeDrainTimeoutExceeded() {
+		return false
+	}
+
+	return true
+}
+
+// nodeDrainTimeoutExceeded will check to see if the GCPMachinePool's NodeDrainTimeout is exceeded for the
+// GCPMachinePoolMachine.
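+// The timeout is measured from the first transition time of the DrainingSucceededCondition.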
+func (m *MachinePoolMachineScope) nodeDrainTimeoutExceeded() bool {
+	// if the NodeDrainTimeout is not set by the user, the drain never times out
+	pool := m.GCPMachinePool
+	if pool == nil || pool.Spec.NodeDrainTimeout == nil || pool.Spec.NodeDrainTimeout.Seconds() <= 0 {
+		return false
+	}
+
+	// if the draining succeeded condition does not exist
+	if conditions.Get(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil {
+		return false
+	}
+
+	now := time.Now()
+	firstTimeDrain := conditions.GetLastTransitionTime(m.GCPMachinePoolMachine, clusterv1.DrainingSucceededCondition)
+	diff := now.Sub(firstTimeDrain.Time)
+	return diff.Seconds() >= m.GCPMachinePool.Spec.NodeDrainTimeout.Seconds()
+}
+
+func (m *MachinePoolMachineScope) drainNode(ctx context.Context, node *corev1.Node) error {
+	log := log.FromContext(ctx)
+
+	restConfig, err := remote.RESTConfig(ctx, MachinePoolMachineScopeName, m.Client, client.ObjectKey{
+		Name:      m.ClusterGetter.Name(),
+		Namespace: m.GCPMachinePoolMachine.Namespace,
+	})
+
+	if err != nil {
+		log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
+		return nil
+	}
+
+	kubeClient, err := kubernetes.NewForConfig(restConfig)
+	if err != nil {
+		log.Error(err, "Error creating a kubernetes client while deleting Machine, won't retry")
+		return nil
+	}
+
+	drainer := &kubedrain.Helper{
+		Client:              kubeClient,
+		Ctx:                 ctx,
+		Force:               true,
+		IgnoreAllDaemonSets: true,
+		DeleteEmptyDirData:  true,
+		GracePeriodSeconds:  -1,
+		// If a pod is not evicted in 20 seconds, retry the eviction next time the
+		// machine gets reconciled again (to allow other machines to be reconciled).
+		Timeout: 20 * time.Second,
+		OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
+			verbStr := "Deleted"
+			if usingEviction {
+				verbStr = "Evicted"
+			}
+			log.Info(fmt.Sprintf("%s pod from node", verbStr), "pod", pod.Name, "node", node.Name)
+		},
+		Out:    &writerInfo{logFunc: log.Info},
+		ErrOut: &writerError{logFunc: log.Error},
+	}
+
+	if noderefutil.IsNodeUnreachable(node) {
+		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
+		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
+	}
+
+	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {
+		// Machine will be re-reconciled after a cordon failure.
+		return fmt.Errorf("cordoning failed, retry in 20s: %w", err)
+	}
+
+	if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil {
+		// Machine will be re-reconciled after a drain failure.
+		return fmt.Errorf("draining failed, retry in 20s: %w", err)
+	}
+
+	log.Info("Node drained successfully", "node", node.Name)
+	return nil
+}
+
+type writerInfo struct {
+	logFunc func(msg string, keysAndValues ...any)
+}
+
+func (w *writerInfo) Write(p []byte) (n int, err error) {
+	w.logFunc(string(p))
+	return len(p), nil
+}
+
+type writerError struct {
+	logFunc func(err error, msg string, keysAndValues ...any)
+}
+
+func (w *writerError) Write(p []byte) (n int, err error) {
+	w.logFunc(errors.New(string(p)), "")
+	return len(p), nil
+}
diff --git a/cloud/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go b/cloud/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go
new file mode 100644
index 000000000..2fb9747f3
--- /dev/null
+++ b/cloud/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go
@@ -0,0 +1,280 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package machinepool implements the machine pool deployment strategies for GCPMachinePool.
+package machinepool
+
+import (
+	"context"
+	"math/rand"
+	"sort"
+	"time"
+
+	"github.com/pkg/errors"
+	"k8s.io/apimachinery/pkg/util/intstr"
+
+	infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
+	ctrl "sigs.k8s.io/controller-runtime"
+)
+
+type (
+	// Surger is the ability to surge a number of replicas.
+	Surger interface {
+		Surge(desiredReplicaCount int) (int, error)
+	}
+
+	// DeleteSelector is the ability to select nodes to be deleted with respect to a desired number of replicas.
+	DeleteSelector interface {
+		SelectMachinesToDelete(ctx context.Context, desiredReplicas int32, machinesByProviderID map[string]infrav1exp.GCPMachinePoolMachine) ([]infrav1exp.GCPMachinePoolMachine, error)
+	}
+
+	// TypedDeleteSelector is the ability to select nodes to be deleted with respect to a desired number of nodes, and
+	// the ability to describe the underlying type of the deployment strategy.
+	TypedDeleteSelector interface {
+		DeleteSelector
+		Type() infrav1exp.GCPMachinePoolDeploymentStrategyType
+	}
+
+	rollingUpdateStrategy struct {
+		infrav1exp.MachineRollingUpdateDeployment
+	}
+)
+
+// NewMachinePoolDeploymentStrategy constructs a strategy implementation described in the GCPMachinePoolDeploymentStrategy
+// specification.
+func NewMachinePoolDeploymentStrategy(strategy infrav1exp.GCPMachinePoolDeploymentStrategy) TypedDeleteSelector {
+	switch strategy.Type {
+	case infrav1exp.RollingUpdateGCPMachinePoolDeploymentStrategyType:
+		rollingUpdate := strategy.RollingUpdate
+		if rollingUpdate == nil {
+			rollingUpdate = &infrav1exp.MachineRollingUpdateDeployment{}
+		}
+
+		return &rollingUpdateStrategy{
+			MachineRollingUpdateDeployment: *rollingUpdate,
+		}
+	default:
+		// default to a rolling update strategy if the type is unknown
+		return &rollingUpdateStrategy{
+			MachineRollingUpdateDeployment: infrav1exp.MachineRollingUpdateDeployment{},
+		}
+	}
+}
+
+// Type is the GCPMachinePoolDeploymentStrategyType for the strategy.
+func (rollingUpdateStrategy *rollingUpdateStrategy) Type() infrav1exp.GCPMachinePoolDeploymentStrategyType {
+	return infrav1exp.RollingUpdateGCPMachinePoolDeploymentStrategyType
+}
+
+// Surge calculates the number of replicas that can be added during an upgrade operation.
+func (rollingUpdateStrategy *rollingUpdateStrategy) Surge(desiredReplicaCount int) (int, error) {
+	if rollingUpdateStrategy.MaxSurge == nil {
+		return 1, nil
+	}
+
+	return intstr.GetScaledValueFromIntOrPercent(rollingUpdateStrategy.MaxSurge, desiredReplicaCount, true)
+}
+
+// maxUnavailable calculates the maximum number of replicas which can be unavailable at any time.
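+// It returns 0 when MaxUnavailable is not set, meaning no ready machines may be disrupted.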
+func (rollingUpdateStrategy *rollingUpdateStrategy) maxUnavailable(desiredReplicaCount int) (int, error) { + if rollingUpdateStrategy.MaxUnavailable != nil { + val, err := intstr.GetScaledValueFromIntOrPercent(rollingUpdateStrategy.MaxUnavailable, desiredReplicaCount, false) + if err != nil { + return 0, errors.Wrap(err, "failed to get scaled value or int from maxUnavailable") + } + + return val, nil + } + + return 0, nil +} + +// SelectMachinesToDelete selects the machines to delete based on the machine state, desired replica count, and +// the DeletePolicy. +func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx context.Context, desiredReplicaCount int32, machinesByProviderID map[string]infrav1exp.GCPMachinePoolMachine) ([]infrav1exp.GCPMachinePoolMachine, error) { + maxUnavailable, err := rollingUpdateStrategy.maxUnavailable(int(desiredReplicaCount)) + if err != nil { + return nil, err + } + + var ( + order = func() func(machines []infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine { + switch rollingUpdateStrategy.DeletePolicy { + case infrav1exp.OldestDeletePolicyType: + return orderByOldest + case infrav1exp.NewestDeletePolicyType: + return orderByNewest + default: + return orderRandom + } + }() + log = ctrl.LoggerFrom(ctx).V(4) + readyMachines = order(getReadyMachines(machinesByProviderID)) + machinesWithoutLatestModel = order(getMachinesWithoutLatestModel(machinesByProviderID)) + deletingMachines = order(getDeletingMachines(machinesByProviderID)) + overProvisionCount = len(readyMachines) - int(desiredReplicaCount) + disruptionBudget = func() int { + if maxUnavailable > int(desiredReplicaCount) { + return int(desiredReplicaCount) + } + + return len(readyMachines) - int(desiredReplicaCount) + maxUnavailable + }() + ) + + log.Info("selecting machines to delete", + "readyMachines", len(readyMachines), + "desiredReplicaCount", desiredReplicaCount, + "maxUnavailable", maxUnavailable, + "disruptionBudget", disruptionBudget, + "machinesWithoutTheLatestModel", len(machinesWithoutLatestModel), + "deletingMachines", len(deletingMachines), + ) + + // if we have failed or deleting machines, remove them + if len(deletingMachines) > 0 { + log.Info("failed or deleting machines", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "deletingMachines", getProviderIDs(deletingMachines)) + return deletingMachines, nil + } + + // if we have not yet reached our desired count, don't try to delete anything + if len(readyMachines) < int(desiredReplicaCount) { + log.Info("not enough ready machines", "desiredReplicaCount", desiredReplicaCount, "readyMachinesCount", len(readyMachines), "machinesByProviderID", len(machinesByProviderID)) + return []infrav1exp.GCPMachinePoolMachine{}, nil + } + + // we have too many machines, let's choose the oldest to remove + if overProvisionCount > 0 { + var toDelete []infrav1exp.GCPMachinePoolMachine + log.Info("over-provisioned", "desiredReplicaCount", desiredReplicaCount, "overProvisionCount", overProvisionCount, "machinesWithoutLatestModel", getProviderIDs(machinesWithoutLatestModel)) + // we are over-provisioned try to remove old models + for _, v := range machinesWithoutLatestModel { + if len(toDelete) >= overProvisionCount { + return toDelete, nil + } + + toDelete = append(toDelete, v) + } + + log.Info("over-provisioned ready", "desiredReplicaCount", desiredReplicaCount, "overProvisionCount", overProvisionCount, "readyMachines", getProviderIDs(readyMachines)) + // remove ready machines + for _, v := 
range readyMachines {
+			if len(toDelete) >= overProvisionCount {
+				return toDelete, nil
+			}
+
+			toDelete = append(toDelete, v)
+		}
+
+		return toDelete, nil
+	}
+
+	if len(machinesWithoutLatestModel) == 0 {
+		log.Info("nothing more to do since all the GCPMachinePoolMachine(s) are the latest model and not over-provisioned")
+		return []infrav1exp.GCPMachinePoolMachine{}, nil
+	}
+
+	if disruptionBudget <= 0 {
+		log.Info("exit early since disruption budget is less than or equal to zero", "disruptionBudget", disruptionBudget, "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "readyMachines", getProviderIDs(readyMachines), "readyMachinesCount", len(readyMachines))
+		return []infrav1exp.GCPMachinePoolMachine{}, nil
+	}
+
+	var toDelete []infrav1exp.GCPMachinePoolMachine
+	log.Info("removing ready machines within disruption budget", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "readyMachines", getProviderIDs(readyMachines), "readyMachinesCount", len(readyMachines))
+	for _, v := range readyMachines {
+		if len(toDelete) >= disruptionBudget {
+			return toDelete, nil
+		}
+
+		if !v.Status.LatestModelApplied {
+			toDelete = append(toDelete, v)
+		}
+	}
+
+	log.Info("completed without filling toDelete", "toDelete", getProviderIDs(toDelete), "numToDelete", len(toDelete))
+	return toDelete, nil
+}
+
+func getReadyMachines(machinesByProviderID map[string]infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine {
+	var readyMachines []infrav1exp.GCPMachinePoolMachine
+	for _, v := range machinesByProviderID {
+		// ready status and not marked for deletion
+		if v.Status.Ready &&
+			v.DeletionTimestamp.IsZero() {
+			readyMachines = append(readyMachines, v)
+		}
+	}
+
+	return readyMachines
+}
+
+func getMachinesWithoutLatestModel(machinesByProviderID map[string]infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine {
+	var machinesWithoutLatestModel []infrav1exp.GCPMachinePoolMachine
+	for _, v := range machinesByProviderID {
+		if !v.Status.LatestModelApplied {
+			machinesWithoutLatestModel = append(machinesWithoutLatestModel, v)
+		}
+	}
+
+	return machinesWithoutLatestModel
+}
+
+// getDeletingMachines is responsible for identifying machines whose VMs are in an active state of deletion
+// but whose corresponding GCPMachinePoolMachine resource has not yet been marked for deletion.
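+// SelectMachinesToDelete returns these machines first so that their GCPMachinePoolMachine
+// resources are cleaned up before any further scale-down decisions are made.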
+func getDeletingMachines(machinesByProviderID map[string]infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine { + var machines []infrav1exp.GCPMachinePoolMachine + for _, v := range machinesByProviderID { + if v.Status.ProvisioningState == infrav1exp.Deleting && + // Ensure that the machine has not already been marked for deletion + v.DeletionTimestamp.IsZero() { + machines = append(machines, v) + } + } + + return machines +} + +func orderByNewest(machines []infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine { + sort.Slice(machines, func(i, j int) bool { + return machines[i].ObjectMeta.CreationTimestamp.After(machines[j].ObjectMeta.CreationTimestamp.Time) + }) + + return machines +} + +func orderByOldest(machines []infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine { + sort.Slice(machines, func(i, j int) bool { + return machines[j].ObjectMeta.CreationTimestamp.After(machines[i].ObjectMeta.CreationTimestamp.Time) + }) + + return machines +} + +func orderRandom(machines []infrav1exp.GCPMachinePoolMachine) []infrav1exp.GCPMachinePoolMachine { + //nolint:gosec // We don't need a cryptographically appropriate random number here + r := rand.New(rand.NewSource(time.Now().UnixNano())) + r.Shuffle(len(machines), func(i, j int) { machines[i], machines[j] = machines[j], machines[i] }) + return machines +} + +func getProviderIDs(machines []infrav1exp.GCPMachinePoolMachine) []string { + ids := make([]string, len(machines)) + for i, machine := range machines { + ids[i] = machine.Spec.ProviderID + } + + return ids +} diff --git a/cloud/services/compute/instancegroupinstances/client.go b/cloud/services/compute/instancegroupinstances/client.go new file mode 100644 index 000000000..7e48aaeca --- /dev/null +++ b/cloud/services/compute/instancegroupinstances/client.go @@ -0,0 +1,72 @@ +/* +Copyright 2023 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroupinstances provides methods for managing GCP instance groups. +package instancegroupinstances + +import ( + "context" + + "google.golang.org/api/compute/v1" + "google.golang.org/api/option" +) + +// Client wraps GCP SDK. +type Client interface { + + // Instance methods. + GetInstance(ctx context.Context, project, zone, name string) (*compute.Instance, error) + // InstanceGroupInstances methods. + ListInstanceGroupInstances(ctx context.Context, project, zone, name string) (*compute.InstanceGroupManagersListManagedInstancesResponse, error) + DeleteInstanceGroupInstances(ctx context.Context, project, zone, name string, instances *compute.InstanceGroupManagersDeleteInstancesRequest) (*compute.Operation, error) + // Disk methods. + GetDisk(ctx context.Context, project, zone, name string) (*compute.Disk, error) +} + +type ( + // GCPClient contains the GCP SDK client. + GCPClient struct { + service *compute.Service + } +) + +var _ Client = &GCPClient{} + +// NewGCPClient creates a new GCP SDK client. 
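+// It returns nil when the underlying Compute service cannot be constructed, so callers
+// must check for a nil client before use.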
+func NewGCPClient(ctx context.Context, creds []byte) *GCPClient { + service, err := compute.NewService(ctx, option.WithCredentialsJSON(creds)) + if err != nil { + return nil + } + return &GCPClient{service: service} +} + +// GetInstance returns a specific instance in a project and zone. +func (c *GCPClient) GetInstance(_ context.Context, project, zone, name string) (*compute.Instance, error) { + return c.service.Instances.Get(project, zone, name).Do() +} + +// GetDisk returns a specific disk in a project and zone. +func (c *GCPClient) GetDisk(_ context.Context, project, zone, name string) (*compute.Disk, error) { + return c.service.Disks.Get(project, zone, name).Do() +} + +// ListInstanceGroupInstances returns a response that contains the list of managed instances in the instance group. +func (c *GCPClient) ListInstanceGroupInstances(_ context.Context, project, zone, name string) (*compute.InstanceGroupManagersListManagedInstancesResponse, error) { + return c.service.InstanceGroupManagers.ListManagedInstances(project, zone, name).Do() +} + +// DeleteInstanceGroupInstances deletes instances from an instance group in a project and zone. +func (c *GCPClient) DeleteInstanceGroupInstances(_ context.Context, project, zone, name string, instances *compute.InstanceGroupManagersDeleteInstancesRequest) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.DeleteInstances(project, zone, name, instances).Do() +} diff --git a/cloud/services/compute/instancegroupinstances/doc.go b/cloud/services/compute/instancegroupinstances/doc.go new file mode 100644 index 000000000..2ab53e8f9 --- /dev/null +++ b/cloud/services/compute/instancegroupinstances/doc.go @@ -0,0 +1,15 @@ +/* +Copyright 2023 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroupinstances provides methods for managing GCP instance groups. +package instancegroupinstances diff --git a/cloud/services/compute/instancegroupinstances/instancegroupinstances.go b/cloud/services/compute/instancegroupinstances/instancegroupinstances.go new file mode 100644 index 000000000..94eb64804 --- /dev/null +++ b/cloud/services/compute/instancegroupinstances/instancegroupinstances.go @@ -0,0 +1,140 @@ +/* +Copyright 2023 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroupinstances provides methods for managing GCP instance group instances. 
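+//
+// The Service defined here reconciles a single managed instance within a MIG: it mirrors
+// the instance state onto the GCPMachinePoolMachine and cordons/drains the node before deletion.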
+package instancegroupinstances
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"google.golang.org/api/compute/v1"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/gcperrors"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/scope"
+	"sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+type (
+	// Service is a service for managing GCP instance group instances.
+	Service struct {
+		scope *scope.MachinePoolMachineScope
+		Client
+	}
+)
+
+var _ cloud.ReconcilerWithResult = &Service{}
+
+// New creates a new instance group instances service.
+func New(scope *scope.MachinePoolMachineScope) *Service {
+	creds, err := scope.GetGCPClientCredentials()
+	if err != nil {
+		return nil
+	}
+
+	return &Service{
+		scope:  scope,
+		Client: NewGCPClient(context.Background(), creds),
+	}
+}
+
+// Reconcile fetches the managed instance and its boot disk, and updates the GCPMachinePoolMachine status from them.
+func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+	log.Info("Reconciling Instance Group Instances")
+
+	// Fetch the instance.
+	instance, err := s.GetInstance(ctx, s.scope.Project(), s.scope.Zone(), s.scope.Name())
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	// Fetch the instance's boot disk.
+	disk, err := s.GetDisk(ctx, s.scope.Project(), s.scope.Zone(), s.scope.Name())
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	// Update the GCPMachinePoolMachine status.
+	s.scope.GCPMachinePoolMachine.Status.InstanceName = instance.Name
+
+	// Update Node status with the instance information. If the node is not found, requeue.
+	if nodeFound, err := s.scope.UpdateNodeStatus(ctx); err != nil {
+		log.Error(err, "Failed to update Node status")
+		return ctrl.Result{}, err
+	} else if !nodeFound {
+		log.Info("Node not found, requeueing")
+		return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+	}
+
+	// Update hasLatestModelApplied status.
+	latestModel, err := s.scope.HasLatestModelApplied(ctx, disk)
+	if err != nil {
+		log.Error(err, "Failed to check if the latest model is applied")
+		return ctrl.Result{}, err
+	}
+
+	// Update the GCPMachinePoolMachine status.
+	s.scope.GCPMachinePoolMachine.Status.LatestModelApplied = latestModel
+	s.scope.SetReady()
+
+	return ctrl.Result{}, nil
+}
+
+// Delete cordons and drains the node, then deletes the instance from the managed instance group.
+func (s *Service) Delete(ctx context.Context) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+	log.Info("Deleting Instance Group Instances")
+
+	if s.scope.GCPMachinePoolMachine.Status.ProvisioningState != v1beta1.Deleting {
+		log.Info("Deleting instance", "instance", s.scope.Name())
+		// Cordon and drain the node before deleting the instance.
+		if err := s.scope.CordonAndDrainNode(ctx); err != nil {
+			return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, err
+		}
+
+		// Delete the instance group instance.
+		_, err := s.DeleteInstanceGroupInstances(ctx, s.scope.Project(), s.scope.Zone(), s.scope.GCPMachinePool.Name, &compute.InstanceGroupManagersDeleteInstancesRequest{
+			Instances: []string{fmt.Sprintf("zones/%s/instances/%s", s.scope.Zone(), s.scope.Name())},
+		})
+		if err != nil {
+			log.Info("Assuming the instance is already deleted", "error", gcperrors.PrintGCPError(err))
+			return ctrl.Result{}, nil
+		}
+
+		// Update the GCPMachinePoolMachine status.
+		s.scope.GCPMachinePoolMachine.Status.ProvisioningState = v1beta1.Deleting
+
+		// Wait for the instance to be deleted before proceeding.
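+		// The requeue below gives the MIG time to remove the instance; the follow-up
+		// reconcile confirms deletion by listing the group's managed instances.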
+ return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil + } + + log.Info("Waiting for instance to be deleted", "instance", s.scope.Name()) + // List the instance group instances to check if the instance is deleted. + instances, err := s.ListInstanceGroupInstances(ctx, s.scope.Project(), s.scope.Zone(), s.scope.GCPMachinePool.Name) + if err != nil { + return ctrl.Result{}, err + } + + for _, instance := range instances.ManagedInstances { + if instance.Name == s.scope.Name() { + log.Info("Instance is still deleting") + return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil + } + } + + return ctrl.Result{}, nil +} diff --git a/cloud/services/compute/instancegroups/client.go b/cloud/services/compute/instancegroups/client.go index 041dd3972..b36eb9f91 100644 --- a/cloud/services/compute/instancegroups/client.go +++ b/cloud/services/compute/instancegroups/client.go @@ -33,10 +33,12 @@ type Client interface { CreateInstanceGroup(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) UpdateInstanceGroup(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) SetInstanceGroupTemplate(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) + SetInstanceGroupSize(ctx context.Context, project, zone, name string, size int64) (*compute.Operation, error) DeleteInstanceGroup(ctx context.Context, project, zone, name string) (*compute.Operation, error) ListInstanceGroupInstances(ctx context.Context, project, zone, name string) (*compute.InstanceGroupManagersListManagedInstancesResponse, error) // InstanceGroupTemplate Interfaces GetInstanceTemplate(ctx context.Context, project, name string) (*compute.InstanceTemplate, error) + ListInstanceTemplates(ctx context.Context, project string) (*compute.InstanceTemplateList, error) CreateInstanceTemplate(ctx context.Context, project string, instanceTemplate *compute.InstanceTemplate) (*compute.Operation, error) DeleteInstanceTemplate(ctx context.Context, project, name string) (*compute.Operation, error) WaitUntilOperationCompleted(project, operation string) error @@ -76,6 +78,11 @@ func (c *GCPClient) UpdateInstanceGroup(_ context.Context, project, zone string, return c.service.InstanceGroupManagers.Patch(project, zone, instanceGroup.Name, instanceGroup).Do() } +// SetInstanceGroupSize resizes an instance group in a project and zone. +func (c *GCPClient) SetInstanceGroupSize(_ context.Context, project, zone, name string, size int64) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.Resize(project, zone, name, size).Do() +} + // SetInstanceGroupTemplate sets an instance group template in a project and zone. func (c *GCPClient) SetInstanceGroupTemplate(_ context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) { return c.service.InstanceGroupManagers.SetInstanceTemplate(project, zone, instanceGroup.Name, &compute.InstanceGroupManagersSetInstanceTemplateRequest{ @@ -98,6 +105,11 @@ func (c *GCPClient) GetInstanceTemplate(_ context.Context, project, name string) return c.service.InstanceTemplates.Get(project, name).Do() } +// ListInstanceTemplates returns a list of instance templates in a project. 
+func (c *GCPClient) ListInstanceTemplates(_ context.Context, project string) (*compute.InstanceTemplateList, error) { + return c.service.InstanceTemplates.List(project).Do() +} + // CreateInstanceTemplate creates a new instance template in a project. func (c *GCPClient) CreateInstanceTemplate(_ context.Context, project string, instanceTemplate *compute.InstanceTemplate) (*compute.Operation, error) { return c.service.InstanceTemplates.Insert(project, instanceTemplate).Do() diff --git a/cloud/services/compute/instancegroups/instancegroups.go b/cloud/services/compute/instancegroups/instancegroups.go index c1622cd86..92f3b8229 100644 --- a/cloud/services/compute/instancegroups/instancegroups.go +++ b/cloud/services/compute/instancegroups/instancegroups.go @@ -24,12 +24,10 @@ import ( "time" "google.golang.org/api/compute/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/cluster-api-provider-gcp/cloud" "sigs.k8s.io/cluster-api-provider-gcp/cloud/gcperrors" "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" - infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -98,6 +96,7 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { } instanceGroup, err := s.Client.GetInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + var patched bool switch { case err != nil && !gcperrors.IsNotFound(err): log.Error(err, "Error looking for instance group") @@ -109,48 +108,43 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { return ctrl.Result{}, err } case err == nil: - log.Info("Instance group found, updating") - err = s.patchInstanceGroup(ctx, instanceTemplateName, instanceGroup) + log.Info("Instance group found", "instance group", instanceGroup.Name) + patched, err = s.patchInstanceGroup(ctx, instanceTemplateName, instanceGroup) if err != nil { log.Error(err, "Error updating instance group") return ctrl.Result{}, err } + err = s.removeOldInstanceTemplate(ctx, instanceTemplateName) + if err != nil { + log.Error(err, "Error removing old instance templates") + return ctrl.Result{}, err + } } - // Re-get the instance group after updating it. This is needed to get the latest status. - instanceGroup, err = s.Client.GetInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) - if err != nil { - log.Error(err, "Error getting instance group") - return ctrl.Result{}, err + // Get the instance group again if it was patched. This is needed to get the updated state. If it wasn't patched, use the instance group from the previous step. + if patched { + log.Info("Instance group patched, getting updated instance group") + instanceGroup, err = s.Client.GetInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + if err != nil { + log.Error(err, "Error getting instance group") + return ctrl.Result{}, err + } } - - instanceGroupResponse, err := s.Client.ListInstanceGroupInstances(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + // List the instance group instances. This is needed to get the provider IDs. 
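+	// Each managed instance is later surfaced through the scope as a provider ID
+	// of the form "gce://<project>/<zone>/<instance-name>".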
+ instanceGroupInstances, err := s.Client.ListInstanceGroupInstances(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) if err != nil { log.Error(err, "Error listing instance group instances") return ctrl.Result{}, err } - providerIDList := []string{} - for _, managedInstance := range instanceGroupResponse.ManagedInstances { - managedInstanceFmt := fmt.Sprintf("gce://%s/%s/%s", s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, managedInstance.Name) - providerIDList = append(providerIDList, managedInstanceFmt) + // Set the MIG state and instances. This is needed to set the status. + if instanceGroup != nil && instanceGroupInstances != nil { + s.scope.SetMIGState(instanceGroup) + s.scope.SetMIGInstances(instanceGroupInstances.ManagedInstances) + } else { + err = fmt.Errorf("instance group or instance group list is nil") + return ctrl.Result{}, err } - - // update ProviderID and ProviderId List - s.scope.MachinePool.Spec.ProviderIDList = providerIDList - s.scope.GCPMachinePool.Spec.ProviderID = fmt.Sprintf("gce://%s/%s/%s", s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, instanceGroup.Name) - s.scope.GCPMachinePool.Spec.ProviderIDList = providerIDList - - log.Info("Instance group updated", "instance group", instanceGroup.Name, "instance group status", instanceGroup.Status, "instance group target size", instanceGroup.TargetSize, "instance group current size", instanceGroup.TargetSize) - // Set the status. - conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolUpdatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "") - s.scope.SetReplicas(int32(instanceGroup.TargetSize)) - s.scope.MachinePool.Status.Replicas = int32(instanceGroup.TargetSize) - s.scope.MachinePool.Status.ReadyReplicas = int32(instanceGroup.TargetSize) - s.scope.GCPMachinePool.Status.Ready = true - conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolReadyCondition) - conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolCreatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "") - return ctrl.Result{}, nil } @@ -192,43 +186,98 @@ func (s *Service) createInstanceGroup(ctx context.Context, instanceTemplateName } // patchInstanceGroup patches the instance group. -func (s *Service) patchInstanceGroup(ctx context.Context, instanceTemplateName string, instanceGroup *compute.InstanceGroupManager) error { +func (s *Service) patchInstanceGroup(ctx context.Context, instanceTemplateName string, instanceGroup *compute.InstanceGroupManager) (bool, error) { log := log.FromContext(ctx) + // Reconcile replicas. err := s.scope.ReconcileReplicas(ctx, instanceGroup) if err != nil { log.Error(err, "Error reconciling replicas") - return err + return false, err } lastSlashTemplateURI := strings.LastIndex(instanceGroup.InstanceTemplate, "/") fetchedInstanceTemplateName := instanceGroup.InstanceTemplate[lastSlashTemplateURI+1:] + patched := false // Check if instance group is already using the instance template. if fetchedInstanceTemplateName != instanceTemplateName { - log.Info("Instance group is not using the instance template, setting instance template", "instance group", instanceGroup.InstanceTemplate, "instance template", instanceTemplateName) + log.Info("Instance group is not using the latest instance template, setting instance template", "instance group", instanceGroup.InstanceTemplate, "instance template", instanceTemplateName) // Set instance template. 
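+			// Note: setting the MIG's instance template only affects instances created
+			// afterwards; existing instances keep the old template until they are
+			// replaced, which the rolling-update machinery handles separately.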
-		_, err := s.Client.SetInstanceGroupTemplate(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName))
+		setInstanceTemplateOperation, err := s.Client.SetInstanceGroupTemplate(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName))
 		if err != nil {
 			log.Error(err, "Error setting instance group template")
-			return err
+			return false, err
 		}
+
+		err = s.WaitUntilComputeOperationCompleted(s.scope.Project(), s.scope.Zone(), setInstanceTemplateOperation.Name)
+		if err != nil {
+			log.Error(err, "Error waiting for instance group template operation to complete")
+			return false, err
+		}
+
+		patched = true
 	}
 
-	// If the instance group is already using the instance template, update the instance group. Otherwise, set the instance template.
-	if fetchedInstanceTemplateName == instanceTemplateName {
-		log.Info("Instance group is using the instance template, updating instance group")
-		instanceGroupUpdateOperation, err := s.Client.UpdateInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName))
+	machinePoolReplicas := int64(ptr.Deref[int32](s.scope.MachinePool.Spec.Replicas, 0))
+	// Decreases in replica count are handled by deleting GCPMachinePoolMachine instances in the MachinePoolScope.
+	if !s.scope.HasReplicasExternallyManaged(ctx) && instanceGroup.TargetSize < machinePoolReplicas {
+		log.Info("Instance Group Target Size does not match the desired replicas in MachinePool, setting replicas", "instance group", instanceGroup.TargetSize, "desired replicas", machinePoolReplicas)
+		// Set replicas.
+		setReplicasOperation, err := s.Client.SetInstanceGroupSize(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name, machinePoolReplicas)
 		if err != nil {
-			log.Error(err, "Error updating instance group")
-			return err
+			log.Error(err, "Error setting instance group size")
+			return patched, err
 		}
-		err = s.WaitUntilComputeOperationCompleted(s.scope.Project(), s.scope.Zone(), instanceGroupUpdateOperation.Name)
+		err = s.WaitUntilComputeOperationCompleted(s.scope.Project(), s.scope.Zone(), setReplicasOperation.Name)
 		if err != nil {
-			log.Error(err, "Error waiting for instance group update operation to complete")
-			return err
+			log.Error(err, "Error waiting for instance group size operation to complete")
+			return patched, err
 		}
+
+		patched = true
+	}
+
+	return patched, nil
+}
+
+// removeOldInstanceTemplate deletes instance templates that are no longer used by the instance group.
+func (s *Service) removeOldInstanceTemplate(ctx context.Context, instanceTemplateName string) error {
+	log := log.FromContext(ctx)
+
+	// List all instance templates.
+	instanceTemplates, err := s.Client.ListInstanceTemplates(ctx, s.scope.Project())
+	if err != nil {
+		log.Error(err, "Error listing instance templates")
+		return err
+	}
+
+	// Prepare to identify instance templates to remove.
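+	// Naming assumption: templates for this pool share a common prefix followed by
+	// a generated suffix (e.g. "<pool-name>-<hash>"), so trimming at the last "-"
+	// recovers the prefix used to match stale sibling templates below.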
+ lastIndex := strings.LastIndex(instanceTemplateName, "-") + if lastIndex == -1 { + log.Error(fmt.Errorf("invalid instance template name format"), "Invalid template name", "templateName", instanceTemplateName) + return fmt.Errorf("invalid instance template name format: %s", instanceTemplateName) + } + + trimmedInstanceTemplateName := instanceTemplateName[:lastIndex] + var errors []error + + for _, instanceTemplate := range instanceTemplates.Items { + if strings.HasPrefix(instanceTemplate.Name, trimmedInstanceTemplateName) && instanceTemplate.Name != instanceTemplateName { + log.Info("Deleting old instance template", "templateName", instanceTemplate.Name) + _, err := s.Client.DeleteInstanceTemplate(ctx, s.scope.Project(), instanceTemplate.Name) + if err != nil { + log.Error(err, "Error deleting instance template", "templateName", instanceTemplate.Name) + errors = append(errors, err) + continue // Proceed to next template instead of returning immediately. + } + } + } + + // Aggregate errors (if any). + if len(errors) > 0 { + return fmt.Errorf("encountered errors during deletion: %v", errors) } return nil diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepoolmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepoolmachines.yaml new file mode 100644 index 000000000..7ecd5e8ca --- /dev/null +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepoolmachines.yaml @@ -0,0 +1,202 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: gcpmachinepoolmachines.infrastructure.cluster.x-k8s.io +spec: + group: infrastructure.cluster.x-k8s.io + names: + kind: GCPMachinePoolMachine + listKind: GCPMachinePoolMachineList + plural: gcpmachinepoolmachines + singular: gcpmachinepoolmachine + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: A machine pool machine belongs to a GCPMachinePool + jsonPath: .metadata.labels.cluster\.x-k8s\.io/cluster-name + name: Cluster + type: string + - description: Machine ready status + jsonPath: .status.ready + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: GCPMachinePoolMachine is the Schema for the GCPMachinePoolMachines + API and represents a GCP Machine Pool. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: GCPMachinePoolMachineSpec defines the desired state of GCPMachinePoolMachine + and the GCP instances that it will create. + properties: + instanceID: + description: InstanceID is the unique identifier for the instance + in the cloud provider. + type: string + providerID: + description: ProviderID is the unique identifier as specified by the + cloud provider. 
+ type: string + type: object + status: + description: GCPMachinePoolMachineStatus defines the observed state of + GCPMachinePoolMachine and the GCP instances that it manages. + properties: + conditions: + description: Conditions specifies the conditions for the managed machine + pool + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. This should be when the underlying condition changed. + If that is not known, then using the time when the API field + changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. The specific API may choose whether or not this + field is considered a guaranteed API. This field may not be + empty. + type: string + severity: + description: Severity provides an explicit classification of + Reason code, so the users or machines can immediately understand + the current situation and act accordingly. The Severity field + MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + failureMessage: + description: "FailureMessage will be set in the event that there is + a terminal problem reconciling the MachinePool and will contain + a more verbose string suitable for logging and human consumption. + \n This field should not be set for transitive errors that a controller + faces that are expected to be fixed automatically over time (like + service outages), but instead indicate that something is fundamentally + wrong with the MachinePool's spec or the configuration of the controller, + and that manual intervention is required. Examples of terminal errors + would be invalid combinations of settings in the spec, values that + are unsupported by the controller, or the responsible controller + itself being critically misconfigured. \n Any transient errors that + occur during the reconciliation of MachinePools can be added as + events to the MachinePool object and/or logged in the controller's + output." + type: string + failureReason: + description: "FailureReason will be set in the event that there is + a terminal problem reconciling the MachinePool and will contain + a succinct value suitable for machine interpretation. \n This field + should not be set for transitive errors that a controller faces + that are expected to be fixed automatically over time (like service + outages), but instead indicate that something is fundamentally wrong + with the MachinePool's spec or the configuration of the controller, + and that manual intervention is required. Examples of terminal errors + would be invalid combinations of settings in the spec, values that + are unsupported by the controller, or the responsible controller + itself being critically misconfigured. 
\n Any transient errors that + occur during the reconciliation of MachinePools can be added as + events to the MachinePool object and/or logged in the controller's + output." + type: string + instanceName: + description: InstanceName is the name of the Machine Instance within + the VMSS + type: string + lastOperation: + description: LastOperation is a string that contains the last operation + that was performed on the machine. + type: string + latestModelApplied: + description: LatestModelApplied is true when the latest instance template + has been applied to the machine. + type: boolean + nodeRef: + description: NodeRef will point to the corresponding Node if it exists. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead of + an entire object, this string should contain a valid JSON/Go + field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen + only to have some well-defined way of referencing a part of + an object. TODO: this design is not final and this field is + subject to change in the future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + x-kubernetes-map-type: atomic + provisioningState: + description: ProvisioningState is the state of the machine pool instance. + type: string + ready: + description: Ready is true when the provider resource is ready. + type: boolean + version: + description: Version defines the Kubernetes version for the VM Instance + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml index 5fc3a284e..6d2e867cf 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml @@ -59,7 +59,7 @@ spec: enum: - pd-standard - pd-ssd - - pd-balanced + - local-ssd type: string size: description: Size is the size of the disk in GBs. Defaults to @@ -124,6 +124,12 @@ spec: description: Network is the network to be used by machines in the machine pool. 
type: string + nodeDrainTimeout: + description: 'NodeDrainTimeout is the total amount of time that the + controller will spend on draining a node. The default value is 0, + meaning that the node can be drained without any time limitations. + NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`' + type: string providerID: description: ProviderID is the identification ID of the Managed Instance Group @@ -168,6 +174,76 @@ spec: type: string type: array type: object + strategy: + default: + rollingUpdate: + deletePolicy: Oldest + maxSurge: 1 + maxUnavailable: 0 + type: RollingUpdate + description: The deployment strategy to use to replace existing GCPMachinePoolMachines + with new ones. + properties: + rollingUpdate: + description: Rolling update config params. Present only if MachineDeploymentStrategyType + = RollingUpdate. + properties: + deletePolicy: + default: Oldest + description: DeletePolicy defines the policy used by the MachineDeployment + to identify nodes to delete when downscaling. Valid values + are "Random, "Newest", "Oldest" When no value is supplied, + the default is Oldest + enum: + - Random + - Newest + - Oldest + type: string + maxSurge: + anyOf: + - type: integer + - type: string + default: 1 + description: 'The maximum number of machines that can be scheduled + above the desired number of machines. Value can be an absolute + number (ex: 5) or a percentage of desired machines (ex: + 10%). This can not be 0 if MaxUnavailable is 0. Absolute + number is calculated from percentage by rounding up. Defaults + to 1. Example: when this is set to 30%, the new MachineSet + can be scaled up immediately when the rolling update starts, + such that the total number of old and new machines do not + exceed 130% of desired machines. Once old machines have + been killed, new MachineSet can be scaled up further, ensuring + that total number of machines running at any time during + the update is at most 130% of desired machines.' + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + default: 0 + description: 'The maximum number of machines that can be unavailable + during the update. Value can be an absolute number (ex: + 5) or a percentage of desired machines (ex: 10%). Absolute + number is calculated from percentage by rounding down. This + can not be 0 if MaxSurge is 0. Defaults to 0. Example: when + this is set to 30%, the old MachineSet can be scaled down + to 70% of desired machines immediately when the rolling + update starts. Once new machines are ready, old MachineSet + can be scaled down further, followed by scaling up the new + MachineSet, ensuring that the total number of machines available + at all times during the update is at least 70% of desired + machines.' + x-kubernetes-int-or-string: true + type: object + type: + default: RollingUpdate + description: Type of deployment. Currently the only supported + strategy is RollingUpdate + enum: + - RollingUpdate + type: string + type: object subnet: description: Subnet is a reference to the subnetwork to use for this instance. If not specified, the first subnetwork retrieved from @@ -267,6 +343,11 @@ spec: ready: description: Ready is true when the provider resource is ready. type: boolean + replicas: + description: The number of non-terminated machines targeted by this + machine pool that have the desired template spec. 
+                format: int32
+                type: integer
            type: object
        type: object
    served: true
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
index 614b02be6..8529fd375 100644
--- a/config/crd/kustomization.yaml
+++ b/config/crd/kustomization.yaml
@@ -14,6 +14,7 @@ resources:
 - bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml
 - bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml
+- bases/infrastructure.cluster.x-k8s.io_gcpmachinepoolmachines.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
 patchesStrategicMerge:
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index a4559e62d..87a314bad 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -105,6 +105,42 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - infrastructure.cluster.x-k8s.io
+  resources:
+  - gcpmachinepoolmachines
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - infrastructure.cluster.x-k8s.io
+  resources:
+  - gcpmachinepoolmachines/finalizers
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - infrastructure.cluster.x-k8s.io
+  resources:
+  - gcpmachinepoolmachines/status
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - infrastructure.cluster.x-k8s.io
   resources:
diff --git a/exp/api/v1beta1/gcpmachinepool_types.go b/exp/api/v1beta1/gcpmachinepool_types.go
index 9bf55149e..8c1b6b2f7 100644
--- a/exp/api/v1beta1/gcpmachinepool_types.go
+++ b/exp/api/v1beta1/gcpmachinepool_types.go
@@ -18,14 +18,27 @@ package v1beta1
 
 import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/errors"
 )
 
 const (
-	// MachinePoolFinalizer allows ReconcileGCPMachinePool to clean up GCP resources associated with GCPMachinePool before removing it from the apiserver.
-	MachinePoolFinalizer = "gcpmachinepool.infrastructure.cluster.x-k8s.io"
+	// MachinePoolNameLabel indicates the GCPMachinePool name to which the GCPMachinePoolMachine belongs.
+	MachinePoolNameLabel = "gcpmachinepool.infrastructure.cluster.x-k8s.io/machine-pool"
+
+	// RollingUpdateGCPMachinePoolDeploymentStrategyType replaces GCPMachinePoolMachines running older models
+	// with GCPMachinePoolMachines based on the latest model,
+	// i.e. it gradually scales down the old GCPMachinePoolMachines and scales up the new ones.
+	RollingUpdateGCPMachinePoolDeploymentStrategyType GCPMachinePoolDeploymentStrategyType = "RollingUpdate"
+
+	// OldestDeletePolicyType will delete machines with the oldest creation date first.
+	OldestDeletePolicyType GCPMachinePoolDeletePolicyType = "Oldest"
+	// NewestDeletePolicyType will delete machines with the newest creation date first.
+	NewestDeletePolicyType GCPMachinePoolDeletePolicyType = "Newest"
+	// RandomDeletePolicyType will delete machines in random order.
+	RandomDeletePolicyType GCPMachinePoolDeletePolicyType = "Random"
 )
 
 const (
@@ -45,8 +58,9 @@ type AttachedDiskSpec struct {
 	// 2. "pd-ssd" - SSD persistent disk
 	// 3. "local-ssd" - Local SSD disk (https://cloud.google.com/compute/docs/disks/local-ssd).
 	// Default is "pd-standard".
+	// +kubebuilder:validation:Enum=pd-standard;pd-ssd;local-ssd
 	// +optional
-	DeviceType *DiskType `json:"deviceType,omitempty"`
+	DeviceType *string `json:"deviceType,omitempty"`
 
 	// Size is the size of the disk in GBs.
 	// Defaults to 30GB. For "local-ssd" size is always 375GB.
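+	// (Local SSDs on GCP come in fixed 375GB partitions, which is why the size
+	// cannot be overridden for the "local-ssd" device type.)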
 	// +optional
@@ -151,10 +165,87 @@ type GCPMachinePoolSpec struct {
 	// +optional
 	Subnet *string `json:"subnet,omitempty"`
 
+	// The deployment strategy to use to replace existing GCPMachinePoolMachines with new ones.
+	// +optional
+	// +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1, maxUnavailable: 0, deletePolicy: Oldest}}
+	Strategy GCPMachinePoolDeploymentStrategy `json:"strategy,omitempty"`
+
+	// NodeDrainTimeout is the total amount of time that the controller will spend on draining a node.
+	// The default value is 0, meaning that the node can be drained without any time limitations.
+	// NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`
+	// +optional
+	NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"`
+
 	// Zone is the GCP zone location, e.g. us-central1-a
 	Zone string `json:"zone"`
 }
 
+// GCPMachinePoolDeploymentStrategyType is the type of deployment strategy employed to rollout a new version of the GCPMachinePool.
+type GCPMachinePoolDeploymentStrategyType string
+
+// GCPMachinePoolDeploymentStrategy describes how to replace existing machines with new ones.
+type GCPMachinePoolDeploymentStrategy struct {
+	// Type of deployment. Currently the only supported strategy is RollingUpdate.
+	// +kubebuilder:validation:Enum=RollingUpdate
+	// +kubebuilder:default=RollingUpdate
+	// +optional
+	Type GCPMachinePoolDeploymentStrategyType `json:"type,omitempty"`
+
+	// Rolling update config params. Present only if
+	// MachineDeploymentStrategyType = RollingUpdate.
+	// +optional
+	RollingUpdate *MachineRollingUpdateDeployment `json:"rollingUpdate,omitempty"`
+}
+
+// GCPMachinePoolDeletePolicyType is the type of DeletePolicy employed to select machines to be deleted during an
+// upgrade.
+type GCPMachinePoolDeletePolicyType string
+
+// MachineRollingUpdateDeployment is used to control the desired behavior of rolling update.
+type MachineRollingUpdateDeployment struct {
+	// The maximum number of machines that can be unavailable during the update.
+	// Value can be an absolute number (ex: 5) or a percentage of desired
+	// machines (ex: 10%).
+	// Absolute number is calculated from percentage by rounding down.
+	// This can not be 0 if MaxSurge is 0.
+	// Defaults to 0.
+	// Example: when this is set to 30%, the old MachineSet can be scaled
+	// down to 70% of desired machines immediately when the rolling update
+	// starts. Once new machines are ready, old MachineSet can be scaled
+	// down further, followed by scaling up the new MachineSet, ensuring
+	// that the total number of machines available at all times
+	// during the update is at least 70% of desired machines.
+	// +optional
+	// +kubebuilder:default:=0
+	MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
+
+	// The maximum number of machines that can be scheduled above the
+	// desired number of machines.
+	// Value can be an absolute number (ex: 5) or a percentage of
+	// desired machines (ex: 10%).
+	// This can not be 0 if MaxUnavailable is 0.
+	// Absolute number is calculated from percentage by rounding up.
+	// Defaults to 1.
+	// Example: when this is set to 30%, the new MachineSet can be scaled
+	// up immediately when the rolling update starts, such that the total
+	// number of old and new machines do not exceed 130% of desired
+	// machines. Once old machines have been killed, new MachineSet can
+	// be scaled up further, ensuring that the total number of machines running
+	// at any time during the update is at most 130% of desired machines.
+	// +optional
+	// +kubebuilder:default:=1
+	MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`
+
+	// DeletePolicy defines the policy used by the MachineDeployment to identify nodes to delete when downscaling.
+	// Valid values are "Random", "Newest", "Oldest".
+	// When no value is supplied, the default is Oldest.
+	// +optional
+	// +kubebuilder:validation:Enum=Random;Newest;Oldest
+	// +kubebuilder:default:=Oldest
+	DeletePolicy GCPMachinePoolDeletePolicyType `json:"deletePolicy,omitempty"`
+}
+
 // GCPMachinePoolStatus defines the observed state of GCPMachinePool and the GCP instances that it manages.
 type GCPMachinePoolStatus struct {
@@ -162,6 +253,10 @@ type GCPMachinePoolStatus struct {
 	// +optional
 	Ready bool `json:"ready"`
 
+	// The number of non-terminated machines targeted by this machine pool that have the desired template spec.
+	// +optional
+	Replicas int32 `json:"replicas"`
+
 	// FailureReason will be set in the event that there is a terminal problem
 	// reconciling the MachinePool and will contain a succinct value suitable
 	// for machine interpretation.
diff --git a/exp/api/v1beta1/gcpmachinepoolmachine_types.go b/exp/api/v1beta1/gcpmachinepoolmachine_types.go
new file mode 100644
index 000000000..66f587fa4
--- /dev/null
+++ b/exp/api/v1beta1/gcpmachinepoolmachine_types.go
@@ -0,0 +1,146 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/errors"
+)
+
+const (
+	// GCPMachinePoolMachineFinalizer allows the controller to clean up resources associated with a
+	// GCPMachinePoolMachine before removing it from the apiserver.
+	GCPMachinePoolMachineFinalizer = "gcpmachinepoolmachine.infrastructure.cluster.x-k8s.io"
+)
+
+// GCPMachinePoolMachineSpec defines the desired state of GCPMachinePoolMachine and the GCP instances that it will create.
+type GCPMachinePoolMachineSpec struct {
+	// ProviderID is the unique identifier as specified by the cloud provider.
+	// +optional
+	ProviderID string `json:"providerID,omitempty"`
+
+	// InstanceID is the unique identifier for the instance in the cloud provider.
+	// +optional
+	InstanceID string `json:"instanceID,omitempty"`
+}
+
+// GCPMachinePoolMachineStatus defines the observed state of GCPMachinePoolMachine and the GCP instances that it manages.
+type GCPMachinePoolMachineStatus struct {
+
+	// NodeRef will point to the corresponding Node if it exists.
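+	// The reference is expected to be resolved by the scope's UpdateNodeStatus
+	// once the instance has registered itself as a Kubernetes Node.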
+	// +optional
+	NodeRef *corev1.ObjectReference `json:"nodeRef,omitempty"`
+
+	// Version defines the Kubernetes version for the VM Instance
+	// +optional
+	Version string `json:"version,omitempty"`
+
+	// InstanceName is the name of the machine instance within the managed instance group (MIG).
+	// +optional
+	InstanceName string `json:"instanceName,omitempty"`
+
+	// LatestModelApplied is true when the latest instance template has been applied to the machine.
+	// +optional
+	LatestModelApplied bool `json:"latestModelApplied,omitempty"`
+
+	// Ready is true when the provider resource is ready.
+	// +optional
+	Ready bool `json:"ready,omitempty"`
+
+	// LastOperation is a string that contains the last operation that was performed on the machine.
+	// +optional
+	LastOperation string `json:"lastOperation,omitempty"`
+
+	// ProvisioningState is the state of the machine pool instance.
+	ProvisioningState ProvisioningState `json:"provisioningState,omitempty"`
+
+	// FailureReason will be set in the event that there is a terminal problem
+	// reconciling the MachinePool and will contain a succinct value suitable
+	// for machine interpretation.
+	//
+	// This field should not be set for transitive errors that a controller
+	// faces that are expected to be fixed automatically over
+	// time (like service outages), but instead indicate that something is
+	// fundamentally wrong with the MachinePool's spec or the configuration of
+	// the controller, and that manual intervention is required. Examples
+	// of terminal errors would be invalid combinations of settings in the
+	// spec, values that are unsupported by the controller, or the
+	// responsible controller itself being critically misconfigured.
+	//
+	// Any transient errors that occur during the reconciliation of MachinePools
+	// can be added as events to the MachinePool object and/or logged in the
+	// controller's output.
+	// +optional
+	FailureReason *errors.MachineStatusError `json:"failureReason,omitempty"`
+
+	// FailureMessage will be set in the event that there is a terminal problem
+	// reconciling the MachinePool and will contain a more verbose string suitable
+	// for logging and human consumption.
+	//
+	// This field should not be set for transitive errors that a controller
+	// faces that are expected to be fixed automatically over
+	// time (like service outages), but instead indicate that something is
+	// fundamentally wrong with the MachinePool's spec or the configuration of
+	// the controller, and that manual intervention is required. Examples
+	// of terminal errors would be invalid combinations of settings in the
+	// spec, values that are unsupported by the controller, or the
+	// responsible controller itself being critically misconfigured.
+	//
+	// Any transient errors that occur during the reconciliation of MachinePools
+	// can be added as events to the MachinePool object and/or logged in the
+	// controller's output.
+	// +optional
+	FailureMessage *string `json:"failureMessage,omitempty"`
+
+	// Conditions specifies the conditions for the machine pool machine.
+	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+// +kubebuilder:printcolumn:name="Cluster",type="string",JSONPath=".metadata.labels.cluster\\.x-k8s\\.io/cluster-name",description="A machine pool machine belongs to a GCPMachinePool"
+// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.ready",description="Machine ready status"
+
+// GCPMachinePoolMachine is the Schema for the GCPMachinePoolMachines API and represents a single machine within a GCP Machine Pool.
+type GCPMachinePoolMachine struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   GCPMachinePoolMachineSpec   `json:"spec,omitempty"`
+	Status GCPMachinePoolMachineStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// GCPMachinePoolMachineList contains a list of GCPMachinePoolMachine resources.
+type GCPMachinePoolMachineList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []GCPMachinePoolMachine `json:"items"`
+}
+
+// GetConditions returns the conditions for the GCPMachinePoolMachine.
+func (r *GCPMachinePoolMachine) GetConditions() clusterv1.Conditions {
+	return r.Status.Conditions
+}
+
+// SetConditions sets the status conditions for the GCPMachinePoolMachine.
+func (r *GCPMachinePoolMachine) SetConditions(conditions clusterv1.Conditions) {
+	r.Status.Conditions = conditions
+}
+
+func init() {
+	infrav1.SchemeBuilder.Register(&GCPMachinePoolMachine{}, &GCPMachinePoolMachineList{})
+}
diff --git a/exp/api/v1beta1/types.go b/exp/api/v1beta1/types.go
index 53eb3dc85..1bdac65d1 100644
--- a/exp/api/v1beta1/types.go
+++ b/exp/api/v1beta1/types.go
@@ -36,6 +36,24 @@ type Taint struct {
 	Value string `json:"value"`
 }
 
+// ProvisioningState describes the provisioning state of a GCP resource.
+type ProvisioningState string
+
+const (
+	// Creating means the resource is being created.
+	Creating ProvisioningState = "Creating"
+	// Deleting means the resource is being deleted.
+	Deleting ProvisioningState = "Deleting"
+	// Failed means the resource is in a failed state.
+	Failed ProvisioningState = "Failed"
+	// Succeeded means the last operation on the resource succeeded.
+	Succeeded ProvisioningState = "Succeeded"
+	// Updating means the resource is being updated.
+	Updating ProvisioningState = "Updating"
+	// Deleted represents a deleted resource.
+	Deleted ProvisioningState = "Deleted"
+)
+
 // Taints is an array of Taints.
 type Taints []Taint
diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go
index a82cf74f5..f6bab77f4 100644
--- a/exp/api/v1beta1/zz_generated.deepcopy.go
+++ b/exp/api/v1beta1/zz_generated.deepcopy.go
@@ -21,7 +21,10 @@ limitations under the License.
 package v1beta1
 
 import (
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	apiv1beta1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
 	cluster_apiapiv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/errors"
@@ -32,7 +35,7 @@ func (in *AttachedDiskSpec) DeepCopyInto(out *AttachedDiskSpec) {
 	*out = *in
 	if in.DeviceType != nil {
 		in, out := &in.DeviceType, &out.DeviceType
-		*out = new(DiskType)
+		*out = new(string)
 		**out = **in
 	}
 	if in.Size != nil {
@@ -79,6 +82,26 @@ func (in *GCPMachinePool) DeepCopyObject() runtime.Object {
 	return nil
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *GCPMachinePoolDeploymentStrategy) DeepCopyInto(out *GCPMachinePoolDeploymentStrategy) { + *out = *in + if in.RollingUpdate != nil { + in, out := &in.RollingUpdate, &out.RollingUpdate + *out = new(MachineRollingUpdateDeployment) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolDeploymentStrategy. +func (in *GCPMachinePoolDeploymentStrategy) DeepCopy() *GCPMachinePoolDeploymentStrategy { + if in == nil { + return nil + } + out := new(GCPMachinePoolDeploymentStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPMachinePoolList) DeepCopyInto(out *GCPMachinePoolList) { *out = *in @@ -111,6 +134,117 @@ func (in *GCPMachinePoolList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolMachine) DeepCopyInto(out *GCPMachinePoolMachine) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolMachine. +func (in *GCPMachinePoolMachine) DeepCopy() *GCPMachinePoolMachine { + if in == nil { + return nil + } + out := new(GCPMachinePoolMachine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePoolMachine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolMachineList) DeepCopyInto(out *GCPMachinePoolMachineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]GCPMachinePoolMachine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolMachineList. +func (in *GCPMachinePoolMachineList) DeepCopy() *GCPMachinePoolMachineList { + if in == nil { + return nil + } + out := new(GCPMachinePoolMachineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePoolMachineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolMachineSpec) DeepCopyInto(out *GCPMachinePoolMachineSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolMachineSpec. +func (in *GCPMachinePoolMachineSpec) DeepCopy() *GCPMachinePoolMachineSpec { + if in == nil { + return nil + } + out := new(GCPMachinePoolMachineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GCPMachinePoolMachineStatus) DeepCopyInto(out *GCPMachinePoolMachineStatus) { + *out = *in + if in.NodeRef != nil { + in, out := &in.NodeRef, &out.NodeRef + *out = new(corev1.ObjectReference) + **out = **in + } + if in.FailureReason != nil { + in, out := &in.FailureReason, &out.FailureReason + *out = new(errors.MachineStatusError) + **out = **in + } + if in.FailureMessage != nil { + in, out := &in.FailureMessage, &out.FailureMessage + *out = new(string) + **out = **in + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(cluster_apiapiv1beta1.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolMachineStatus. +func (in *GCPMachinePoolMachineStatus) DeepCopy() *GCPMachinePoolMachineStatus { + if in == nil { + return nil + } + out := new(GCPMachinePoolMachineStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPMachinePoolSpec) DeepCopyInto(out *GCPMachinePoolSpec) { *out = *in @@ -175,6 +309,12 @@ func (in *GCPMachinePoolSpec) DeepCopyInto(out *GCPMachinePoolSpec) { *out = new(string) **out = **in } + in.Strategy.DeepCopyInto(&out.Strategy) + if in.NodeDrainTimeout != nil { + in, out := &in.NodeDrainTimeout, &out.NodeDrainTimeout + *out = new(v1.Duration) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolSpec. @@ -661,6 +801,31 @@ func (in *LinuxNodeConfig) DeepCopy() *LinuxNodeConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineRollingUpdateDeployment) DeepCopyInto(out *MachineRollingUpdateDeployment) { + *out = *in + if in.MaxUnavailable != nil { + in, out := &in.MaxUnavailable, &out.MaxUnavailable + *out = new(intstr.IntOrString) + **out = **in + } + if in.MaxSurge != nil { + in, out := &in.MaxSurge, &out.MaxSurge + *out = new(intstr.IntOrString) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineRollingUpdateDeployment. +func (in *MachineRollingUpdateDeployment) DeepCopy() *MachineRollingUpdateDeployment { + if in == nil { + return nil + } + out := new(MachineRollingUpdateDeployment) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
 func (in *MasterAuthorizedNetworksConfig) DeepCopyInto(out *MasterAuthorizedNetworksConfig) {
 	*out = *in
diff --git a/exp/controllers/gcpmachinepool_controller.go b/exp/controllers/gcpmachinepool_controller.go
index 058454f59..b34bd2e2f 100644
--- a/exp/controllers/gcpmachinepool_controller.go
+++ b/exp/controllers/gcpmachinepool_controller.go
@@ -60,6 +60,9 @@ type GCPMachinePoolReconciler struct {
 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools/finalizers,verbs=update
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines/status,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines/finalizers,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch
 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch
 // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
@@ -94,6 +97,16 @@ func (r *GCPMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctr
 		return errors.Wrapf(err, "error creating controller")
 	}
 
+	// Watch GCPMachinePoolMachine resources and enqueue the owning GCPMachinePool for reconciliation.
+	if err := c.Watch(
+		source.Kind(mgr.GetCache(), &infrav1exp.GCPMachinePoolMachine{}),
+		handler.EnqueueRequestsFromMapFunc(GCPMachinePoolMachineMapper(mgr.GetScheme(), log)),
+		MachinePoolMachineHasStateOrVersionChange(log),
+		predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue),
+	); err != nil {
+		return errors.Wrap(err, "failed adding a watch for GCPMachinePoolMachine")
+	}
+
 	// Add a watch on clusterv1.Cluster object for unpause & ready notifications.
 	if err := c.Watch(
 		source.Kind(mgr.GetCache(), &clusterv1.Cluster{}),
@@ -210,7 +223,7 @@ func (r *GCPMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
 	}
 
 	// If the GCPMachinePool doesn't have our finalizer, add it.
- controllerutil.AddFinalizer(machinePoolScope.GCPMachinePool, infrav1exp.MachinePoolFinalizer) + controllerutil.AddFinalizer(machinePoolScope.GCPMachinePool, expclusterv1.MachinePoolFinalizer) if err := machinePoolScope.PatchObject(ctx); err != nil { return ctrl.Result{}, err } @@ -234,10 +247,18 @@ func (r *GCPMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP return ctrl.Result{}, err } if res.Requeue { + log.Info("Requeueing GCPMachinePool reconcile") return res, nil } } + if machinePoolScope.NeedsRequeue() { + log.Info("Requeueing GCPMachinePool reconcile", "RequeueAfter", 30*time.Second) + return reconcile.Result{ + RequeueAfter: 30 * time.Second, + }, nil + } + return ctrl.Result{}, nil } @@ -262,9 +283,8 @@ func (r *GCPMachinePoolReconciler) reconcileDelete(ctx context.Context, machineP } } - // Remove the finalizer - - controllerutil.RemoveFinalizer(machinePoolScope.GCPMachinePool, infrav1exp.MachinePoolFinalizer) + // Remove the finalizer from the GCPMachinePool + controllerutil.RemoveFinalizer(machinePoolScope.GCPMachinePool, expclusterv1.MachinePoolFinalizer) return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil } diff --git a/exp/controllers/gcpmachinepoolmachine_controller.go b/exp/controllers/gcpmachinepoolmachine_controller.go new file mode 100644 index 000000000..2fa6ee5c6 --- /dev/null +++ b/exp/controllers/gcpmachinepoolmachine_controller.go @@ -0,0 +1,274 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "time" + + "github.com/googleapis/gax-go/v2/apierror" + "github.com/pkg/errors" + "google.golang.org/grpc/codes" + apierrors "k8s.io/apimachinery/pkg/api/errors" + infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/services/compute/instancegroupinstances" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/predicates" + "sigs.k8s.io/cluster-api/util/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +// GCPMachinePoolMachineReconciler reconciles a GCPMachinePoolMachine object and the corresponding MachinePool object. 
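+// It reconciles a single instance of a managed instance group: it keeps the
+// per-instance status (provisioning state, node reference, latest-model flag)
+// in sync with GCP, and cordons, drains, and deletes the instance when the
+// GCPMachinePoolMachine is deleted.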
+type GCPMachinePoolMachineReconciler struct {
+	client.Client
+	ReconcileTimeout time.Duration
+	WatchFilterValue string
+}
+
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepoolmachines/finalizers,verbs=update
+// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch
+// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch
+// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
+// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *GCPMachinePoolMachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
+	log := log.FromContext(ctx).WithValues("controller", "GCPMachinePoolMachine")
+
+	gvk, err := apiutil.GVKForObject(new(infrav1exp.GCPMachinePoolMachine), mgr.GetScheme())
+	if err != nil {
+		return errors.Wrapf(err, "failed to find GVK for GCPMachinePoolMachine")
+	}
+
+	c, err := ctrl.NewControllerManagedBy(mgr).
+		WithOptions(options).
+		For(&infrav1exp.GCPMachinePoolMachine{}).
+		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)).
+		Watches(
+			&expclusterv1.MachinePool{},
+			handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(gvk)),
+		).
+		Build(r)
+	if err != nil {
+		return errors.Wrapf(err, "error creating controller")
+	}
+
+	// Add a watch on clusterv1.Cluster object for unpause & ready notifications.
+	if err := c.Watch(
+		source.Kind(mgr.GetCache(), &clusterv1.Cluster{}),
+		handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, gvk, mgr.GetClient(), &infrav1exp.GCPMachinePoolMachine{})),
+		predicates.ClusterUnpausedAndInfrastructureReady(log),
+	); err != nil {
+		return errors.Wrap(err, "failed adding a watch for ready clusters")
+	}
+
+	return nil
+}
+
+// Reconcile handles GCPMachinePoolMachine events and reconciles the corresponding MachinePool.
+func (r *GCPMachinePoolMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
+	ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultedLoopTimeout(r.ReconcileTimeout))
+	defer cancel()
+
+	log := ctrl.LoggerFrom(ctx)
+
+	// Fetch the GCPMachinePoolMachine instance.
+	gcpMachinePoolMachine := &infrav1exp.GCPMachinePoolMachine{}
+	if err := r.Get(ctx, req.NamespacedName, gcpMachinePoolMachine); err != nil {
+		if apierrors.IsNotFound(err) {
+			return ctrl.Result{}, nil
+		}
+		return ctrl.Result{}, err
+	}
+
+	// Get the GCPMachinePool.
+	gcpMachinePool, err := GetOwnerGCPMachinePool(ctx, r.Client, gcpMachinePoolMachine.ObjectMeta)
+	if err != nil {
+		log.Error(err, "Failed to retrieve owner GCPMachinePool from the API Server")
+		return ctrl.Result{}, err
+	}
+	if gcpMachinePool == nil {
+		log.Info("Waiting for GCPMachinePool Controller to set OwnerRef on GCPMachinePoolMachine")
+		return ctrl.Result{}, nil
+	}
+
+	// Get the MachinePool.
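+	// Ownership chain: GCPMachinePoolMachine -> GCPMachinePool -> MachinePool -> Cluster.
+	// Each hop is resolved through OwnerReferences, so reconciliation waits until
+	// the upstream controllers have set them.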
+	machinePool, err := GetOwnerMachinePool(ctx, r.Client, gcpMachinePool.ObjectMeta)
+	if err != nil {
+		log.Error(err, "Failed to retrieve owner MachinePool from the API Server")
+		return ctrl.Result{}, err
+	}
+	if machinePool == nil {
+		log.Info("Waiting for MachinePool Controller to set OwnerRef on GCPMachinePool")
+		return ctrl.Result{}, nil
+	}
+
+	// Get the Cluster.
+	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta)
+	if err != nil {
+		log.Error(err, "Failed to retrieve owner Cluster from the API Server")
+		return ctrl.Result{}, err
+	}
+	if annotations.IsPaused(cluster, gcpMachinePoolMachine) {
+		log.Info("GCPMachinePoolMachine or linked Cluster is marked as paused. Won't reconcile")
+		return ctrl.Result{}, nil
+	}
+
+	// Add the cluster name to the logger and fetch the GCPCluster that backs the Cluster.
+	log = log.WithValues("cluster", cluster.Name)
+	gcpClusterName := client.ObjectKey{
+		Namespace: gcpMachinePoolMachine.Namespace,
+		Name:      cluster.Spec.InfrastructureRef.Name,
+	}
+	gcpCluster := &infrav1.GCPCluster{}
+	if err := r.Client.Get(ctx, gcpClusterName, gcpCluster); err != nil {
+		log.Info("GCPCluster is not available yet")
+		return ctrl.Result{}, err
+	}
+
+	// Create the cluster scope.
+	clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{
+		Client:     r.Client,
+		Cluster:    cluster,
+		GCPCluster: gcpCluster,
+	})
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to create cluster scope")
+	}
+
+	// Create the machine pool machine scope.
+	machinePoolMachineScope, err := scope.NewMachinePoolMachineScope(scope.MachinePoolMachineScopeParams{
+		Client:                r.Client,
+		MachinePool:           machinePool,
+		ClusterGetter:         clusterScope,
+		GCPMachinePool:        gcpMachinePool,
+		GCPMachinePoolMachine: gcpMachinePoolMachine,
+	})
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to create machine pool machine scope")
+	}
+
+	// Always close the scope when exiting this function so we can persist any GCPMachinePoolMachine changes.
+	defer func() {
+		if err := machinePoolMachineScope.Close(ctx); err != nil && reterr == nil {
+			reterr = err
+		}
+	}()
+
+	// Handle deleted machine pool machines.
+	if !gcpMachinePoolMachine.DeletionTimestamp.IsZero() {
+		return r.reconcileDelete(ctx, machinePoolMachineScope)
+	}
+
+	// Handle non-deleted machine pool machines.
+	return r.reconcileNormal(ctx, machinePoolMachineScope)
+}
+
+// reconcileNormal handles non-deleted GCPMachinePoolMachine instances.
+func (r *GCPMachinePoolMachineReconciler) reconcileNormal(ctx context.Context, machinePoolMachineScope *scope.MachinePoolMachineScope) (ctrl.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+	log.Info("Reconciling GCPMachinePoolMachine")
+
+	// If the owning GCPMachinePool is in an error state, return early.
+	if machinePoolMachineScope.GCPMachinePool.Status.FailureReason != nil || machinePoolMachineScope.GCPMachinePool.Status.FailureMessage != nil {
+		log.Info("Error state detected, skipping reconciliation")
+		return ctrl.Result{}, nil
+	}
+
+	reconcilers := []cloud.ReconcilerWithResult{
+		instancegroupinstances.New(machinePoolMachineScope),
+	}
+
+	for _, reconcileSvc := range reconcilers {
+		res, err := reconcileSvc.Reconcile(ctx)
+		if err != nil {
+			var e *apierror.APIError
+			if ok := errors.As(err, &e); ok {
+				if e.GRPCStatus().Code() == codes.FailedPrecondition {
+					log.Info("GCP API returned a failed precondition error, retrying")
+					return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+				}
+			}
+			log.Error(err, "Failed to reconcile GCPMachinePoolMachine")
+			record.Warnf(machinePoolMachineScope.GCPMachinePoolMachine, "FailedReconcile", "Failed to reconcile GCPMachinePoolMachine: %v", err)
+			return ctrl.Result{}, err
+		}
+		if res.Requeue || res.RequeueAfter > 0 {
+			return res, nil
+		}
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// reconcileDelete handles deleted GCPMachinePoolMachine instances.
+func (r *GCPMachinePoolMachineReconciler) reconcileDelete(ctx context.Context, machinePoolMachineScope *scope.MachinePoolMachineScope) (ctrl.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+	log.Info("Reconciling GCPMachinePoolMachine delete")
+
+	reconcilers := []cloud.ReconcilerWithResult{
+		instancegroupinstances.New(machinePoolMachineScope),
+	}
+
+	for _, reconcileSvc := range reconcilers {
+		res, err := reconcileSvc.Delete(ctx)
+		if err != nil {
+			log.Error(err, "Failed to reconcile GCPMachinePoolMachine delete")
+			record.Warnf(machinePoolMachineScope.GCPMachinePoolMachine, "FailedDelete", "Failed to delete GCPMachinePoolMachine: %v", err)
+			return ctrl.Result{}, err
+		}
+		if res.Requeue || res.RequeueAfter > 0 {
+			return res, nil
+		}
+	}
+
+	// Remove the finalizer from the GCPMachinePoolMachine.
+	controllerutil.RemoveFinalizer(machinePoolMachineScope.GCPMachinePoolMachine, infrav1exp.GCPMachinePoolMachineFinalizer)
+
+	return ctrl.Result{}, nil
+}
+
+// getGCPMachinePoolByName fetches the GCPMachinePool with the given namespace and name.
+func getGCPMachinePoolByName(ctx context.Context, c client.Client, namespace, name string) (*infrav1exp.GCPMachinePool, error) {
+	gcpMachinePool := &infrav1exp.GCPMachinePool{}
+	key := client.ObjectKey{
+		Namespace: namespace,
+		Name:      name,
+	}
+	if err := c.Get(ctx, key, gcpMachinePool); err != nil {
+		return nil, err
+	}
+	return gcpMachinePool, nil
+}
diff --git a/exp/controllers/helpers.go b/exp/controllers/helpers.go
index 14c7a09a9..69c965d27 100644
--- a/exp/controllers/helpers.go
+++ b/exp/controllers/helpers.go
@@ -22,9 +22,16 @@ import (
 	"github.com/go-logr/logr"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
 	"sigs.k8s.io/cluster-api-provider-gcp/util/reconciler"
 	kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
 	"github.com/pkg/errors"
@@ -52,6 +59,23 @@ func GetOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.Object
 	return nil, nil
 }
 
+// GetOwnerGCPMachinePool returns the GCPMachinePool object owning the current resource.
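+// It returns nil without an error when no GCPMachinePool owner reference has been set yet.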
+func GetOwnerGCPMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*infrav1exp.GCPMachinePool, error) {
+	for _, ref := range obj.OwnerReferences {
+		if ref.Kind != "GCPMachinePool" {
+			continue
+		}
+		gv, err := schema.ParseGroupVersion(ref.APIVersion)
+		if err != nil {
+			return nil, errors.WithStack(err)
+		}
+		if gv.Group == infrav1exp.GroupVersion.Group {
+			return getGCPMachinePoolByName(ctx, c, obj.Namespace, ref.Name)
+		}
+	}
+	return nil, nil
+}
+
 // KubeadmConfigToInfrastructureMapFunc returns a handler.ToRequestsFunc that watches for KubeadmConfig events and returns.
 func KubeadmConfigToInfrastructureMapFunc(_ context.Context, c client.Client, log logr.Logger) handler.MapFunc {
 	return func(ctx context.Context, o client.Object) []reconcile.Request {
@@ -110,3 +134,78 @@ func KubeadmConfigToInfrastructureMapFunc(_ context.Context, c client.Client, lo
 	}
 }
+
+// GCPMachinePoolMachineMapper returns a handler.MapFunc that maps GCPMachinePoolMachine events to requests for the owning GCPMachinePool.
+func GCPMachinePoolMachineMapper(scheme *runtime.Scheme, log logr.Logger) handler.MapFunc {
+	return func(ctx context.Context, o client.Object) []ctrl.Request {
+		gvk, err := apiutil.GVKForObject(new(infrav1exp.GCPMachinePool), scheme)
+		if err != nil {
+			log.Error(errors.WithStack(err), "failed to find GVK for GCPMachinePool")
+			return nil
+		}
+
+		gcpMachinePoolMachine, ok := o.(*infrav1exp.GCPMachinePoolMachine)
+		if !ok {
+			log.Error(errors.Errorf("expected a GCPMachinePoolMachine, got %T instead", o), "failed to map GCPMachinePoolMachine")
+			return nil
+		}
+
+		log := log.WithValues("GCPMachinePoolMachine", gcpMachinePoolMachine.Name, "Namespace", gcpMachinePoolMachine.Namespace)
+		for _, ref := range gcpMachinePoolMachine.OwnerReferences {
+			if ref.Kind != gvk.Kind {
+				continue
+			}
+
+			gv, err := schema.ParseGroupVersion(ref.APIVersion)
+			if err != nil {
+				log.Error(errors.WithStack(err), "unable to parse group version", "APIVersion", ref.APIVersion)
+				return nil
+			}
+
+			if gv.Group == gvk.Group {
+				return []ctrl.Request{
+					{
+						NamespacedName: types.NamespacedName{
+							Name:      ref.Name,
+							Namespace: gcpMachinePoolMachine.Namespace,
+						},
+					},
+				}
+			}
+		}
+
+		return nil
+	}
+}
+
+// MachinePoolMachineHasStateOrVersionChange filters update events based on the GCPMachinePoolMachine
+// status fields relevant to the GCPMachinePool controller.
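+// Only changes to LatestModelApplied, Version, or Ready pass the filter; create, delete, and
+// generic events are always dropped.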
+func MachinePoolMachineHasStateOrVersionChange(logger logr.Logger) predicate.Funcs {
+	return predicate.Funcs{
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			log := logger.WithValues("predicate", "MachinePoolMachineHasStateOrVersionChange", "eventType", "update")
+
+			oldGmp, ok := e.ObjectOld.(*infrav1exp.GCPMachinePoolMachine)
+			if !ok {
+				log.V(4).Info("Expected GCPMachinePoolMachine", "type", e.ObjectOld.GetObjectKind().GroupVersionKind().String())
+				return false
+			}
+			log = log.WithValues("namespace", oldGmp.Namespace, "machinePoolMachine", oldGmp.Name)
+
+			newGmp, ok := e.ObjectNew.(*infrav1exp.GCPMachinePoolMachine)
+			if !ok {
+				log.V(4).Info("Expected GCPMachinePoolMachine", "type", e.ObjectNew.GetObjectKind().GroupVersionKind().String())
+				return false
+			}
+
+			// If any of these fields differ, enqueue an update.
+			shouldUpdate := oldGmp.Status.LatestModelApplied != newGmp.Status.LatestModelApplied ||
+				oldGmp.Status.Version != newGmp.Status.Version ||
+				oldGmp.Status.Ready != newGmp.Status.Ready
+
+			if shouldUpdate {
+				log.Info("machine pool machine predicate", "shouldUpdate", shouldUpdate)
+			}
+			return shouldUpdate
+		},
+		CreateFunc:  func(e event.CreateEvent) bool { return false },
+		DeleteFunc:  func(e event.DeleteEvent) bool { return false },
+		GenericFunc: func(e event.GenericEvent) bool { return false },
+	}
+}
diff --git a/go.mod b/go.mod
index 90934b752..eda3f5e5d 100644
--- a/go.mod
+++ b/go.mod
@@ -34,6 +34,7 @@ require (
 	k8s.io/client-go v0.28.6
 	k8s.io/component-base v0.28.6
 	k8s.io/klog/v2 v2.110.1
+	k8s.io/kubectl v0.28.4
 	k8s.io/utils v0.0.0-20240102154912-e7106e64919e
 	sigs.k8s.io/cluster-api v1.6.2
 	sigs.k8s.io/cluster-api/test v1.6.2
@@ -43,24 +44,39 @@ require (
 require (
 	cloud.google.com/go v0.112.0 // indirect
 	cloud.google.com/go/longrunning v0.5.5 // indirect
+	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
 	github.com/NYTimes/gziphandler v1.1.1 // indirect
 	github.com/ProtonMail/go-crypto v0.0.0-20230217124315-7d5c6f04bbb8 // indirect
 	github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect
 	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/chai2010/gettext-go v1.0.2 // indirect
 	github.com/cloudflare/circl v1.3.7 // indirect
 	github.com/coreos/go-semver v0.3.1 // indirect
 	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
 	github.com/distribution/reference v0.5.0 // indirect
+	github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-errors/errors v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/google/btree v1.0.1 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-github/v53 v53.2.0 // indirect
+	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
+	github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
 	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
+	github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
 	github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
+	github.com/mitchellh/go-wordwrap v1.0.1 // indirect
+	github.com/moby/spdystream v0.2.0 // indirect
+	github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
+	github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
+	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
+	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/sagikazarmark/locafero v0.3.0 // indirect
 	github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect + github.com/xlab/treeprint v1.2.0 // indirect go.etcd.io/etcd/api/v3 v3.5.10 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.10 // indirect go.etcd.io/etcd/client/v3 v3.5.10 // indirect @@ -73,11 +89,15 @@ require ( go.opentelemetry.io/otel/sdk v1.21.0 // indirect go.opentelemetry.io/otel/trace v1.23.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect + go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/sync v0.6.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect + k8s.io/cli-runtime v0.28.4 // indirect k8s.io/kms v0.28.6 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.1.2 // indirect + sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect + sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect ) require ( diff --git a/go.sum b/go.sum index 0745c1013..6dac53779 100644 --- a/go.sum +++ b/go.sum @@ -78,6 +78,8 @@ github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVK github.com/alessio/shellescape v1.4.1/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -90,6 +92,8 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= +github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -113,6 +117,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= +github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -146,12 +152,16 @@ github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro= github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -260,6 +270,8 @@ github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2 h1:SJ+NtwL6QaZ21U+IrK7d0gGgpjGGvd2kz+FzTHVzdqI= github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2/go.mod h1:Tv1PlzqC9t8wNnpPdctvtSUOPUUg4SHeE6vR1Ir2hmg= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -273,6 +285,8 @@ github.com/googleapis/gax-go/v2 v2.12.2/go.mod h1:61M8vcyyXR2kqKFxKrfA22jaA8JGF7 github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= 
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= @@ -315,6 +329,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -327,11 +343,15 @@ github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQth github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= +github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= +github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -339,6 +359,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= @@ -357,6 +379,8 @@ 
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3v github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -376,11 +400,14 @@ github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3c github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.3.0 h1:zT7VEGWC2DTflmccN/5T1etyKvxSxpHsjb9cJvm4SvQ= github.com/sagikazarmark/locafero v0.3.0/go.mod h1:w+v7UsPNFwzF1cHuOajOOzoq4U7v/ig1mpRjqV+Bu1U= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= @@ -408,6 +435,8 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -428,6 +457,8 @@ github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXV github.com/valyala/fastjson v1.6.4/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= +github.com/xlab/treeprint v1.2.0/go.mod 
h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -475,6 +506,8 @@ go.opentelemetry.io/otel/trace v1.23.0 h1:37Ik5Ib7xfYVb4V1UtnT97T1jI+AoIYkJyPkuL go.opentelemetry.io/otel/trace v1.23.0/go.mod h1:GSGTbIClEsuZrGIzoEHqsVfxgn5UkggkflQwDScNUsk= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -628,10 +661,12 @@ golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -639,6 +674,7 @@ golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= @@ -859,6 +895,8 @@ k8s.io/apimachinery v0.28.6 h1:RsTeR4z6S07srPg6XYrwXpTJVMXsjPXn0ODakMytSW0= k8s.io/apimachinery v0.28.6/go.mod h1:QFNX/kCl/EMT2WTSz8k4WLCv2XnkOLMaL8GAVRMdpsA= k8s.io/apiserver v0.28.6 h1:SfS5v4I5UGvh0q/1rzvNwLFsK+r7YzcsixnUc0NwoEk= k8s.io/apiserver v0.28.6/go.mod h1:8n0aerS3kPm9usyB8B+an6/BZ5+Fa9fNqlASFdDDVwk= +k8s.io/cli-runtime v0.28.4 
h1:IW3aqSNFXiGDllJF4KVYM90YX4cXPGxuCxCVqCD8X+Q= +k8s.io/cli-runtime v0.28.4/go.mod h1:MLGRB7LWTIYyYR3d/DOgtUC8ihsAPA3P8K8FDNIqJ0k= k8s.io/client-go v0.28.6 h1:Gge6ziyIdafRchfoBKcpaARuz7jfrK1R1azuwORIsQI= k8s.io/client-go v0.28.6/go.mod h1:+nu0Yp21Oeo/cBCsprNVXB2BfJTV51lFfe5tXl2rUL8= k8s.io/cluster-bootstrap v0.28.4 h1:4MKNy1Qd9QY7pl47rSMGIORF+tm3CUaqC1M8U9bjn4Q= @@ -871,6 +909,8 @@ k8s.io/kms v0.28.6 h1:WfpL9iSiB012zPUtPGT+OGv4yncdcvwH1ce/UYv4RjQ= k8s.io/kms v0.28.6/go.mod h1:ONhtDMHoDgKQ/QzN6WiqJlmnpE9iyMQg1pLock4zug8= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= +k8s.io/kubectl v0.28.4 h1:gWpUXW/T7aFne+rchYeHkyB8eVDl5UZce8G4X//kjUQ= +k8s.io/kubectl v0.28.4/go.mod h1:CKOccVx3l+3MmDbkXtIUtibq93nN2hkDR99XDCn7c/c= k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= @@ -888,8 +928,12 @@ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMm sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/kind v0.20.0 h1:f0sc3v9mQbGnjBUaqSFST1dwIuiikKVGgoTwpoP33a8= sigs.k8s.io/kind v0.20.0/go.mod h1:aBlbxg08cauDgZ612shr017/rZwqd7AS563FvpWKPVs= +sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 h1:XX3Ajgzov2RKUdc5jW3t5jwY7Bo7dcRm+tFxT+NfgY0= +sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3/go.mod h1:9n16EZKMhXBNSiUC5kSdFQJkdH3zbxS/JoO619G1VAY= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 h1:W6cLQc5pnqM7vh3b7HvGNfXrJ/xL6BDMS0v1V/HHg5U= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3/go.mod h1:JWP1Fj0VWGHyw3YUPjXSQnRnrwezrZSrApfX5S0nIag= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= \ No newline at end of file diff --git a/main.go b/main.go index 5cce987fc..ee12efb88 100644 --- a/main.go +++ b/main.go @@ -242,6 +242,12 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) error { }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpMachineConcurrency}); err != nil { return fmt.Errorf("setting up GCPMachinePool controller: %w", err) } + + if err := (&expcontrollers.GCPMachinePoolMachineReconciler{ + Client: mgr.GetClient(), + }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpMachineConcurrency}); err != nil { + return fmt.Errorf("setting up GCPMachinePoolMachine controller: %w", err) + } } return nil
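
Note on wiring: GCPMachinePoolMachineMapper and MachinePoolMachineHasStateOrVersionChange are
defined in helpers.go above, but no caller appears in this diff. The likely consumer is the
GCPMachinePool controller, which would watch GCPMachinePoolMachine objects along these lines
(a sketch only; the builder options and the log variable are assumptions, not part of the patch):

	// Hypothetical excerpt from GCPMachinePoolReconciler.SetupWithManager:
	err := ctrl.NewControllerManagedBy(mgr).
		For(&infrav1exp.GCPMachinePool{}).
		Watches(
			&infrav1exp.GCPMachinePoolMachine{},
			// Map each machine back to its owning pool...
			handler.EnqueueRequestsFromMapFunc(GCPMachinePoolMachineMapper(mgr.GetScheme(), log)),
			// ...but only for the status transitions the predicate allows.
			builder.WithPredicates(MachinePoolMachineHasStateOrVersionChange(log)),
		).
		Complete(r)

With wiring of this shape, the pool controller re-reconciles only when a machine's
LatestModelApplied, Version, or Ready status actually changes, rather than on every instance update.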