Fix govmomi actuator's Create when using clusterctl (kubernetes-sigs#77)
Clusterctl create cluster was not able to properly create the target cluster.  It would sit
in an infinite loop, repeatedly attempting to clone the VM template.  The first fix was to stop
using annotations for the task ref and VM ref during create, and to track them in provider
status instead.  Once this fix was in, the target cluster would still sit in an infinite loop,
again attempting to clone the VM template.  The second fix was to check for nodeRef in the
actuator's Exists() function, so a machine that already has a node registered is reported
as existing.

In addition, moved utils from provisioner/common to provisioner/govmomi, as it is only ever
used by the govmomi actuator.

Fixes kubernetes-sigs#70
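
For context, here is a minimal sketch of the provider status shape that the new create.go
code reads and writes.  The field names (MachineRef, TaskRef, LastUpdated) are taken from
their usages in the diff below; the JSON tags and the helper's exact behavior are assumptions,
since the actual VsphereMachineProviderStatus type and the vsphereutils.GetMachineProviderStatus
helper are not shown in this commit (in the real tree the type lives in vsphereconfig and the
helper in vsphereutils; they are combined here for brevity):

package vsphereconfig

import (
	"encoding/json"

	clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
)

// VsphereMachineProviderStatus is a sketch; the real type may carry more
// fields and has a generated DeepCopy, which the diff below relies on.
type VsphereMachineProviderStatus struct {
	MachineRef  string `json:"machineRef,omitempty"`  // managed object ref of the cloned VM
	TaskRef     string `json:"taskRef,omitempty"`     // in-flight vSphere task; "" means idle
	LastUpdated string `json:"lastUpdated,omitempty"` // timestamp of the last status write
}

// GetMachineProviderStatus decodes machine.Status.ProviderStatus into the
// struct above.  A nil ProviderStatus yields (nil, nil), which is why the
// callers in create.go nil-check the result before comparing fields.
func GetMachineProviderStatus(machine *clusterv1.Machine) (*VsphereMachineProviderStatus, error) {
	if machine.Status.ProviderStatus == nil {
		return nil, nil
	}
	status := &VsphereMachineProviderStatus{}
	if err := json.Unmarshal(machine.Status.ProviderStatus.Raw, status); err != nil {
		return nil, err
	}
	return status, nil
}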
Loc Nguyen authored and k8s-ci-robot committed Oct 16, 2018
1 parent 8704ad1 commit 2976b2f
Showing 9 changed files with 106 additions and 83 deletions.
3 changes: 3 additions & 0 deletions cloud/vsphere/clusteractuator.go
@@ -92,8 +92,10 @@ func (vc *ClusterActuator) updateK8sAPIStatus(cluster *clusterv1.Cluster) error
// In case the secret does not exist, then it fetches from the target master node and caches it for
func (vc *ClusterActuator) fetchKubeConfig(cluster *clusterv1.Cluster, masters []*clusterv1.Machine) (string, error) {
var kubeconfig string
glog.Infof("attempting to fetch kubeconfig")
secret, err := vc.k8sClient.Core().Secrets(cluster.Namespace).Get(fmt.Sprintf(constants.KubeConfigSecretName, cluster.UID), metav1.GetOptions{})
if err != nil {
glog.Info("could not pull secrets for kubeconfig")
// TODO: Check for the proper err type for *not present* case. rather than all other cases
// Fetch the kubeconfig and create the secret saving it
// Currently we support only a single master thus the below assumption
@@ -118,6 +120,7 @@ func (vc *ClusterActuator) fetchKubeConfig(cluster *clusterv1.Cluster, masters [
glog.Warningf("Could not create the secret for the saving kubeconfig: err [%s]", err.Error())
}
} else {
glog.Info("found kubeconfig in secrets")
kubeconfig = string(secret.Data[constants.KubeConfigSecretData])
}
return kubeconfig, nil
12 changes: 0 additions & 12 deletions cloud/vsphere/machineactuator.go
@@ -75,9 +75,6 @@ func NewTerraformMachineActuator(clusterV1alpha1 clusterv1alpha1.ClusterV1alpha1
}

func (vc *VsphereClient) Create(cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
//creator := nativeprovisioner.NewCreator()
//creator.Create(cluster, machine)

if vc.provisioner != nil {
err := vc.provisioner.Create(cluster, machine)
if err != nil {
@@ -91,9 +88,6 @@ func (vc *VsphereClient) Create(cluster *clusterv1.Cluster, machine *clusterv1.M
}

func (vc *VsphereClient) Delete(cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
//deleter := nativeprovisioner.NewDeleter()
//deleter.Delete(cluster, machine)

if vc.provisioner != nil {
return vc.provisioner.Delete(cluster, machine)
}
@@ -102,9 +96,6 @@ func (vc *VsphereClient) Delete(cluster *clusterv1.Cluster, machine *clusterv1.M
}

func (vc *VsphereClient) Update(cluster *clusterv1.Cluster, goalMachine *clusterv1.Machine) error {
//updater := nativeprovisioner.NewUpdater()
//updater.Update(cluster, goalMachine)

if vc.provisioner != nil {
return vc.provisioner.Update(cluster, goalMachine)
}
@@ -113,9 +104,6 @@ func (vc *VsphereClient) Update(cluster *clusterv1.Cluster, goalMachine *cluster
}

func (vc *VsphereClient) Exists(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (bool, error) {
//validator := nativeprovisioner.NewValidator()
//validator.Exists(cluster, goalMachine)

if vc.provisioner != nil {
return vc.provisioner.Exists(cluster, machine)
}
97 changes: 66 additions & 31 deletions cloud/vsphere/provisioner/govmomi/create.go
@@ -26,7 +26,7 @@ import (
)

func (vc *Provisioner) Create(cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
glog.Info("Inside Govmomi provisioner create method")
glog.Infof("govmomi.Actuator.Create %s", machine.Spec.Name)
s, err := vc.sessionFromProviderConfig(cluster, machine)
if err != nil {
return err
@@ -60,7 +60,7 @@ func (vc *Provisioner) verifyAndUpdateTask(s *SessionContext, machine *clusterv1
if err != nil {
//TODO: inspect the error and act appropriately.
// Naive assumption is that the task does not exist any more, thus clear that from the machine
return vc.removeTaskRef(machine)
return vc.setTaskRef(machine, "")
}
switch taskmo.Info.State {
// Queued or Running
@@ -73,16 +73,20 @@ func (vc *Provisioner) verifyAndUpdateTask(s *SessionContext, machine *clusterv1
vmref := taskmo.Info.Result.(types.ManagedObjectReference)
vc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Created", "Created Machine %s(%s)", machine.Name, vmref.Value)
// Update the Machine object with the VM Reference annotation
return vc.updateVMReferenceAnnotations(machine, vmref, true)
err := vc.updateVMReference(machine, vmref.Value)
if err != nil {
return err
}
return vc.setTaskRef(machine, "")
} else if taskmo.Info.DescriptionId == "VirtualMachine.reconfigure" {
vc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Reconfigured", "Reconfigured Machine %s", taskmo.Info.EntityName)
}
return vc.removeTaskRef(machine)
return vc.setTaskRef(machine, "")
case types.TaskInfoStateError:
if taskmo.Info.DescriptionId == "VirtualMachine.clone" {
vc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Failed", "Creation failed for Machine %v", machine.Name)
// Clear the reference to the failed task so that the next reconcile loop can re-create it
return vc.removeTaskRef(machine)
return vc.setTaskRef(machine, "")
}
default:
glog.Warningf("unknown state %s for task %s detected", taskmoref, taskmo.Info.State)
@@ -189,7 +193,7 @@ func (vc *Provisioner) cloneVirtualMachine(s *SessionContext, cluster *clusterv1
prop.Info.Value = userData
}
if p.Id == "public-keys" {
prop.Info.Value, err = vc.utils.GetSSHPublicKey(cluster)
prop.Info.Value, err = vc.GetSSHPublicKey(cluster)
if err != nil {
return err
}
Expand Down Expand Up @@ -236,7 +240,7 @@ func (vc *Provisioner) cloneVirtualMachine(s *SessionContext, cluster *clusterv1
if err != nil {
return err
}
return vc.setTaskRef(machine, task.Reference())
return vc.setTaskRef(machine, task.Reference().Value)

}

@@ -264,41 +268,72 @@ func (vc *Provisioner) removeTaskRef(machine *clusterv1.Machine) error {
return err
}

// Updates the VM Reference on the Machine object to the VM in the vsphere infrastructure
// This method is ideally to be called only during inital creation of the VM
func (vc *Provisioner) updateVMReferenceAnnotations(machine *clusterv1.Machine, vmref types.ManagedObjectReference, cleartask bool) error {
nmachine := machine.DeepCopy()
if nmachine.ObjectMeta.Annotations == nil {
nmachine.ObjectMeta.Annotations = make(map[string]string)
func (vc *Provisioner) updateVMReference(machine *clusterv1.Machine, vmref string) error {
oldProviderStatus, err := vsphereutils.GetMachineProviderStatus(machine)
if err != nil {
return err
}
nmachine.ObjectMeta.Annotations[constants.VirtualMachineRef] = vmref.Value
if cleartask {
// Clear any reference to active task
delete(nmachine.ObjectMeta.Annotations, constants.VirtualMachineTaskRef)

if oldProviderStatus != nil && oldProviderStatus.MachineRef == vmref {
// Nothing to update
return nil
}
_, err := vc.clusterV1alpha1.Machines(nmachine.Namespace).Update(nmachine)
return err
newProviderStatus := &vsphereconfig.VsphereMachineProviderStatus{}
// create a copy of the old status so that any other fields except the ones we want to change can be retained
if oldProviderStatus != nil {
newProviderStatus = oldProviderStatus.DeepCopy()
}
newProviderStatus.MachineRef = vmref
newProviderStatus.LastUpdated = time.Now().UTC().String()
out, err := json.Marshal(newProviderStatus)
newMachine := machine.DeepCopy()
newMachine.Status.ProviderStatus = &runtime.RawExtension{Raw: out}
_, err = vc.clusterV1alpha1.Machines(newMachine.Namespace).UpdateStatus(newMachine)
if err != nil {
glog.Infof("Error in updating the machine ref: %s", err)
return err
}
return nil
}

// Sets the current task reference on the Machine object
func (vc *Provisioner) setTaskRef(machine *clusterv1.Machine, taskref types.ManagedObjectReference) error {
nmachine := machine.DeepCopy()
if nmachine.ObjectMeta.Annotations == nil {
nmachine.ObjectMeta.Annotations = make(map[string]string)
func (vc *Provisioner) setTaskRef(machine *clusterv1.Machine, taskref string) error {
oldProviderStatus, err := vsphereutils.GetMachineProviderStatus(machine)
if err != nil {
return err
}
nmachine.ObjectMeta.Annotations[constants.VirtualMachineTaskRef] = taskref.Value
_, err := vc.clusterV1alpha1.Machines(nmachine.Namespace).Update(nmachine)
return err

if oldProviderStatus != nil && oldProviderStatus.TaskRef == taskref {
// Nothing to update
return nil
}
newProviderStatus := &vsphereconfig.VsphereMachineProviderStatus{}
// create a copy of the old status so that any other fields except the ones we want to change can be retained
if oldProviderStatus != nil {
newProviderStatus = oldProviderStatus.DeepCopy()
}
newProviderStatus.TaskRef = taskref
newProviderStatus.LastUpdated = time.Now().UTC().String()
out, err := json.Marshal(newProviderStatus)
newMachine := machine.DeepCopy()
newMachine.Status.ProviderStatus = &runtime.RawExtension{Raw: out}
_, err = vc.clusterV1alpha1.Machines(newMachine.Namespace).UpdateStatus(newMachine)
if err != nil {
glog.Infof("Error in updating the machine ref: %s", err)
return err
}
return nil
}

// We are storing these as annotations and not in Machine Status because that's intended for
// "Provider-specific status" that will usually be used to detect updates. Additionally,
// Status requires yet another version API resource which is too heavy to store IP and TF state.
func (vc *Provisioner) updateAnnotations(cluster *clusterv1.Cluster, machine *clusterv1.Machine, vmIP string, vm *object.VirtualMachine) error {
glog.Infof("Updating annotations for machine %s", machine.ObjectMeta.Name)
nmachine := machine.DeepCopy()
if nmachine.ObjectMeta.Annotations == nil {
nmachine.ObjectMeta.Annotations = make(map[string]string)
}
glog.Infof("updateAnnotations - IP = %s", vmIP)
nmachine.ObjectMeta.Annotations[constants.VmIpAnnotationKey] = vmIP
nmachine.ObjectMeta.Annotations[constants.ControlPlaneVersionAnnotationKey] = nmachine.Spec.Versions.ControlPlane
nmachine.ObjectMeta.Annotations[constants.KubeletVersionAnnotationKey] = nmachine.Spec.Versions.Kubelet
@@ -381,21 +416,21 @@ func (vc *Provisioner) getCloudProviderConfig(cluster *clusterv1.Cluster, machin
func (vc *Provisioner) getStartupScript(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (string, error) {
config, err := vsphereutils.GetMachineProviderConfig(machine.Spec.ProviderConfig)
if err != nil {
return "", vc.utils.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
return "", vc.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
"Cannot unmarshal providerConfig field: %v", err), constants.CreateEventAction)
}
preloaded := false
if val, ok := config.MachineVariables["preloaded"]; ok {
preloaded, err = strconv.ParseBool(val)
if err != nil {
return "", vc.utils.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
return "", vc.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
"Invalid value for preloaded: %v", err), constants.CreateEventAction)
}
}
var startupScript string
if util.IsMaster(machine) {
if machine.Spec.Versions.ControlPlane == "" {
return "", vc.utils.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
return "", vc.HandleMachineError(machine, apierrors.InvalidMachineConfiguration(
"invalid master configuration: missing Machine.Spec.Versions.ControlPlane"), constants.CreateEventAction)
}
var err error
Expand All @@ -414,7 +449,7 @@ func (vc *Provisioner) getStartupScript(cluster *clusterv1.Cluster, machine *clu
glog.Infof("invalid cluster state: cannot create a Kubernetes node without an API endpoint")
return "", &clustererror.RequeueAfterError{RequeueAfter: constants.RequeueAfterSeconds}
}
kubeadmToken, err := vc.utils.GetKubeadmToken(cluster)
kubeadmToken, err := vc.GetKubeadmToken(cluster)
if err != nil {
glog.Infof("Error generating kubeadm token, will requeue: %s", err.Error())
return "", &clustererror.RequeueAfterError{RequeueAfter: constants.RequeueAfterSeconds}
6 changes: 6 additions & 0 deletions cloud/vsphere/provisioner/govmomi/exists.go
@@ -3,13 +3,19 @@ package govmomi
import (
"context"

"github.com/golang/glog"
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/types"
vsphereutils "sigs.k8s.io/cluster-api-provider-vsphere/cloud/vsphere/utils"
clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
)

func (vc *Provisioner) Exists(cluster *clusterv1.Cluster, machine *clusterv1.Machine) (bool, error) {
glog.Infof("govmomi.Actuator.Exists %s", machine.Spec.Name)
if machine.Status.NodeRef != nil {
glog.Infof("govmomi.Actuator.Exists() - running on target cluster, returning exist")
return true, nil
}
s, err := vc.sessionFromProviderConfig(cluster, machine)
if err != nil {
return false, err
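
The nodeRef check above is the second half of the fix.  A rough sketch of the reconcile
pattern that drives the actuator shows why it matters; this is an illustration only, not
the upstream cluster-api controller code:

package main

import (
	clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
)

// Actuator mirrors the Create/Update/Exists surface implemented by
// VsphereClient in machineactuator.go (Delete omitted for brevity).
type Actuator interface {
	Create(*clusterv1.Cluster, *clusterv1.Machine) error
	Update(*clusterv1.Cluster, *clusterv1.Machine) error
	Exists(*clusterv1.Cluster, *clusterv1.Machine) (bool, error)
}

// reconcile is a simplified stand-in for the upstream machine controller loop.
func reconcile(a Actuator, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
	exists, err := a.Exists(cluster, machine)
	if err != nil {
		return err
	}
	if !exists {
		// Before this commit, Exists() on the target cluster could not see the
		// VM ref, so this branch ran on every pass and cloned the template again.
		return a.Create(cluster, machine)
	}
	return a.Update(cluster, machine)
}

With the short-circuit in place, a machine that already has a NodeRef is reported as
existing even when the VM ref in provider status is not visible, so the controller stops
re-entering Create().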
5 changes: 2 additions & 3 deletions cloud/vsphere/provisioner/govmomi/provisioner.go
@@ -3,7 +3,6 @@ package govmomi
import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
vpshereprovisionercommon "sigs.k8s.io/cluster-api-provider-vsphere/cloud/vsphere/provisioner/common"
clusterv1alpha1 "sigs.k8s.io/cluster-api/pkg/client/clientset_generated/clientset/typed/cluster/v1alpha1"
"sigs.k8s.io/cluster-api/pkg/client/informers_generated/externalversions/cluster/v1alpha1"
)
@@ -13,7 +12,7 @@ type Provisioner struct {
lister v1alpha1.Interface
eventRecorder record.EventRecorder
sessioncache map[string]interface{}
utils *vpshereprovisionercommon.ProvisionerUtil
k8sClient kubernetes.Interface
}

func New(clusterV1alpha1 clusterv1alpha1.ClusterV1alpha1Interface, k8sClient kubernetes.Interface, lister v1alpha1.Interface, eventRecorder record.EventRecorder) (*Provisioner, error) {
@@ -22,6 +21,6 @@ func New(clusterV1alpha1 clusterv1alpha1.ClusterV1alpha1Interface, k8sClient kub
lister: lister,
eventRecorder: eventRecorder,
sessioncache: make(map[string]interface{}),
utils: vpshereprovisionercommon.New(clusterV1alpha1, k8sClient, lister, eventRecorder),
k8sClient: k8sClient,
}, nil
}
6 changes: 5 additions & 1 deletion cloud/vsphere/provisioner/govmomi/update.go
@@ -22,6 +22,8 @@ func (vc *Provisioner) Update(cluster *clusterv1.Cluster, machine *clusterv1.Mac
// Fetch any active task in vsphere if any
// If an active task is there,

glog.Infof("govmomi.Actuator.Update %s", machine.Spec.Name)

s, err := vc.sessionFromProviderConfig(cluster, machine)
if err != nil {
return err
@@ -48,12 +50,13 @@ func (vc *Provisioner) Update(cluster *clusterv1.Cluster, machine *clusterv1.Mac
}

if _, err := vsphereutils.GetIP(cluster, machine); err != nil {
glog.Info("actuator.Update() - did not find IP, waiting on IP")
vm := object.NewVirtualMachine(s.session.Client, vmref)
vmIP, err := vm.WaitForIP(ctx)
if err != nil {
return err
}
vc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "IP Detcted", "IP %s detected for Virtual Machine %s", vmIP, vm.Name)
vc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "IP Detected", "IP %s detected for Virtual Machine %s", vmIP, vm.Name)
return vc.updateIP(cluster, machine, vmIP)
}
return nil
@@ -65,6 +68,7 @@ func (vc *Provisioner) updateIP(cluster *clusterv1.Cluster, machine *clusterv1.M
if nmachine.ObjectMeta.Annotations == nil {
nmachine.ObjectMeta.Annotations = make(map[string]string)
}
glog.Infof("updateIP - IP = %s", vmIP)
nmachine.ObjectMeta.Annotations[constants.VmIpAnnotationKey] = vmIP
_, err := vc.clusterV1alpha1.Machines(nmachine.Namespace).Update(nmachine)
if err != nil {