Skip to content

Commit

Permalink
Use get instead of list to retrieve servers
Browse files Browse the repository at this point in the history
Under certain circumstances, when they are non-responsive cells, nova
may return a partial result instead of an error.

(this is controlled by list_records_by_skipping_down_cells parameter
that is true by default - no error will be thrown)

When it faces this issue, capo controller will try to create a new
server for the machine, resulting in deleting ports of existing one.

In order avoid this issue, use GET instead of LIST with server uuid stored
in machine spec when it is created, and store server ID in machine spec
immediately.

For that purpose server status polling that used to take place in
CreateMachine has to be moved to main reconcile loop. ReconcileBastion
also needed to be revisited to properly support requeuing.
  • Loading branch information
zioc committed Jan 12, 2024
1 parent 5440246 commit 75ffe73
Show file tree
Hide file tree
Showing 11 changed files with 349 additions and 151 deletions.
7 changes: 5 additions & 2 deletions api/v1alpha5/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
type InstanceState string

var (
// InstanceStateBuilding is the string representing an instance in a building state.
InstanceStateBuilding = InstanceState("BUILDING")
// InstanceStateBuild is the string representing an instance in a build state.
InstanceStateBuild = InstanceState("BUILD")

// InstanceStateActive is the string representing an instance in an active state.
InstanceStateActive = InstanceState("ACTIVE")
Expand All @@ -290,6 +290,9 @@ var (

// InstanceStateDeleted is the string representing an instance in a deleted state.
InstanceStateDeleted = InstanceState("DELETED")

// InstanceStateUndefined is the string representing an undefined instance state.
InstanceStateUndefined = InstanceState("")
)

// Bastion represents basic information about the bastion node.
Expand Down
7 changes: 5 additions & 2 deletions api/v1alpha6/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
type InstanceState string

var (
// InstanceStateBuilding is the string representing an instance in a building state.
InstanceStateBuilding = InstanceState("BUILDING")
// InstanceStateBuild is the string representing an instance in a build state.
InstanceStateBuild = InstanceState("BUILD")

// InstanceStateActive is the string representing an instance in an active state.
InstanceStateActive = InstanceState("ACTIVE")
Expand All @@ -302,6 +302,9 @@ var (

// InstanceStateDeleted is the string representing an instance in a deleted state.
InstanceStateDeleted = InstanceState("DELETED")

// InstanceStateUndefined is the string representing an undefined instance state.
InstanceStateUndefined = InstanceState("")
)

// Bastion represents basic information about the bastion node.
Expand Down
7 changes: 5 additions & 2 deletions api/v1alpha7/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
type InstanceState string

var (
// InstanceStateBuilding is the string representing an instance in a building state.
InstanceStateBuilding = InstanceState("BUILDING")
// InstanceStateBuild is the string representing an instance in a build state.
InstanceStateBuild = InstanceState("BUILD")

// InstanceStateActive is the string representing an instance in an active state.
InstanceStateActive = InstanceState("ACTIVE")
Expand All @@ -332,6 +332,9 @@ var (

// InstanceStateDeleted is the string representing an instance in a deleted state.
InstanceStateDeleted = InstanceState("DELETED")

// InstanceStateUndefined is the string representing an undefined instance state.
InstanceStateUndefined = InstanceState("")
)

// Bastion represents basic information about the bastion node.
Expand Down
7 changes: 5 additions & 2 deletions api/v1alpha8/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
type InstanceState string

var (
// InstanceStateBuilding is the string representing an instance in a building state.
InstanceStateBuilding = InstanceState("BUILDING")
// InstanceStateBuild is the string representing an instance in a build state.
InstanceStateBuild = InstanceState("BUILD")

// InstanceStateActive is the string representing an instance in an active state.
InstanceStateActive = InstanceState("ACTIVE")
Expand All @@ -338,6 +338,9 @@ var (

// InstanceStateDeleted is the string representing an instance in a deleted state.
InstanceStateDeleted = InstanceState("DELETED")

// InstanceStateUndefined is the string representing an undefined instance state.
InstanceStateUndefined = InstanceState("")
)

// Bastion represents basic information about the bastion node.
Expand Down
144 changes: 94 additions & 50 deletions controllers/openstackcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,24 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
return err
}

instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName)
if err != nil {
return err
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.FloatingIP != "" {
if err = networkingService.DeleteFloatingIP(openStackCluster, openStackCluster.Status.Bastion.FloatingIP); err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
return fmt.Errorf("failed to delete floating IP: %w", err)
}
}

var instanceStatus *compute.InstanceStatus
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID)
if err != nil {
return err
}
} else {
instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, bastionName(cluster.Name))
if err != nil {
return err
}
}

if instanceStatus != nil {
Expand All @@ -230,6 +244,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust

for _, address := range addresses {
if address.Type == corev1.NodeExternalIP {
// Floating IP may not have properly saved in bastion status (thus not deleted above), delete any remaining floating IP
if err = networkingService.DeleteFloatingIP(openStackCluster, address.Address); err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
return fmt.Errorf("failed to delete floating IP: %w", err)
Expand Down Expand Up @@ -277,8 +292,9 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
return reconcile.Result{}, err
}

if err = reconcileBastion(scope, cluster, openStackCluster); err != nil {
return reconcile.Result{}, err
result, err := reconcileBastion(scope, cluster, openStackCluster)
if err != nil || !reflect.DeepEqual(result, reconcile.Result{}) {
return result, err
}

availabilityZones, err := computeService.GetAvailabilityZones()
Expand Down Expand Up @@ -308,88 +324,112 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
return reconcile.Result{}, nil
}

func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) {
scope.Logger().Info("Reconciling Bastion")

if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled {
return deleteBastion(scope, cluster, openStackCluster)
return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster)
}

computeService, err := compute.NewService(scope)
if err != nil {
return err
return reconcile.Result{}, err
}

instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
bastionHash, err := compute.HashInstanceSpec(instanceSpec)
if err != nil {
return fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
}
if bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
return ctrl.Result{}, err
}
}

instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name))
if err != nil {
return err
var instanceStatus *compute.InstanceStatus
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
return reconcile.Result{}, err
}
}
if instanceStatus != nil {
if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
bastion, err := instanceStatus.BastionStatus(openStackCluster)
if err != nil {
return err
}
// Add the current hash if no annotation is set.
if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok {
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
}
openStackCluster.Status.Bastion = bastion
return nil
if instanceStatus == nil {
// Check if there is an existing instance with bastion name, in case where bastion ID would not have been properly stored in cluster status
if instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceSpec.Name); err != nil {
return reconcile.Result{}, err
}

if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
return err
}
if instanceStatus == nil {
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
}
}

instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
if err != nil {
return fmt.Errorf("failed to reconcile bastion: %w", err)
// Save hash & status as soon as we know we have an instance
instanceStatus.UpdateBastionStatus(openStackCluster)
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})

// Make sure that bastion instance has a valid state
switch instanceStatus.State() {
case infrav1.InstanceStateError:
return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR")
case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined:
scope.Logger().Info("Waiting for bastion instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State())
return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
case infrav1.InstanceStateDeleted:
// This should normally be handled by deleteBastion
openStackCluster.Status.Bastion = nil
return ctrl.Result{}, nil
}

networkingService, err := networking.NewService(scope)
if err != nil {
return err
}
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.Instance.FloatingIP)
if err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
return fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
return ctrl.Result{}, err
}
port, err := computeService.GetManagementPort(openStackCluster, instanceStatus)
if err != nil {
err = fmt.Errorf("getting management port for bastion: %w", err)
handleUpdateOSCError(openStackCluster, err)
return err
return ctrl.Result{}, err
}
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
fp, err := networkingService.GetFloatingIPByPortID(port.ID)
if err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
return fmt.Errorf("failed to associate floating IP with bastion: %w", err)
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
return ctrl.Result{}, fmt.Errorf("failed to get floating IP for bastion port: %w", err)
}
if fp != nil {
// Floating IP is already attached to bastion, no need to proceed
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP
return ctrl.Result{}, nil
}

bastion, err := instanceStatus.BastionStatus(openStackCluster)
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
floatingIP := openStackCluster.Spec.Bastion.Instance.FloatingIP
if openStackCluster.Status.Bastion.FloatingIP != "" {
// Some floating IP has already been created for this bastion, make sure we re-use it
floatingIP = openStackCluster.Status.Bastion.FloatingIP
}
// Check if there is an existing floating IP attached to bastion, in case where FloatingIP would not yet have been stored in cluster status
fp, err = networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, floatingIP)
if err != nil {
return err
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
}
bastion.FloatingIP = fp.FloatingIP
openStackCluster.Status.Bastion = bastion
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
return nil
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP

err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
if err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err)
}

return ctrl.Result{}, nil
}

func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec {
name := fmt.Sprintf("%s-bastion", clusterName)
instanceSpec := &compute.InstanceSpec{
Name: name,
Name: bastionName(clusterName),
Flavor: openStackCluster.Spec.Bastion.Instance.Flavor,
SSHKeyName: openStackCluster.Spec.Bastion.Instance.SSHKeyName,
Image: openStackCluster.Spec.Bastion.Instance.Image,
Expand All @@ -412,6 +452,10 @@ func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterNa
return instanceSpec
}

func bastionName(clusterName string) string {
return fmt.Sprintf("%s-bastion", clusterName)
}

// bastionHashHasChanged returns a boolean whether if the latest bastion hash, built from the instance spec, has changed or not.
func bastionHashHasChanged(computeHash string, clusterAnnotations map[string]string) bool {
latestHash, ok := clusterAnnotations[BastionInstanceHashAnnotation]
Expand Down
Loading

0 comments on commit 75ffe73

Please sign in to comment.