Skip to content

Commit

Permalink
Merge pull request #236 from arangodb/bugfix/safeguard-current-image
Browse files Browse the repository at this point in the history
Use new CurrentImage field to prevent unintended upgrades.
  • Loading branch information
ewoutp authored Aug 9, 2018
2 parents d4c6a11 + e4736f1 commit 0165f98
Show file tree
Hide file tree
Showing 10 changed files with 237 additions and 41 deletions.
2 changes: 2 additions & 0 deletions pkg/apis/deployment/v1alpha/deployment_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ type DeploymentStatus struct {

// Images holds a list of ArangoDB images with their ID and ArangoDB version.
Images ImageInfoList `json:"arangodb-images,omitempty"`
// Image that is currently being used when new pods are created
CurrentImage *ImageInfo `json:"current-image,omitempty"`

// Members holds the status for all members in all server groups
Members DeploymentStatusMembers `json:"members"`
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/deployment/v1alpha/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ const (
ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate"
// ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed.
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
)

const (
Expand All @@ -73,6 +75,8 @@ type Action struct {
StartTime *metav1.Time `json:"startTime,omitempty"`
// Reason for this action
Reason string `json:"reason,omitempty"`
// Image used in can of a SetCurrentImage action.
Image string `json:"image,omitempty"`
}

// NewAction instantiates a new Action.
Expand All @@ -90,6 +94,13 @@ func NewAction(actionType ActionType, group ServerGroup, memberID string, reason
return a
}

// SetImage sets the Image field to the given value and returns the modified
// action.
func (a Action) SetImage(image string) Action {
a.Image = image
return a
}

// Plan is a list of actions that will be taken to update a deployment.
// Only 1 action is in progress at a time. The operator will wait for that
// action to be completely and then remove the action.
Expand Down
9 changes: 9 additions & 0 deletions pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,15 @@ func (in *DeploymentStatus) DeepCopyInto(out *DeploymentStatus) {
*out = make(ImageInfoList, len(*in))
copy(*out, *in)
}
if in.CurrentImage != nil {
in, out := &in.CurrentImage, &out.CurrentImage
if *in == nil {
*out = nil
} else {
*out = new(ImageInfo)
**out = **in
}
}
in.Members.DeepCopyInto(&out.Members)
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
Expand Down
24 changes: 24 additions & 0 deletions pkg/deployment/reconcile/action_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ type ActionContext interface {
DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error
// DeleteTLSCASecret removes the Secret containing the TLS CA certificate.
DeleteTLSCASecret() error
// GetImageInfo returns the image info for an image with given name.
// Returns: (info, infoFound)
GetImageInfo(imageName string) (api.ImageInfo, bool)
// SetCurrentImage changes the CurrentImage field in the deployment
// status to the given image.
SetCurrentImage(imageInfo api.ImageInfo) error
}

// newActionContext creates a new ActionContext implementation.
Expand Down Expand Up @@ -260,3 +266,21 @@ func (ac *actionContext) DeleteTLSCASecret() error {
}
return nil
}

// GetImageInfo returns the image info for an image with given name.
// Returns: (info, infoFound)
func (ac *actionContext) GetImageInfo(imageName string) (api.ImageInfo, bool) {
status, _ := ac.context.GetStatus()
return status.Images.GetByImage(imageName)
}

// SetCurrentImage changes the CurrentImage field in the deployment
// status to the given image.
func (ac *actionContext) SetCurrentImage(imageInfo api.ImageInfo) error {
status, lastVersion := ac.context.GetStatus()
status.CurrentImage = &imageInfo
if err := ac.context.UpdateStatus(status, lastVersion); err != nil {
return maskAny(err)
}
return nil
}
85 changes: 85 additions & 0 deletions pkg/deployment/reconcile/action_upgrade_current_image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//

package reconcile

import (
"context"
"time"

api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
"github.com/rs/zerolog"
)

// NewSetCurrentImageAction creates a new Action that implements the given
// planned SetCurrentImage action.
func NewSetCurrentImageAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
return &setCurrentImageAction{
log: log,
action: action,
actionCtx: actionCtx,
}
}

// setCurrentImageAction implements an SetCurrentImage.
type setCurrentImageAction struct {
log zerolog.Logger
action api.Action
actionCtx ActionContext
}

// Start performs the start of the action.
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *setCurrentImageAction) Start(ctx context.Context) (bool, error) {
ready, _, err := a.CheckProgress(ctx)
if err != nil {
return false, maskAny(err)
}
return ready, nil
}

// CheckProgress checks the progress of the action.
// Returns true if the action is completely finished, false otherwise.
func (a *setCurrentImageAction) CheckProgress(ctx context.Context) (bool, bool, error) {
log := a.log

imageInfo, found := a.actionCtx.GetImageInfo(a.action.Image)
if !found {
return false, false, nil
}
if err := a.actionCtx.SetCurrentImage(imageInfo); err != nil {
return false, false, maskAny(err)
}
log.Info().Str("image", a.action.Image).Msg("Changed current image")
return true, false, nil
}

// Timeout returns the amount of time after which this action will timeout.
func (a *setCurrentImageAction) Timeout() time.Duration {
return upgradeMemberTimeout
}

// Return the MemberID used / created in this action
func (a *setCurrentImageAction) MemberID() string {
return ""
}
90 changes: 65 additions & 25 deletions pkg/deployment/reconcile/plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package reconcile
import (
"strings"

driver "github.com/arangodb/go-driver"
upgraderules "github.com/arangodb/go-upgrade-rules"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
Expand All @@ -37,6 +38,8 @@ import (

// upgradeDecision is the result of an upgrade check.
type upgradeDecision struct {
FromVersion driver.Version
ToVersion driver.Version
UpgradeNeeded bool // If set, the image version has changed
UpgradeAllowed bool // If set, it is an allowed version change
AutoUpgradeNeeded bool // If set, the database must be started with `--database.auto-upgrade` once
Expand Down Expand Up @@ -149,33 +152,56 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject,
}
return nil
}
status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
for _, m := range members {
if len(plan) > 0 {
// Only 1 change at a time
continue
}
if m.Phase != api.MemberPhaseCreated {
// Only rotate when phase is created
continue
}
if podName := m.PodName; podName != "" {
if p := getPod(podName); p != nil {
// Got pod, compare it with what it should be
decision := podNeedsUpgrading(*p, spec, status.Images)
if decision.UpgradeNeeded && decision.UpgradeAllowed {
plan = append(plan, createUpgradeMemberPlan(log, m, group, "Version upgrade")...)
} else {
rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context)
if rotNeeded {
plan = append(plan, createRotateMemberPlan(log, m, group, reason)...)
// createRotateOrUpgradePlan goes over all pods to check if an upgrade or rotate
// is needed. If an upgrade is needed but not allowed, the second return value
// will be true.
// Returns: (newPlan, upgradeNotAllowed)
createRotateOrUpgradePlan := func() (api.Plan, bool, driver.Version, driver.Version) {
var newPlan api.Plan
upgradeNotAllowed := false
var fromVersion, toVersion driver.Version
status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
for _, m := range members {
if m.Phase != api.MemberPhaseCreated {
// Only rotate when phase is created
continue
}
if podName := m.PodName; podName != "" {
if p := getPod(podName); p != nil {
// Got pod, compare it with what it should be
decision := podNeedsUpgrading(*p, spec, status.Images)
if decision.UpgradeNeeded && !decision.UpgradeAllowed {
// Oops, upgrade is not allowed
upgradeNotAllowed = true
fromVersion = decision.FromVersion
toVersion = decision.ToVersion
return nil
} else if len(newPlan) == 0 {
// Only rotate/upgrade 1 pod at a time
if decision.UpgradeNeeded {
// Yes, upgrade is needed (and allowed)
newPlan = createUpgradeMemberPlan(log, m, group, "Version upgrade", spec.GetImage(), status)
} else {
// Upgrade is not needed, see if rotation is needed
if rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context); rotNeeded {
newPlan = createRotateMemberPlan(log, m, group, reason)
}
}
}
}
}
}
}
return nil
})
return nil
})
return newPlan, upgradeNotAllowed, fromVersion, toVersion
}
if newPlan, upgradeNotAllowed, fromVersion, toVersion := createRotateOrUpgradePlan(); upgradeNotAllowed {
// Upgrade is needed, but not allowed
context.CreateEvent(k8sutil.NewUpgradeNotAllowedEvent(apiObject, fromVersion, toVersion))
} else {
// Use the new plan
plan = newPlan
}
}

// Check for the need to rotate TLS certificate of a members
Expand Down Expand Up @@ -218,18 +244,27 @@ func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoLi
podVersion := podImageInfo.ArangoDBVersion
if err := upgraderules.CheckUpgradeRules(podVersion, specVersion); err != nil {
// E.g. 3.x -> 4.x, we cannot allow automatically
return upgradeDecision{UpgradeNeeded: true, UpgradeAllowed: false}
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: false,
}
}
if specVersion.Major() != podVersion.Major() || specVersion.Minor() != podVersion.Minor() {
// Is allowed, with `--database.auto-upgrade`
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: true,
AutoUpgradeNeeded: true,
}
}
// Patch version change, rotate only
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: true,
AutoUpgradeNeeded: false,
Expand Down Expand Up @@ -343,7 +378,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
// createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing
// member.
func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
group api.ServerGroup, reason string) api.Plan {
group api.ServerGroup, reason string, imageName string, status api.DeploymentStatus) api.Plan {
log.Debug().
Str("id", member.ID).
Str("role", group.AsRole()).
Expand All @@ -353,6 +388,11 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
api.NewAction(api.ActionTypeUpgradeMember, group, member.ID, reason),
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
}
if status.CurrentImage == nil || status.CurrentImage.Image != imageName {
plan = append(api.Plan{
api.NewAction(api.ActionTypeSetCurrentImage, group, "", reason).SetImage(imageName),
}, plan...)
}
return plan
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/deployment/reconcile/plan_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio
return NewRenewTLSCertificateAction(log, action, actionCtx)
case api.ActionTypeRenewTLSCACertificate:
return NewRenewTLSCACertificateAction(log, action, actionCtx)
case api.ActionTypeSetCurrentImage:
return NewSetCurrentImageAction(log, action, actionCtx)
default:
panic(fmt.Sprintf("Unknown action type '%s'", action.Type))
}
Expand Down
24 changes: 17 additions & 7 deletions pkg/deployment/resources/pod_creator.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,23 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
podSuffix := createPodSuffix(spec)
m.PodName = k8sutil.CreatePodName(apiObject.GetName(), roleAbbr, m.ID, podSuffix)
newPhase := api.MemberPhaseCreated
// Find image ID
imageInfo, imageFound := status.Images.GetByImage(spec.GetImage())
if !imageFound {
imageNotFoundOnce.Do(func() {
log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image")
})
return nil
// Select image
var imageInfo api.ImageInfo
if current := status.CurrentImage; current != nil {
// Use current image
imageInfo = *current
} else {
// Find image ID
info, imageFound := status.Images.GetByImage(spec.GetImage())
if !imageFound {
imageNotFoundOnce.Do(func() {
log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image")
})
return nil
}
imageInfo = info
// Save image as current image
status.CurrentImage = &info
}
// Create pod
if group.IsArangod() {
Expand Down
16 changes: 7 additions & 9 deletions pkg/deployment/server_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,15 @@ func (d *Deployment) DatabaseURL() string {
// DatabaseVersion returns the version used by the deployment
// Returns versionNumber, licenseType
func (d *Deployment) DatabaseVersion() (string, string) {
image := d.GetSpec().GetImage()
status, _ := d.GetStatus()
info, found := status.Images.GetByImage(image)
if !found {
return "", ""
}
license := "community"
if info.Enterprise {
license = "enterprise"
if current := status.CurrentImage; current != nil {
license := "community"
if current.Enterprise {
license = "enterprise"
}
return string(current.ArangoDBVersion), license
}
return string(info.ArangoDBVersion), license
return "", ""
}

// Members returns all members of the deployment by role.
Expand Down
Loading

0 comments on commit 0165f98

Please sign in to comment.