Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use new CurrentImage field to prevent unintended upgrades. #236

Merged
merged 2 commits into from
Aug 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/apis/deployment/v1alpha/deployment_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ type DeploymentStatus struct {

// Images holds a list of ArangoDB images with their ID and ArangoDB version.
Images ImageInfoList `json:"arangodb-images,omitempty"`
// Image that is currently being used when new pods are created
CurrentImage *ImageInfo `json:"current-image,omitempty"`

// Members holds the status for all members in all server groups
Members DeploymentStatusMembers `json:"members"`
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/deployment/v1alpha/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ const (
ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate"
// ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed.
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
)

const (
Expand All @@ -73,6 +75,8 @@ type Action struct {
StartTime *metav1.Time `json:"startTime,omitempty"`
// Reason for this action
Reason string `json:"reason,omitempty"`
// Image used in can of a SetCurrentImage action.
Image string `json:"image,omitempty"`
}

// NewAction instantiates a new Action.
Expand All @@ -90,6 +94,13 @@ func NewAction(actionType ActionType, group ServerGroup, memberID string, reason
return a
}

// SetImage sets the Image field to the given value and returns the modified
// action.
func (a Action) SetImage(image string) Action {
a.Image = image
return a
}

// Plan is a list of actions that will be taken to update a deployment.
// Only 1 action is in progress at a time. The operator will wait for that
// action to be completely and then remove the action.
Expand Down
9 changes: 9 additions & 0 deletions pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,15 @@ func (in *DeploymentStatus) DeepCopyInto(out *DeploymentStatus) {
*out = make(ImageInfoList, len(*in))
copy(*out, *in)
}
if in.CurrentImage != nil {
in, out := &in.CurrentImage, &out.CurrentImage
if *in == nil {
*out = nil
} else {
*out = new(ImageInfo)
**out = **in
}
}
in.Members.DeepCopyInto(&out.Members)
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
Expand Down
24 changes: 24 additions & 0 deletions pkg/deployment/reconcile/action_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ type ActionContext interface {
DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error
// DeleteTLSCASecret removes the Secret containing the TLS CA certificate.
DeleteTLSCASecret() error
// GetImageInfo returns the image info for an image with given name.
// Returns: (info, infoFound)
GetImageInfo(imageName string) (api.ImageInfo, bool)
// SetCurrentImage changes the CurrentImage field in the deployment
// status to the given image.
SetCurrentImage(imageInfo api.ImageInfo) error
}

// newActionContext creates a new ActionContext implementation.
Expand Down Expand Up @@ -260,3 +266,21 @@ func (ac *actionContext) DeleteTLSCASecret() error {
}
return nil
}

// GetImageInfo returns the image info for an image with given name.
// Returns: (info, infoFound)
func (ac *actionContext) GetImageInfo(imageName string) (api.ImageInfo, bool) {
status, _ := ac.context.GetStatus()
return status.Images.GetByImage(imageName)
}

// SetCurrentImage changes the CurrentImage field in the deployment
// status to the given image.
func (ac *actionContext) SetCurrentImage(imageInfo api.ImageInfo) error {
status, lastVersion := ac.context.GetStatus()
status.CurrentImage = &imageInfo
if err := ac.context.UpdateStatus(status, lastVersion); err != nil {
return maskAny(err)
}
return nil
}
85 changes: 85 additions & 0 deletions pkg/deployment/reconcile/action_upgrade_current_image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//

package reconcile

import (
"context"
"time"

api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
"github.com/rs/zerolog"
)

// NewSetCurrentImageAction creates a new Action that implements the given
// planned SetCurrentImage action.
func NewSetCurrentImageAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
return &setCurrentImageAction{
log: log,
action: action,
actionCtx: actionCtx,
}
}

// setCurrentImageAction implements an SetCurrentImage.
type setCurrentImageAction struct {
log zerolog.Logger
action api.Action
actionCtx ActionContext
}

// Start performs the start of the action.
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *setCurrentImageAction) Start(ctx context.Context) (bool, error) {
ready, _, err := a.CheckProgress(ctx)
if err != nil {
return false, maskAny(err)
}
return ready, nil
}

// CheckProgress checks the progress of the action.
// Returns true if the action is completely finished, false otherwise.
func (a *setCurrentImageAction) CheckProgress(ctx context.Context) (bool, bool, error) {
log := a.log

imageInfo, found := a.actionCtx.GetImageInfo(a.action.Image)
if !found {
return false, false, nil
}
if err := a.actionCtx.SetCurrentImage(imageInfo); err != nil {
return false, false, maskAny(err)
}
log.Info().Str("image", a.action.Image).Msg("Changed current image")
return true, false, nil
}

// Timeout returns the amount of time after which this action will timeout.
func (a *setCurrentImageAction) Timeout() time.Duration {
return upgradeMemberTimeout
}

// Return the MemberID used / created in this action
func (a *setCurrentImageAction) MemberID() string {
return ""
}
90 changes: 65 additions & 25 deletions pkg/deployment/reconcile/plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package reconcile
import (
"strings"

driver "github.com/arangodb/go-driver"
upgraderules "github.com/arangodb/go-upgrade-rules"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
Expand All @@ -37,6 +38,8 @@ import (

// upgradeDecision is the result of an upgrade check.
type upgradeDecision struct {
FromVersion driver.Version
ToVersion driver.Version
UpgradeNeeded bool // If set, the image version has changed
UpgradeAllowed bool // If set, it is an allowed version change
AutoUpgradeNeeded bool // If set, the database must be started with `--database.auto-upgrade` once
Expand Down Expand Up @@ -149,33 +152,56 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject,
}
return nil
}
status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
for _, m := range members {
if len(plan) > 0 {
// Only 1 change at a time
continue
}
if m.Phase != api.MemberPhaseCreated {
// Only rotate when phase is created
continue
}
if podName := m.PodName; podName != "" {
if p := getPod(podName); p != nil {
// Got pod, compare it with what it should be
decision := podNeedsUpgrading(*p, spec, status.Images)
if decision.UpgradeNeeded && decision.UpgradeAllowed {
plan = append(plan, createUpgradeMemberPlan(log, m, group, "Version upgrade")...)
} else {
rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context)
if rotNeeded {
plan = append(plan, createRotateMemberPlan(log, m, group, reason)...)
// createRotateOrUpgradePlan goes over all pods to check if an upgrade or rotate
// is needed. If an upgrade is needed but not allowed, the second return value
// will be true.
// Returns: (newPlan, upgradeNotAllowed)
createRotateOrUpgradePlan := func() (api.Plan, bool, driver.Version, driver.Version) {
var newPlan api.Plan
upgradeNotAllowed := false
var fromVersion, toVersion driver.Version
status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
for _, m := range members {
if m.Phase != api.MemberPhaseCreated {
// Only rotate when phase is created
continue
}
if podName := m.PodName; podName != "" {
if p := getPod(podName); p != nil {
// Got pod, compare it with what it should be
decision := podNeedsUpgrading(*p, spec, status.Images)
if decision.UpgradeNeeded && !decision.UpgradeAllowed {
// Oops, upgrade is not allowed
upgradeNotAllowed = true
fromVersion = decision.FromVersion
toVersion = decision.ToVersion
return nil
} else if len(newPlan) == 0 {
// Only rotate/upgrade 1 pod at a time
if decision.UpgradeNeeded {
// Yes, upgrade is needed (and allowed)
newPlan = createUpgradeMemberPlan(log, m, group, "Version upgrade", spec.GetImage(), status)
} else {
// Upgrade is not needed, see if rotation is needed
if rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context); rotNeeded {
newPlan = createRotateMemberPlan(log, m, group, reason)
}
}
}
}
}
}
}
return nil
})
return nil
})
return newPlan, upgradeNotAllowed, fromVersion, toVersion
}
if newPlan, upgradeNotAllowed, fromVersion, toVersion := createRotateOrUpgradePlan(); upgradeNotAllowed {
// Upgrade is needed, but not allowed
context.CreateEvent(k8sutil.NewUpgradeNotAllowedEvent(apiObject, fromVersion, toVersion))
} else {
// Use the new plan
plan = newPlan
}
}

// Check for the need to rotate TLS certificate of a members
Expand Down Expand Up @@ -218,18 +244,27 @@ func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoLi
podVersion := podImageInfo.ArangoDBVersion
if err := upgraderules.CheckUpgradeRules(podVersion, specVersion); err != nil {
// E.g. 3.x -> 4.x, we cannot allow automatically
return upgradeDecision{UpgradeNeeded: true, UpgradeAllowed: false}
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: false,
}
}
if specVersion.Major() != podVersion.Major() || specVersion.Minor() != podVersion.Minor() {
// Is allowed, with `--database.auto-upgrade`
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: true,
AutoUpgradeNeeded: true,
}
}
// Patch version change, rotate only
return upgradeDecision{
FromVersion: podVersion,
ToVersion: specVersion,
UpgradeNeeded: true,
UpgradeAllowed: true,
AutoUpgradeNeeded: false,
Expand Down Expand Up @@ -343,7 +378,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
// createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing
// member.
func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
group api.ServerGroup, reason string) api.Plan {
group api.ServerGroup, reason string, imageName string, status api.DeploymentStatus) api.Plan {
log.Debug().
Str("id", member.ID).
Str("role", group.AsRole()).
Expand All @@ -353,6 +388,11 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
api.NewAction(api.ActionTypeUpgradeMember, group, member.ID, reason),
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
}
if status.CurrentImage == nil || status.CurrentImage.Image != imageName {
plan = append(api.Plan{
api.NewAction(api.ActionTypeSetCurrentImage, group, "", reason).SetImage(imageName),
}, plan...)
}
return plan
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/deployment/reconcile/plan_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio
return NewRenewTLSCertificateAction(log, action, actionCtx)
case api.ActionTypeRenewTLSCACertificate:
return NewRenewTLSCACertificateAction(log, action, actionCtx)
case api.ActionTypeSetCurrentImage:
return NewSetCurrentImageAction(log, action, actionCtx)
default:
panic(fmt.Sprintf("Unknown action type '%s'", action.Type))
}
Expand Down
24 changes: 17 additions & 7 deletions pkg/deployment/resources/pod_creator.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,23 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
podSuffix := createPodSuffix(spec)
m.PodName = k8sutil.CreatePodName(apiObject.GetName(), roleAbbr, m.ID, podSuffix)
newPhase := api.MemberPhaseCreated
// Find image ID
imageInfo, imageFound := status.Images.GetByImage(spec.GetImage())
if !imageFound {
imageNotFoundOnce.Do(func() {
log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image")
})
return nil
// Select image
var imageInfo api.ImageInfo
if current := status.CurrentImage; current != nil {
// Use current image
imageInfo = *current
} else {
// Find image ID
info, imageFound := status.Images.GetByImage(spec.GetImage())
if !imageFound {
imageNotFoundOnce.Do(func() {
log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image")
})
return nil
}
imageInfo = info
// Save image as current image
status.CurrentImage = &info
}
// Create pod
if group.IsArangod() {
Expand Down
16 changes: 7 additions & 9 deletions pkg/deployment/server_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,15 @@ func (d *Deployment) DatabaseURL() string {
// DatabaseVersion returns the version used by the deployment
// Returns versionNumber, licenseType
func (d *Deployment) DatabaseVersion() (string, string) {
image := d.GetSpec().GetImage()
status, _ := d.GetStatus()
info, found := status.Images.GetByImage(image)
if !found {
return "", ""
}
license := "community"
if info.Enterprise {
license = "enterprise"
if current := status.CurrentImage; current != nil {
license := "community"
if current.Enterprise {
license = "enterprise"
}
return string(current.ArangoDBVersion), license
}
return string(info.ArangoDBVersion), license
return "", ""
}

// Members returns all members of the deployment by role.
Expand Down
Loading