From 75357b95b42e10afd673b39b002baa5ee28ef819 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Thu, 9 Aug 2018 10:37:45 +0200 Subject: [PATCH 1/2] Use CurrentImage field in status to prevent unintended upgrades --- .../deployment/v1alpha/deployment_status.go | 2 + pkg/apis/deployment/v1alpha/plan.go | 11 +++ .../v1alpha/zz_generated.deepcopy.go | 9 ++ pkg/deployment/reconcile/action_context.go | 24 ++++++ .../reconcile/action_upgrade_current_image.go | 85 +++++++++++++++++++ pkg/deployment/reconcile/plan_builder.go | 71 ++++++++++------ pkg/deployment/reconcile/plan_executor.go | 2 + pkg/deployment/resources/pod_creator.go | 24 ++++-- pkg/deployment/server_api.go | 16 ++-- 9 files changed, 204 insertions(+), 40 deletions(-) create mode 100644 pkg/deployment/reconcile/action_upgrade_current_image.go diff --git a/pkg/apis/deployment/v1alpha/deployment_status.go b/pkg/apis/deployment/v1alpha/deployment_status.go index 5f612449d..280f3a661 100644 --- a/pkg/apis/deployment/v1alpha/deployment_status.go +++ b/pkg/apis/deployment/v1alpha/deployment_status.go @@ -38,6 +38,8 @@ type DeploymentStatus struct { // Images holds a list of ArangoDB images with their ID and ArangoDB version. Images ImageInfoList `json:"arangodb-images,omitempty"` + // Image that is currently being used when new pods are created + CurrentImage *ImageInfo `json:"current-image,omitempty"` // Members holds the status for all members in all server groups Members DeploymentStatusMembers `json:"members"` diff --git a/pkg/apis/deployment/v1alpha/plan.go b/pkg/apis/deployment/v1alpha/plan.go index fd0ad77a2..59a31afc4 100644 --- a/pkg/apis/deployment/v1alpha/plan.go +++ b/pkg/apis/deployment/v1alpha/plan.go @@ -49,6 +49,8 @@ const ( ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate" // ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed. ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate" + // ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action. + ActionTypeSetCurrentImage ActionType = "SetCurrentImage" ) const ( @@ -73,6 +75,8 @@ type Action struct { StartTime *metav1.Time `json:"startTime,omitempty"` // Reason for this action Reason string `json:"reason,omitempty"` + // Image used in can of a SetCurrentImage action. + Image string `json:"image,omitempty"` } // NewAction instantiates a new Action. @@ -90,6 +94,13 @@ func NewAction(actionType ActionType, group ServerGroup, memberID string, reason return a } +// SetImage sets the Image field to the given value and returns the modified +// action. +func (a Action) SetImage(image string) Action { + a.Image = image + return a +} + // Plan is a list of actions that will be taken to update a deployment. // Only 1 action is in progress at a time. The operator will wait for that // action to be completely and then remove the action. diff --git a/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go b/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go index c8d083f60..44c6d3db6 100644 --- a/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go +++ b/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go @@ -295,6 +295,15 @@ func (in *DeploymentStatus) DeepCopyInto(out *DeploymentStatus) { *out = make(ImageInfoList, len(*in)) copy(*out, *in) } + if in.CurrentImage != nil { + in, out := &in.CurrentImage, &out.CurrentImage + if *in == nil { + *out = nil + } else { + *out = new(ImageInfo) + **out = **in + } + } in.Members.DeepCopyInto(&out.Members) if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index 1802b3bda..a0c28a02f 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -78,6 +78,12 @@ type ActionContext interface { DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error // DeleteTLSCASecret removes the Secret containing the TLS CA certificate. DeleteTLSCASecret() error + // GetImageInfo returns the image info for an image with given name. + // Returns: (info, infoFound) + GetImageInfo(imageName string) (api.ImageInfo, bool) + // SetCurrentImage changes the CurrentImage field in the deployment + // status to the given image. + SetCurrentImage(imageInfo api.ImageInfo) error } // newActionContext creates a new ActionContext implementation. @@ -260,3 +266,21 @@ func (ac *actionContext) DeleteTLSCASecret() error { } return nil } + +// GetImageInfo returns the image info for an image with given name. +// Returns: (info, infoFound) +func (ac *actionContext) GetImageInfo(imageName string) (api.ImageInfo, bool) { + status, _ := ac.context.GetStatus() + return status.Images.GetByImage(imageName) +} + +// SetCurrentImage changes the CurrentImage field in the deployment +// status to the given image. +func (ac *actionContext) SetCurrentImage(imageInfo api.ImageInfo) error { + status, lastVersion := ac.context.GetStatus() + status.CurrentImage = &imageInfo + if err := ac.context.UpdateStatus(status, lastVersion); err != nil { + return maskAny(err) + } + return nil +} diff --git a/pkg/deployment/reconcile/action_upgrade_current_image.go b/pkg/deployment/reconcile/action_upgrade_current_image.go new file mode 100644 index 000000000..aba316826 --- /dev/null +++ b/pkg/deployment/reconcile/action_upgrade_current_image.go @@ -0,0 +1,85 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + "time" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/rs/zerolog" +) + +// NewSetCurrentImageAction creates a new Action that implements the given +// planned SetCurrentImage action. +func NewSetCurrentImageAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &setCurrentImageAction{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// setCurrentImageAction implements an SetCurrentImage. +type setCurrentImageAction struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. +func (a *setCurrentImageAction) Start(ctx context.Context) (bool, error) { + ready, _, err := a.CheckProgress(ctx) + if err != nil { + return false, maskAny(err) + } + return ready, nil +} + +// CheckProgress checks the progress of the action. +// Returns true if the action is completely finished, false otherwise. +func (a *setCurrentImageAction) CheckProgress(ctx context.Context) (bool, bool, error) { + log := a.log + + imageInfo, found := a.actionCtx.GetImageInfo(a.action.Image) + if !found { + return false, false, nil + } + if err := a.actionCtx.SetCurrentImage(imageInfo); err != nil { + return false, false, maskAny(err) + } + log.Info().Str("image", a.action.Image).Msg("Changed current image") + return true, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. +func (a *setCurrentImageAction) Timeout() time.Duration { + return upgradeMemberTimeout +} + +// Return the MemberID used / created in this action +func (a *setCurrentImageAction) MemberID() string { + return "" +} diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 56ed4008e..8bb217ced 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -149,33 +149,51 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, } return nil } - status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { - for _, m := range members { - if len(plan) > 0 { - // Only 1 change at a time - continue - } - if m.Phase != api.MemberPhaseCreated { - // Only rotate when phase is created - continue - } - if podName := m.PodName; podName != "" { - if p := getPod(podName); p != nil { - // Got pod, compare it with what it should be - decision := podNeedsUpgrading(*p, spec, status.Images) - if decision.UpgradeNeeded && decision.UpgradeAllowed { - plan = append(plan, createUpgradeMemberPlan(log, m, group, "Version upgrade")...) - } else { - rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context) - if rotNeeded { - plan = append(plan, createRotateMemberPlan(log, m, group, reason)...) + // createRotateOrUpgradePlan goes over all pods to check if an upgrade or rotate + // is needed. If an upgrade is needed but not allowed, the second return value + // will be true. + // Returns: (newPlan, upgradeNotAllowed) + createRotateOrUpgradePlan := func() (api.Plan, bool) { + var newPlan api.Plan + upgradeNotAllowed := false + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for _, m := range members { + if m.Phase != api.MemberPhaseCreated { + // Only rotate when phase is created + continue + } + if podName := m.PodName; podName != "" { + if p := getPod(podName); p != nil { + // Got pod, compare it with what it should be + decision := podNeedsUpgrading(*p, spec, status.Images) + if decision.UpgradeNeeded && !decision.UpgradeAllowed { + // Oops, upgrade is not allowed + upgradeNotAllowed = true + return nil + } else if len(newPlan) == 0 { + // Only rotate/upgrade 1 pod at a time + if decision.UpgradeNeeded && decision.UpgradeAllowed { + newPlan = createUpgradeMemberPlan(log, m, group, "Version upgrade", spec.GetImage(), status) + } else { + rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context) + if rotNeeded { + newPlan = createRotateMemberPlan(log, m, group, reason) + } + } } } } } - } - return nil - }) + return nil + }) + return newPlan, upgradeNotAllowed + } + if newPlan, upgradeNotAllowed := createRotateOrUpgradePlan(); upgradeNotAllowed { + // TODO create event + } else { + // Use the new plan + plan = newPlan + } } // Check for the need to rotate TLS certificate of a members @@ -343,7 +361,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus, // createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing // member. func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus, - group api.ServerGroup, reason string) api.Plan { + group api.ServerGroup, reason string, imageName string, status api.DeploymentStatus) api.Plan { log.Debug(). Str("id", member.ID). Str("role", group.AsRole()). @@ -353,6 +371,11 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus, api.NewAction(api.ActionTypeUpgradeMember, group, member.ID, reason), api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID), } + if status.CurrentImage == nil || status.CurrentImage.Image != imageName { + plan = append(api.Plan{ + api.NewAction(api.ActionTypeSetCurrentImage, group, "", reason).SetImage(imageName), + }, plan...) + } return plan } diff --git a/pkg/deployment/reconcile/plan_executor.go b/pkg/deployment/reconcile/plan_executor.go index a423a7b64..22ba1c39d 100644 --- a/pkg/deployment/reconcile/plan_executor.go +++ b/pkg/deployment/reconcile/plan_executor.go @@ -181,6 +181,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio return NewRenewTLSCertificateAction(log, action, actionCtx) case api.ActionTypeRenewTLSCACertificate: return NewRenewTLSCACertificateAction(log, action, actionCtx) + case api.ActionTypeSetCurrentImage: + return NewSetCurrentImageAction(log, action, actionCtx) default: panic(fmt.Sprintf("Unknown action type '%s'", action.Type)) } diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 0cfdfae48..3f41cdefb 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -459,13 +459,23 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string, podSuffix := createPodSuffix(spec) m.PodName = k8sutil.CreatePodName(apiObject.GetName(), roleAbbr, m.ID, podSuffix) newPhase := api.MemberPhaseCreated - // Find image ID - imageInfo, imageFound := status.Images.GetByImage(spec.GetImage()) - if !imageFound { - imageNotFoundOnce.Do(func() { - log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image") - }) - return nil + // Select image + var imageInfo api.ImageInfo + if current := status.CurrentImage; current != nil { + // Use current image + imageInfo = *current + } else { + // Find image ID + info, imageFound := status.Images.GetByImage(spec.GetImage()) + if !imageFound { + imageNotFoundOnce.Do(func() { + log.Debug().Str("image", spec.GetImage()).Msg("Image ID is not known yet for image") + }) + return nil + } + imageInfo = info + // Save image as current image + status.CurrentImage = &info } // Create pod if group.IsArangod() { diff --git a/pkg/deployment/server_api.go b/pkg/deployment/server_api.go index 46ce4793e..4bf82e1c2 100644 --- a/pkg/deployment/server_api.go +++ b/pkg/deployment/server_api.go @@ -218,17 +218,15 @@ func (d *Deployment) DatabaseURL() string { // DatabaseVersion returns the version used by the deployment // Returns versionNumber, licenseType func (d *Deployment) DatabaseVersion() (string, string) { - image := d.GetSpec().GetImage() status, _ := d.GetStatus() - info, found := status.Images.GetByImage(image) - if !found { - return "", "" - } - license := "community" - if info.Enterprise { - license = "enterprise" + if current := status.CurrentImage; current != nil { + license := "community" + if current.Enterprise { + license = "enterprise" + } + return string(current.ArangoDBVersion), license } - return string(info.ArangoDBVersion), license + return "", "" } // Members returns all members of the deployment by role. From e4736f15c7b3d54579881f1dc1699cd119069192 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Thu, 9 Aug 2018 12:02:30 +0200 Subject: [PATCH 2/2] Record event in case upgrade is not allowed --- pkg/deployment/reconcile/plan_builder.go | 33 ++++++++++++++++++------ pkg/util/k8sutil/events.go | 15 +++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 8bb217ced..8e595c434 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -25,6 +25,7 @@ package reconcile import ( "strings" + driver "github.com/arangodb/go-driver" upgraderules "github.com/arangodb/go-upgrade-rules" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -37,6 +38,8 @@ import ( // upgradeDecision is the result of an upgrade check. type upgradeDecision struct { + FromVersion driver.Version + ToVersion driver.Version UpgradeNeeded bool // If set, the image version has changed UpgradeAllowed bool // If set, it is an allowed version change AutoUpgradeNeeded bool // If set, the database must be started with `--database.auto-upgrade` once @@ -153,9 +156,10 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, // is needed. If an upgrade is needed but not allowed, the second return value // will be true. // Returns: (newPlan, upgradeNotAllowed) - createRotateOrUpgradePlan := func() (api.Plan, bool) { + createRotateOrUpgradePlan := func() (api.Plan, bool, driver.Version, driver.Version) { var newPlan api.Plan upgradeNotAllowed := false + var fromVersion, toVersion driver.Version status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { for _, m := range members { if m.Phase != api.MemberPhaseCreated { @@ -169,14 +173,17 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, if decision.UpgradeNeeded && !decision.UpgradeAllowed { // Oops, upgrade is not allowed upgradeNotAllowed = true + fromVersion = decision.FromVersion + toVersion = decision.ToVersion return nil } else if len(newPlan) == 0 { // Only rotate/upgrade 1 pod at a time - if decision.UpgradeNeeded && decision.UpgradeAllowed { + if decision.UpgradeNeeded { + // Yes, upgrade is needed (and allowed) newPlan = createUpgradeMemberPlan(log, m, group, "Version upgrade", spec.GetImage(), status) } else { - rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context) - if rotNeeded { + // Upgrade is not needed, see if rotation is needed + if rotNeeded, reason := podNeedsRotation(log, *p, apiObject, spec, group, status.Members.Agents, m.ID, context); rotNeeded { newPlan = createRotateMemberPlan(log, m, group, reason) } } @@ -186,10 +193,11 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, } return nil }) - return newPlan, upgradeNotAllowed + return newPlan, upgradeNotAllowed, fromVersion, toVersion } - if newPlan, upgradeNotAllowed := createRotateOrUpgradePlan(); upgradeNotAllowed { - // TODO create event + if newPlan, upgradeNotAllowed, fromVersion, toVersion := createRotateOrUpgradePlan(); upgradeNotAllowed { + // Upgrade is needed, but not allowed + context.CreateEvent(k8sutil.NewUpgradeNotAllowedEvent(apiObject, fromVersion, toVersion)) } else { // Use the new plan plan = newPlan @@ -236,11 +244,18 @@ func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoLi podVersion := podImageInfo.ArangoDBVersion if err := upgraderules.CheckUpgradeRules(podVersion, specVersion); err != nil { // E.g. 3.x -> 4.x, we cannot allow automatically - return upgradeDecision{UpgradeNeeded: true, UpgradeAllowed: false} + return upgradeDecision{ + FromVersion: podVersion, + ToVersion: specVersion, + UpgradeNeeded: true, + UpgradeAllowed: false, + } } if specVersion.Major() != podVersion.Major() || specVersion.Minor() != podVersion.Minor() { // Is allowed, with `--database.auto-upgrade` return upgradeDecision{ + FromVersion: podVersion, + ToVersion: specVersion, UpgradeNeeded: true, UpgradeAllowed: true, AutoUpgradeNeeded: true, @@ -248,6 +263,8 @@ func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoLi } // Patch version change, rotate only return upgradeDecision{ + FromVersion: podVersion, + ToVersion: specVersion, UpgradeNeeded: true, UpgradeAllowed: true, AutoUpgradeNeeded: false, diff --git a/pkg/util/k8sutil/events.go b/pkg/util/k8sutil/events.go index 201d4c3ba..47ed83fba 100644 --- a/pkg/util/k8sutil/events.go +++ b/pkg/util/k8sutil/events.go @@ -26,6 +26,7 @@ import ( "fmt" "strings" + driver "github.com/arangodb/go-driver" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -191,6 +192,20 @@ func NewDowntimeNotAllowedEvent(apiObject APIObject, operation string) *Event { return event } +// NewUpgradeNotAllowedEvent creates an event indicating that an upgrade (or downgrade) is not allowed. +func NewUpgradeNotAllowedEvent(apiObject APIObject, fromVersion, toVersion driver.Version) *Event { + event := newDeploymentEvent(apiObject) + event.Type = v1.EventTypeNormal + if fromVersion.CompareTo(toVersion) < 0 { + event.Reason = "Upgrade not allowed" + event.Message = fmt.Sprintf("Upgrading ArangoDB from version %s to %s is not allowed", fromVersion, toVersion) + } else { + event.Reason = "Downgrade not allowed" + event.Message = fmt.Sprintf("Downgrading ArangoDB from version %s to %s is not allowed", fromVersion, toVersion) + } + return event +} + // NewErrorEvent creates an even of type error. func NewErrorEvent(reason string, err error, apiObject APIObject) *Event { event := newDeploymentEvent(apiObject)