From 44581c77647c5f2f7e3b30dce80e367bb40cc2ab Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 15 Jun 2018 14:30:15 +0200 Subject: [PATCH 1/5] Allow changing storage class of a server group --- examples/arango-local-storage.yaml | 6 +- pkg/apis/deployment/v1alpha/plan.go | 6 + .../deployment/v1alpha/server_group_spec.go | 4 - pkg/deployment/context_impl.go | 19 ++- pkg/deployment/reconcile/action.go | 2 + pkg/deployment/reconcile/action_add_member.go | 16 ++- .../reconcile/action_cleanout_member.go | 5 + pkg/deployment/reconcile/action_context.go | 11 +- .../reconcile/action_remove_member.go | 5 + .../action_renew_tls_ca_certificate.go | 5 + .../reconcile/action_renew_tls_certificate.go | 5 + .../reconcile/action_rotate_member.go | 5 + .../reconcile/action_shutdown_member.go | 5 + .../reconcile/action_upgrade_member.go | 5 + .../reconcile/action_wait_for_member_up.go | 5 + pkg/deployment/reconcile/context.go | 5 +- pkg/deployment/reconcile/plan_builder.go | 18 ++- .../reconcile/plan_builder_storage.go | 111 ++++++++++++++++++ pkg/deployment/reconcile/plan_executor.go | 8 ++ 19 files changed, 219 insertions(+), 27 deletions(-) create mode 100644 pkg/deployment/reconcile/plan_builder_storage.go diff --git a/examples/arango-local-storage.yaml b/examples/arango-local-storage.yaml index 6c986ecd0..c89f918de 100644 --- a/examples/arango-local-storage.yaml +++ b/examples/arango-local-storage.yaml @@ -1,10 +1,10 @@ apiVersion: "storage.arangodb.com/v1alpha" kind: "ArangoLocalStorage" metadata: - name: "arangodb-local-storage" + name: "arangodb-new-local-storage" spec: storageClass: - name: my-local-ssd + name: my-new-local-ssd isDefault: true localPath: - - /var/lib/arango-storage + - /var/lib/arango-storage-new diff --git a/pkg/apis/deployment/v1alpha/plan.go b/pkg/apis/deployment/v1alpha/plan.go index 9a86de888..fd0ad77a2 100644 --- a/pkg/apis/deployment/v1alpha/plan.go +++ b/pkg/apis/deployment/v1alpha/plan.go @@ -51,6 +51,12 @@ const ( ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate" ) +const ( + // MemberIDPreviousAction is used for Action.MemberID when the MemberID + // should be derived from the previous action. + MemberIDPreviousAction = "@previous" +) + // Action represents a single action to be taken to update a deployment. type Action struct { // ID of this action (unique for every action) diff --git a/pkg/apis/deployment/v1alpha/server_group_spec.go b/pkg/apis/deployment/v1alpha/server_group_spec.go index 7b21c2ae7..6d199599a 100644 --- a/pkg/apis/deployment/v1alpha/server_group_spec.go +++ b/pkg/apis/deployment/v1alpha/server_group_spec.go @@ -191,9 +191,5 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str resetFields = append(resetFields, fieldPrefix+".count") } } - if s.GetStorageClassName() != target.GetStorageClassName() { - target.StorageClassName = util.NewStringOrNil(s.StorageClassName) - resetFields = append(resetFields, fieldPrefix+".storageClassName") - } return resetFields } diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index 388328eea..651a0eafc 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -30,6 +30,7 @@ import ( "github.com/arangodb/arangosync/tasks" driver "github.com/arangodb/go-driver" "github.com/arangodb/go-driver/agency" + "github.com/rs/zerolog/log" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -191,23 +192,23 @@ func (d *Deployment) GetSyncServerClient(ctx context.Context, group api.ServerGr // CreateMember adds a new member to the given group. // If ID is non-empty, it will be used, otherwise a new ID is created. -func (d *Deployment) CreateMember(group api.ServerGroup, id string) error { +func (d *Deployment) CreateMember(group api.ServerGroup, id string) (string, error) { log := d.deps.Log status, lastVersion := d.GetStatus() id, err := createMember(log, &status, group, id, d.apiObject) if err != nil { log.Debug().Err(err).Str("group", group.AsRole()).Msg("Failed to create member") - return maskAny(err) + return "", maskAny(err) } // Save added member if err := d.UpdateStatus(status, lastVersion); err != nil { log.Debug().Err(err).Msg("Updating CR status failed") - return maskAny(err) + return "", maskAny(err) } // Create event about it d.CreateEvent(k8sutil.NewMemberAddEvent(id, group.AsRole(), d.apiObject)) - return nil + return id, nil } // DeletePod deletes a pod with given name in the namespace @@ -304,6 +305,16 @@ func (d *Deployment) GetOwnedPVCs() ([]v1.PersistentVolumeClaim, error) { return myPVCs, nil } +// GetPvc gets a PVC by the given name, in the samespace of the deployment. +func (d *Deployment) GetPvc(pvcName string) (*v1.PersistentVolumeClaim, error) { + pvc, err := d.deps.KubeCli.CoreV1().PersistentVolumeClaims(d.apiObject.GetNamespace()).Get(pvcName, metav1.GetOptions{}) + if err != nil { + log.Debug().Err(err).Str("pvc-name", pvcName).Msg("Failed to get PVC") + return nil, maskAny(err) + } + return pvc, nil +} + // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for // the given member. func (d *Deployment) GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) { diff --git a/pkg/deployment/reconcile/action.go b/pkg/deployment/reconcile/action.go index d6869b5a3..eecf9176d 100644 --- a/pkg/deployment/reconcile/action.go +++ b/pkg/deployment/reconcile/action.go @@ -38,4 +38,6 @@ type Action interface { CheckProgress(ctx context.Context) (bool, bool, error) // Timeout returns the amount of time after which this action will timeout. Timeout() time.Duration + // Return the MemberID used / created in this action + MemberID() string } diff --git a/pkg/deployment/reconcile/action_add_member.go b/pkg/deployment/reconcile/action_add_member.go index 4f4094781..1e282c67e 100644 --- a/pkg/deployment/reconcile/action_add_member.go +++ b/pkg/deployment/reconcile/action_add_member.go @@ -43,19 +43,22 @@ func NewAddMemberAction(log zerolog.Logger, action api.Action, actionCtx ActionC // actionAddMember implements an AddMemberAction. type actionAddMember struct { - log zerolog.Logger - action api.Action - actionCtx ActionContext + log zerolog.Logger + action api.Action + actionCtx ActionContext + newMemberID string } // Start performs the start of the action. // Returns true if the action is completely finished, false in case // the start time needs to be recorded and a ready condition needs to be checked. func (a *actionAddMember) Start(ctx context.Context) (bool, error) { - if err := a.actionCtx.CreateMember(a.action.Group, a.action.MemberID); err != nil { + newID, err := a.actionCtx.CreateMember(a.action.Group, a.action.MemberID) + if err != nil { log.Debug().Err(err).Msg("Failed to create member") return false, maskAny(err) } + a.newMemberID = newID return true, nil } @@ -70,3 +73,8 @@ func (a *actionAddMember) CheckProgress(ctx context.Context) (bool, bool, error) func (a *actionAddMember) Timeout() time.Duration { return addMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionAddMember) MemberID() string { + return a.newMemberID +} diff --git a/pkg/deployment/reconcile/action_cleanout_member.go b/pkg/deployment/reconcile/action_cleanout_member.go index fa6572fff..263bac205 100644 --- a/pkg/deployment/reconcile/action_cleanout_member.go +++ b/pkg/deployment/reconcile/action_cleanout_member.go @@ -155,3 +155,8 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, bool, e func (a *actionCleanoutMember) Timeout() time.Duration { return cleanoutMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionCleanoutMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index 6bcb67bfc..1802b3bda 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -59,7 +59,7 @@ type ActionContext interface { GetMemberStatusByID(id string) (api.MemberStatus, bool) // CreateMember adds a new member to the given group. // If ID is non-empty, it will be used, otherwise a new ID is created. - CreateMember(group api.ServerGroup, id string) error + CreateMember(group api.ServerGroup, id string) (string, error) // UpdateMember updates the deployment status wrt the given member. UpdateMember(member api.MemberStatus) error // RemoveMemberByID removes a member with given id. @@ -157,11 +157,12 @@ func (ac *actionContext) GetMemberStatusByID(id string) (api.MemberStatus, bool) // CreateMember adds a new member to the given group. // If ID is non-empty, it will be used, otherwise a new ID is created. -func (ac *actionContext) CreateMember(group api.ServerGroup, id string) error { - if err := ac.context.CreateMember(group, id); err != nil { - return maskAny(err) +func (ac *actionContext) CreateMember(group api.ServerGroup, id string) (string, error) { + result, err := ac.context.CreateMember(group, id) + if err != nil { + return "", maskAny(err) } - return nil + return result, nil } // UpdateMember updates the deployment status wrt the given member. diff --git a/pkg/deployment/reconcile/action_remove_member.go b/pkg/deployment/reconcile/action_remove_member.go index 4aee6cf4b..8824d373c 100644 --- a/pkg/deployment/reconcile/action_remove_member.go +++ b/pkg/deployment/reconcile/action_remove_member.go @@ -105,3 +105,8 @@ func (a *actionRemoveMember) CheckProgress(ctx context.Context) (bool, bool, err func (a *actionRemoveMember) Timeout() time.Duration { return removeMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionRemoveMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go b/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go index d22a7abe5..5a0a3543e 100644 --- a/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go +++ b/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go @@ -69,3 +69,8 @@ func (a *renewTLSCACertificateAction) CheckProgress(ctx context.Context) (bool, func (a *renewTLSCACertificateAction) Timeout() time.Duration { return renewTLSCACertificateTimeout } + +// Return the MemberID used / created in this action +func (a *renewTLSCACertificateAction) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_renew_tls_certificate.go b/pkg/deployment/reconcile/action_renew_tls_certificate.go index 6f1b61bf1..8ea4c839f 100644 --- a/pkg/deployment/reconcile/action_renew_tls_certificate.go +++ b/pkg/deployment/reconcile/action_renew_tls_certificate.go @@ -75,3 +75,8 @@ func (a *renewTLSCertificateAction) CheckProgress(ctx context.Context) (bool, bo func (a *renewTLSCertificateAction) Timeout() time.Duration { return renewTLSCertificateTimeout } + +// Return the MemberID used / created in this action +func (a *renewTLSCertificateAction) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_rotate_member.go b/pkg/deployment/reconcile/action_rotate_member.go index 41e83e314..d6368281e 100644 --- a/pkg/deployment/reconcile/action_rotate_member.go +++ b/pkg/deployment/reconcile/action_rotate_member.go @@ -127,3 +127,8 @@ func (a *actionRotateMember) CheckProgress(ctx context.Context) (bool, bool, err func (a *actionRotateMember) Timeout() time.Duration { return rotateMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionRotateMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_shutdown_member.go b/pkg/deployment/reconcile/action_shutdown_member.go index 69735ce3b..e4db9ddc2 100644 --- a/pkg/deployment/reconcile/action_shutdown_member.go +++ b/pkg/deployment/reconcile/action_shutdown_member.go @@ -116,3 +116,8 @@ func (a *actionShutdownMember) CheckProgress(ctx context.Context) (bool, bool, e func (a *actionShutdownMember) Timeout() time.Duration { return shutdownMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionShutdownMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_upgrade_member.go b/pkg/deployment/reconcile/action_upgrade_member.go index 3b52030e0..97f82368a 100644 --- a/pkg/deployment/reconcile/action_upgrade_member.go +++ b/pkg/deployment/reconcile/action_upgrade_member.go @@ -133,3 +133,8 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er func (a *actionUpgradeMember) Timeout() time.Duration { return upgradeMemberTimeout } + +// Return the MemberID used / created in this action +func (a *actionUpgradeMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_wait_for_member_up.go b/pkg/deployment/reconcile/action_wait_for_member_up.go index b61ab3bae..a42f92bda 100644 --- a/pkg/deployment/reconcile/action_wait_for_member_up.go +++ b/pkg/deployment/reconcile/action_wait_for_member_up.go @@ -170,3 +170,8 @@ func (a *actionWaitForMemberUp) checkProgressArangoSync(ctx context.Context) (bo func (a *actionWaitForMemberUp) Timeout() time.Duration { return waitForMemberUpTimeout } + +// Return the MemberID used / created in this action +func (a *actionWaitForMemberUp) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/context.go b/pkg/deployment/reconcile/context.go index 0e6cb7681..6d9f5da08 100644 --- a/pkg/deployment/reconcile/context.go +++ b/pkg/deployment/reconcile/context.go @@ -62,7 +62,8 @@ type Context interface { CreateEvent(evt *v1.Event) // CreateMember adds a new member to the given group. // If ID is non-empty, it will be used, otherwise a new ID is created. - CreateMember(group api.ServerGroup, id string) error + // Returns ID, error + CreateMember(group api.ServerGroup, id string) (string, error) // DeletePod deletes a pod with given name in the namespace // of the deployment. If the pod does not exist, the error is ignored. DeletePod(podName string) error @@ -74,6 +75,8 @@ type Context interface { RemovePodFinalizers(podName string) error // GetOwnedPods returns a list of all pods owned by the deployment. GetOwnedPods() ([]v1.Pod, error) + // GetPvc gets a PVC by the given name, in the samespace of the deployment. + GetPvc(pvcName string) (*v1.PersistentVolumeClaim, error) // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for // the given member. GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index bf6430444..a82a8ed70 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -54,7 +54,7 @@ func (d *Reconciler) CreatePlan() error { apiObject := d.context.GetAPIObject() spec := d.context.GetSpec() status, lastVersion := d.context.GetStatus() - newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile, d.context.GetTLSCA) + newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile, d.context.GetTLSCA, d.context.GetPvc) // If not change, we're done if !changed { @@ -80,7 +80,8 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, currentPlan api.Plan, spec api.DeploymentSpec, status api.DeploymentStatus, pods []v1.Pod, getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error), - getTLSCA func(string) (string, string, bool, error)) (api.Plan, bool) { + getTLSCA func(string) (string, string, bool, error), + getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error)) (api.Plan, bool) { if len(currentPlan) > 0 { // Plan already exists, complete that first return currentPlan, false @@ -175,14 +176,19 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, }) } - // Check for the need to rotate TLS CA certificate and all members + // Check for the need to rotate TLS certificate of a members if len(plan) == 0 { - plan = createRotateTLSCAPlan(log, spec, status, getTLSCA) + plan = createRotateTLSServerCertificatePlan(log, spec, status, getTLSKeyfile) } - // Check for the need to rotate TLS certificate of a members + // Check for changes storage classes or requirements if len(plan) == 0 { - plan = createRotateTLSServerCertificatePlan(log, spec, status, getTLSKeyfile) + plan = createRotateServerStoragePlan(log, spec, status, getPVC) + } + + // Check for the need to rotate TLS CA certificate and all members + if len(plan) == 0 { + plan = createRotateTLSCAPlan(log, spec, status, getTLSCA) } // Return plan diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go new file mode 100644 index 000000000..ce88de9db --- /dev/null +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -0,0 +1,111 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "github.com/rs/zerolog" + "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util" +) + +// createRotateServerStoragePlan creates plan to rotate a server and its volume because of a +// different storage class or a difference in storage resource requirements. +func createRotateServerStoragePlan(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, + getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error)) api.Plan { + if spec.GetMode() == api.DeploymentModeSingle { + // Storage cannot be changed in single server deployments + return nil + } + var plan api.Plan + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for _, m := range members { + if len(plan) > 0 { + // Only 1 change at a time + continue + } + if m.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created + continue + } + if m.PersistentVolumeClaimName == "" { + // Plan is irrelevant without PVC + continue + } + if group == api.ServerGroupSyncWorkers { + // SyncWorkers have no externally created TLS keyfile + continue + } + groupSpec := spec.GetServerGroupSpec(group) + storageClassName := groupSpec.GetStorageClassName() + if storageClassName == "" { + // Using default storage class name + continue + } + // Load PVC + pvc, err := getPVC(m.PersistentVolumeClaimName) + if err != nil { + log.Warn().Err(err). + Str("role", group.AsRole()). + Str("id", m.ID). + Msg("Failed to get PVC") + continue + } + replacementNeeded := false + if util.StringOrDefault(pvc.Spec.StorageClassName) != storageClassName { + // Storageclass has changed + replacementNeeded = true + } + if replacementNeeded { + if group != api.ServerGroupAgents { + plan = append(plan, + // Scale up, so we're sure that a new member is available + api.NewAction(api.ActionTypeAddMember, group, ""), + api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction), + ) + } + if group == api.ServerGroupDBServers { + plan = append(plan, + // Cleanout + api.NewAction(api.ActionTypeCleanOutMember, group, m.ID), + ) + } + plan = append(plan, + // Shutdown & remove the server + api.NewAction(api.ActionTypeShutdownMember, group, m.ID), + api.NewAction(api.ActionTypeRemoveMember, group, m.ID), + ) + if group == api.ServerGroupAgents { + plan = append(plan, + // Scale up, so we're adding the remove agent + api.NewAction(api.ActionTypeAddMember, group, ""), + api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction), + ) + } + } + } + return nil + }) + return plan +} diff --git a/pkg/deployment/reconcile/plan_executor.go b/pkg/deployment/reconcile/plan_executor.go index 763d308aa..a423a7b64 100644 --- a/pkg/deployment/reconcile/plan_executor.go +++ b/pkg/deployment/reconcile/plan_executor.go @@ -76,6 +76,10 @@ func (d *Reconciler) ExecutePlan(ctx context.Context) (bool, error) { if ready { // Remove action from list status.Plan = status.Plan[1:] + if len(status.Plan) > 0 && status.Plan[0].MemberID == api.MemberIDPreviousAction { + // Fill in MemberID from previous action + status.Plan[0].MemberID = action.MemberID() + } } else { // Mark start time now := metav1.Now() @@ -105,6 +109,10 @@ func (d *Reconciler) ExecutePlan(ctx context.Context) (bool, error) { status, lastVersion := d.context.GetStatus() // Remove action from list status.Plan = status.Plan[1:] + if len(status.Plan) > 0 && status.Plan[0].MemberID == api.MemberIDPreviousAction { + // Fill in MemberID from previous action + status.Plan[0].MemberID = action.MemberID() + } // Save plan update if err := d.context.UpdateStatus(status, lastVersion); err != nil { log.Debug().Err(err).Msg("Failed to update CR status") From e9a5f713e2cb419d3f1a99b7f8f75310a99a1367 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 15 Jun 2018 14:57:41 +0200 Subject: [PATCH 2/5] Fixed agents --- pkg/deployment/reconcile/plan_builder_storage.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index ce88de9db..4ac331975 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -98,9 +98,9 @@ func createRotateServerStoragePlan(log zerolog.Logger, spec api.DeploymentSpec, ) if group == api.ServerGroupAgents { plan = append(plan, - // Scale up, so we're adding the remove agent - api.NewAction(api.ActionTypeAddMember, group, ""), - api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction), + // Scale up, so we're adding the removed agent (note: with the old ID) + api.NewAction(api.ActionTypeAddMember, group, m.ID), + api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID), ) } } From ccc65c7abb476dfffd110e77c0b8fa1d5b3760c7 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 15 Jun 2018 15:09:05 +0200 Subject: [PATCH 3/5] Revert changes in example --- examples/arango-local-storage.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/arango-local-storage.yaml b/examples/arango-local-storage.yaml index c89f918de..6c986ecd0 100644 --- a/examples/arango-local-storage.yaml +++ b/examples/arango-local-storage.yaml @@ -1,10 +1,10 @@ apiVersion: "storage.arangodb.com/v1alpha" kind: "ArangoLocalStorage" metadata: - name: "arangodb-new-local-storage" + name: "arangodb-local-storage" spec: storageClass: - name: my-new-local-ssd + name: my-local-ssd isDefault: true localPath: - - /var/lib/arango-storage-new + - /var/lib/arango-storage From ec7eebe40a9ab646ae524655367add2e5bf4b46b Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 18 Jun 2018 11:15:46 +0200 Subject: [PATCH 4/5] Improved availability check for active failover --- .../reconcile/action_wait_for_member_up.go | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pkg/deployment/reconcile/action_wait_for_member_up.go b/pkg/deployment/reconcile/action_wait_for_member_up.go index a42f92bda..20adfbabc 100644 --- a/pkg/deployment/reconcile/action_wait_for_member_up.go +++ b/pkg/deployment/reconcile/action_wait_for_member_up.go @@ -74,7 +74,7 @@ func (a *actionWaitForMemberUp) CheckProgress(ctx context.Context) (bool, bool, if a.action.Group == api.ServerGroupAgents { return a.checkProgressAgent(ctx) } - return a.checkProgressSingle(ctx) + return a.checkProgressSingleInActiveFailover(ctx) default: if a.action.Group == api.ServerGroupAgents { return a.checkProgressAgent(ctx) @@ -99,6 +99,26 @@ func (a *actionWaitForMemberUp) checkProgressSingle(ctx context.Context) (bool, return true, false, nil } +// checkProgressSingleInActiveFailover checks the progress of the action in the case +// of a single server as part of an active failover deployment. +func (a *actionWaitForMemberUp) checkProgressSingleInActiveFailover(ctx context.Context) (bool, bool, error) { + log := a.log + c, err := a.actionCtx.GetDatabaseClient(ctx) + if err != nil { + log.Debug().Err(err).Msg("Failed to create database client") + return false, false, maskAny(err) + } + if _, err := c.Version(ctx); err != nil { + log.Debug().Err(err).Msg("Failed to get version") + return false, false, maskAny(err) + } + if _, err := c.Databases(ctx); err != nil { + log.Debug().Err(err).Msg("Failed to get databases") + return false, false, maskAny(err) + } + return true, false, nil +} + // checkProgressAgent checks the progress of the action in the case // of an agent. func (a *actionWaitForMemberUp) checkProgressAgent(ctx context.Context) (bool, bool, error) { From cac9f2a11188dd0c6085844475936fcfefcc51d6 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 18 Jun 2018 11:18:05 +0200 Subject: [PATCH 5/5] Record event on not being allowed to change storage class --- pkg/deployment/reconcile/plan_builder.go | 9 +-- .../reconcile/plan_builder_storage.go | 60 ++++++++++--------- pkg/deployment/reconcile/plan_builder_test.go | 31 +++++++--- pkg/util/k8sutil/events.go | 10 ++++ 4 files changed, 69 insertions(+), 41 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index a82a8ed70..484f05013 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -54,7 +54,7 @@ func (d *Reconciler) CreatePlan() error { apiObject := d.context.GetAPIObject() spec := d.context.GetSpec() status, lastVersion := d.context.GetStatus() - newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile, d.context.GetTLSCA, d.context.GetPvc) + newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile, d.context.GetTLSCA, d.context.GetPvc, d.context.CreateEvent) // If not change, we're done if !changed { @@ -76,12 +76,13 @@ func (d *Reconciler) CreatePlan() error { // createPlan considers the given specification & status and creates a plan to get the status in line with the specification. // If a plan already exists, the given plan is returned with false. // Otherwise the new plan is returned with a boolean true. -func createPlan(log zerolog.Logger, apiObject metav1.Object, +func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, currentPlan api.Plan, spec api.DeploymentSpec, status api.DeploymentStatus, pods []v1.Pod, getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error), getTLSCA func(string) (string, string, bool, error), - getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error)) (api.Plan, bool) { + getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error), + createEvent func(evt *v1.Event)) (api.Plan, bool) { if len(currentPlan) > 0 { // Plan already exists, complete that first return currentPlan, false @@ -183,7 +184,7 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, // Check for changes storage classes or requirements if len(plan) == 0 { - plan = createRotateServerStoragePlan(log, spec, status, getPVC) + plan = createRotateServerStoragePlan(log, apiObject, spec, status, getPVC, createEvent) } // Check for the need to rotate TLS CA certificate and all members diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index 4ac331975..d0f93440c 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -28,12 +28,14 @@ import ( api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" "github.com/arangodb/kube-arangodb/pkg/util" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" ) // createRotateServerStoragePlan creates plan to rotate a server and its volume because of a // different storage class or a difference in storage resource requirements. -func createRotateServerStoragePlan(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, - getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error)) api.Plan { +func createRotateServerStoragePlan(log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, + getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error), + createEvent func(evt *v1.Event)) api.Plan { if spec.GetMode() == api.DeploymentModeSingle { // Storage cannot be changed in single server deployments return nil @@ -53,10 +55,6 @@ func createRotateServerStoragePlan(log zerolog.Logger, spec api.DeploymentSpec, // Plan is irrelevant without PVC continue } - if group == api.ServerGroupSyncWorkers { - // SyncWorkers have no externally created TLS keyfile - continue - } groupSpec := spec.GetServerGroupSpec(group) storageClassName := groupSpec.GetStorageClassName() if storageClassName == "" { @@ -78,30 +76,36 @@ func createRotateServerStoragePlan(log zerolog.Logger, spec api.DeploymentSpec, replacementNeeded = true } if replacementNeeded { - if group != api.ServerGroupAgents { - plan = append(plan, - // Scale up, so we're sure that a new member is available - api.NewAction(api.ActionTypeAddMember, group, ""), - api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction), - ) - } - if group == api.ServerGroupDBServers { - plan = append(plan, - // Cleanout - api.NewAction(api.ActionTypeCleanOutMember, group, m.ID), - ) - } - plan = append(plan, - // Shutdown & remove the server - api.NewAction(api.ActionTypeShutdownMember, group, m.ID), - api.NewAction(api.ActionTypeRemoveMember, group, m.ID), - ) - if group == api.ServerGroupAgents { + if group != api.ServerGroupAgents && group != api.ServerGroupDBServers { + // Only agents & dbservers are allowed to change their storage class. + createEvent(k8sutil.NewCannotChangeStorageClassEvent(apiObject, m.ID, group.AsRole(), "Not supported")) + continue + } else { + if group != api.ServerGroupAgents { + plan = append(plan, + // Scale up, so we're sure that a new member is available + api.NewAction(api.ActionTypeAddMember, group, ""), + api.NewAction(api.ActionTypeWaitForMemberUp, group, api.MemberIDPreviousAction), + ) + } + if group == api.ServerGroupDBServers { + plan = append(plan, + // Cleanout + api.NewAction(api.ActionTypeCleanOutMember, group, m.ID), + ) + } plan = append(plan, - // Scale up, so we're adding the removed agent (note: with the old ID) - api.NewAction(api.ActionTypeAddMember, group, m.ID), - api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID), + // Shutdown & remove the server + api.NewAction(api.ActionTypeShutdownMember, group, m.ID), + api.NewAction(api.ActionTypeRemoveMember, group, m.ID), ) + if group == api.ServerGroupAgents { + plan = append(plan, + // Scale up, so we're adding the removed agent (note: with the old ID) + api.NewAction(api.ActionTypeAddMember, group, m.ID), + api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID), + ) + } } } } diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index 9948ae722..833e1d3a7 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -29,6 +29,7 @@ import ( "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" @@ -43,6 +44,10 @@ func TestCreatePlanSingleScale(t *testing.T) { getTLSCA := func(string) (string, string, bool, error) { return "", "", false, maskAny(fmt.Errorf("Not implemented")) } + getPVC := func(pvcName string) (*v1.PersistentVolumeClaim, error) { + return nil, maskAny(fmt.Errorf("Not implemented")) + } + createEvent := func(evt *v1.Event) {} log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeSingle), @@ -58,7 +63,7 @@ func TestCreatePlanSingleScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -69,7 +74,7 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -84,7 +89,7 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale } @@ -97,6 +102,10 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { getTLSCA := func(string) (string, string, bool, error) { return "", "", false, maskAny(fmt.Errorf("Not implemented")) } + getPVC := func(pvcName string) (*v1.PersistentVolumeClaim, error) { + return nil, maskAny(fmt.Errorf("Not implemented")) + } + createEvent := func(evt *v1.Event) {} log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeActiveFailover), @@ -113,7 +122,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 2) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -126,7 +135,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 1) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -151,7 +160,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { PodName: "something4", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type) @@ -168,6 +177,10 @@ func TestCreatePlanClusterScale(t *testing.T) { getTLSCA := func(string) (string, string, bool, error) { return "", "", false, maskAny(fmt.Errorf("Not implemented")) } + getPVC := func(pvcName string) (*v1.PersistentVolumeClaim, error) { + return nil, maskAny(fmt.Errorf("Not implemented")) + } + createEvent := func(evt *v1.Event) {} log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeCluster), @@ -183,7 +196,7 @@ func TestCreatePlanClusterScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 6) // Adding 3 dbservers & 3 coordinators (note: agents do not scale now) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -216,7 +229,7 @@ func TestCreatePlanClusterScale(t *testing.T) { PodName: "coordinator1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 3) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -253,7 +266,7 @@ func TestCreatePlanClusterScale(t *testing.T) { } spec.DBServers.Count = util.NewInt(1) spec.Coordinators.Count = util.NewInt(1) - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA, getPVC, createEvent) assert.True(t, changed) require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type) diff --git a/pkg/util/k8sutil/events.go b/pkg/util/k8sutil/events.go index 599257fad..466fdf861 100644 --- a/pkg/util/k8sutil/events.go +++ b/pkg/util/k8sutil/events.go @@ -165,6 +165,16 @@ func NewPlanAbortedEvent(apiObject APIObject, itemType, memberID, role string) * return event } +// NewCannotChangeStorageClassEvent creates an event indicating that an item would need to use a different StorageClass, +// but this is not possible for the given reason. +func NewCannotChangeStorageClassEvent(apiObject APIObject, memberID, role, subReason string) *v1.Event { + event := newDeploymentEvent(apiObject) + event.Type = v1.EventTypeNormal + event.Reason = fmt.Sprintf("%s Member StorageClass Cannot Change", strings.Title(role)) + event.Message = fmt.Sprintf("Member %s with role %s should use a different StorageClass, but is cannot because: %s", memberID, role, subReason) + return event +} + // NewErrorEvent creates an even of type error. func NewErrorEvent(reason string, err error, apiObject APIObject) *v1.Event { event := newDeploymentEvent(apiObject)