From 1a415a5527f039e487153ebddb939e31dfcab126 Mon Sep 17 00:00:00 2001
From: Raghuram Devarakonda
Date: Fri, 21 Feb 2020 11:43:16 -0500
Subject: [PATCH] Support non-DR restore.

Previously, the only supported restore was in a DR scenario and
required the use of a separate Python package ("kubedrctl"). With this
change, one can create a "MetadataRestore" resource and the etcd
snapshot (and, optionally, the certificates) will be restored to the
given PVC.
---
 Makefile                                      |   2 +-
 docs/userguide/source/monitoring.rst          |   5 +
 kubedr/PROJECT                                |   3 +
 .../v1alpha1/metadatabackuppolicy_types.go    |   3 +
 kubedr/api/v1alpha1/metadatarestore_types.go  |  83 +++++
 kubedr/api/v1alpha1/zz_generated.deepcopy.go  |  89 +++++
 ...icsoftware.com_metadatabackuppolicies.yaml |   2 +
 ...atalogicsoftware.com_metadatarestores.yaml |  76 +++++
 kubedr/config/crd/kustomization.yaml          |   3 +
 .../cainjection_in_metadatarestores.yaml      |   8 +
 .../patches/webhook_in_metadatarestores.yaml  |  17 +
 .../kubedr_v1alpha1_metadatarestore.yaml      |   7 +
 .../controllers/backuplocation_controller.go  |   3 +-
 .../metadatabackuppolicy_controller.go        |   1 +
 .../controllers/metadatarestore_controller.go | 306 ++++++++++++++++++
 kubedr/controllers/suite_test.go              |   3 +
 kubedr/main.go                                |   8 +
 tests/conftest.py                             |   3 +
 tests/src/common/kubeclient.py                |  46 ++-
 tests/src/common/util.py                      |  47 +++
 tests/src/test_backup.py                      |  52 +++
 21 files changed, 760 insertions(+), 7 deletions(-)
 create mode 100644 kubedr/api/v1alpha1/metadatarestore_types.go
 create mode 100644 kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatarestores.yaml
 create mode 100644 kubedr/config/crd/patches/cainjection_in_metadatarestores.yaml
 create mode 100644 kubedr/config/crd/patches/webhook_in_metadatarestores.yaml
 create mode 100644 kubedr/config/samples/kubedr_v1alpha1_metadatarestore.yaml
 create mode 100644 kubedr/controllers/metadatarestore_controller.go

diff --git a/Makefile b/Makefile
index 4904bce..59415e2 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ DOCKER_KUBEDR_IMAGE_TAG ?= latest
 DOCKER_KUBEDR_IMAGE_NAME_SHORT ?= kubedr
 DOCKER_KUBEDR_IMAGE_NAME_LONG ?= ${DOCKER_PREFIX}${DOCKER_KUBEDR_IMAGE_NAME_SHORT}
 
-DOCKER_KUBEDRUTIL_IMAGE_TAG ?= 0.2.10
+DOCKER_KUBEDRUTIL_IMAGE_TAG ?= 0.2.11
 DOCKER_KUBEDRUTIL_IMAGE_NAME_SHORT ?= kubedrutil
 DOCKER_KUBEDRUTIL_IMAGE_NAME_LONG ?= ${DOCKER_PREFIX}${DOCKER_KUBEDRUTIL_IMAGE_NAME_SHORT}
 
diff --git a/docs/userguide/source/monitoring.rst b/docs/userguide/source/monitoring.rst
index cc8a4b8..412c96b 100644
--- a/docs/userguide/source/monitoring.rst
+++ b/docs/userguide/source/monitoring.rst
@@ -103,10 +103,15 @@ An example::
     dataAdded: 1573023
     filesChanged: 1
     filesNew: 0
+    mbrName: mbr-4c1223d6
     snapshotId: b0f347ef
     totalBytesProcessed: 15736864
     totalDurationSecs: "0.318463127"
 
+Apart from the stats regarding the backup, the status also contains
+the name of the ``MetadataBackupRecord`` resource that is required to
+perform restores.
+
 Events
 ======
 
diff --git a/kubedr/PROJECT b/kubedr/PROJECT
index ba05779..7123be5 100644
--- a/kubedr/PROJECT
+++ b/kubedr/PROJECT
@@ -11,3 +11,6 @@ resources:
 - group: kubedr
   version: v1alpha1
   kind: MetadataBackupRecord
+- group: kubedr
+  version: v1alpha1
+  kind: MetadataRestore
diff --git a/kubedr/api/v1alpha1/metadatabackuppolicy_types.go b/kubedr/api/v1alpha1/metadatabackuppolicy_types.go
index dec1d8a..8ddf42f 100644
--- a/kubedr/api/v1alpha1/metadatabackuppolicy_types.go
+++ b/kubedr/api/v1alpha1/metadatabackuppolicy_types.go
@@ -94,6 +94,9 @@ type MetadataBackupPolicyStatus struct {
 	// Name of the pod that performed the backup.
	// +kubebuilder:validation:Optional
 	BackupPod string `json:"backupPod"`
+
+	// +kubebuilder:validation:Optional
+	MBRName string `json:"mbrName"`
 }
 
 // +kubebuilder:object:root=true
diff --git a/kubedr/api/v1alpha1/metadatarestore_types.go b/kubedr/api/v1alpha1/metadatarestore_types.go
new file mode 100644
index 0000000..d6fffbe
--- /dev/null
+++ b/kubedr/api/v1alpha1/metadatarestore_types.go
@@ -0,0 +1,83 @@
+/*
+Copyright 2020 Catalogic Software
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
+// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
+
+// MetadataRestoreSpec defines the desired state of MetadataRestore
+type MetadataRestoreSpec struct {
+	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
+	// Important: Run "make" to regenerate code after modifying this file
+
+	// +kubebuilder:validation:MinLength=1
+	MBRName string `json:"mbrName"`
+
+	// +kubebuilder:validation:MinLength=1
+	PVCName string `json:"pvcName"`
+}
+
+// MetadataRestoreStatus defines the observed state of MetadataRestore
+type MetadataRestoreStatus struct {
+	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+	// Important: Run "make" to regenerate code after modifying this file
+
+	// +kubebuilder:validation:Optional
+	ObservedGeneration int64 `json:"observedGeneration"`
+
+	RestoreStatus string `json:"restoreStatus"`
+
+	// +kubebuilder:validation:Optional
+	RestoreErrorMessage string `json:"restoreErrorMessage"`
+
+	RestoreTime string `json:"restoreTime"`
+}
+
+// The creation of this resource triggers a full restore of the data
+// (the etcd snapshot and, if they were part of the backup, the
+// certificates). It would have been ideal to use a custom subresource
+// (such as "/restore"), but custom subresources are not yet supported
+// for custom resources.
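+//
+// A minimal manifest, with illustrative names (the MBR name comes from
+// a policy's "status.mbrName"):
+//
+//   apiVersion: kubedr.catalogicsoftware.com/v1alpha1
+//   kind: MetadataRestore
+//   metadata:
+//     name: restore-sample
+//     namespace: kubedr-system
+//   spec:
+//     mbrName: mbr-4c1223d6
+//     pvcName: restore-target-pvc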
+ +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status + +// MetadataRestore is the Schema for the metadatarestores API +type MetadataRestore struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec MetadataRestoreSpec `json:"spec,omitempty"` + Status MetadataRestoreStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// MetadataRestoreList contains a list of MetadataRestore +type MetadataRestoreList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MetadataRestore `json:"items"` +} + +func init() { + SchemeBuilder.Register(&MetadataRestore{}, &MetadataRestoreList{}) +} diff --git a/kubedr/api/v1alpha1/zz_generated.deepcopy.go b/kubedr/api/v1alpha1/zz_generated.deepcopy.go index c0de015..ddfadc8 100644 --- a/kubedr/api/v1alpha1/zz_generated.deepcopy.go +++ b/kubedr/api/v1alpha1/zz_generated.deepcopy.go @@ -308,3 +308,92 @@ func (in *MetadataBackupRecordStatus) DeepCopy() *MetadataBackupRecordStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetadataRestore) DeepCopyInto(out *MetadataRestore) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetadataRestore. +func (in *MetadataRestore) DeepCopy() *MetadataRestore { + if in == nil { + return nil + } + out := new(MetadataRestore) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetadataRestore) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetadataRestoreList) DeepCopyInto(out *MetadataRestoreList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MetadataRestore, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetadataRestoreList. +func (in *MetadataRestoreList) DeepCopy() *MetadataRestoreList { + if in == nil { + return nil + } + out := new(MetadataRestoreList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MetadataRestoreList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetadataRestoreSpec) DeepCopyInto(out *MetadataRestoreSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetadataRestoreSpec. +func (in *MetadataRestoreSpec) DeepCopy() *MetadataRestoreSpec { + if in == nil { + return nil + } + out := new(MetadataRestoreSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetadataRestoreStatus) DeepCopyInto(out *MetadataRestoreStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetadataRestoreStatus. +func (in *MetadataRestoreStatus) DeepCopy() *MetadataRestoreStatus { + if in == nil { + return nil + } + out := new(MetadataRestoreStatus) + in.DeepCopyInto(out) + return out +} diff --git a/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml b/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml index b45d707..4a79c61 100644 --- a/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml +++ b/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml @@ -91,6 +91,8 @@ spec: type: integer filesNew: type: integer + mbrName: + type: string snapshotId: type: string totalBytesProcessed: diff --git a/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatarestores.yaml b/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatarestores.yaml new file mode 100644 index 0000000..843f174 --- /dev/null +++ b/kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatarestores.yaml @@ -0,0 +1,76 @@ + +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.2.2 + creationTimestamp: null + name: metadatarestores.kubedr.catalogicsoftware.com +spec: + group: kubedr.catalogicsoftware.com + names: + kind: MetadataRestore + listKind: MetadataRestoreList + plural: metadatarestores + singular: metadatarestore + scope: "" + subresources: + status: {} + validation: + openAPIV3Schema: + description: MetadataRestore is the Schema for the metadatarestores API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+          type: string
+        metadata:
+          type: object
+        spec:
+          description: MetadataRestoreSpec defines the desired state of MetadataRestore
+          properties:
+            mbrName:
+              minLength: 1
+              type: string
+            pvcName:
+              minLength: 1
+              type: string
+          required:
+          - mbrName
+          - pvcName
+          type: object
+        status:
+          description: MetadataRestoreStatus defines the observed state of MetadataRestore
+          properties:
+            observedGeneration:
+              format: int64
+              type: integer
+            restoreErrorMessage:
+              type: string
+            restoreStatus:
+              type: string
+            restoreTime:
+              type: string
+          required:
+          - restoreStatus
+          - restoreTime
+          type: object
+      type: object
+  version: v1alpha1
+  versions:
+  - name: v1alpha1
+    served: true
+    storage: true
+status:
+  acceptedNames:
+    kind: ""
+    plural: ""
+  conditions: []
+  storedVersions: []
diff --git a/kubedr/config/crd/kustomization.yaml b/kubedr/config/crd/kustomization.yaml
index 096a8a1..8a0fd7d 100644
--- a/kubedr/config/crd/kustomization.yaml
+++ b/kubedr/config/crd/kustomization.yaml
@@ -5,6 +5,7 @@ resources:
 - bases/kubedr.catalogicsoftware.com_backuplocations.yaml
 - bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml
 - bases/kubedr.catalogicsoftware.com_metadatabackuprecords.yaml
+- bases/kubedr.catalogicsoftware.com_metadatarestores.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
 patchesStrategicMerge:
@@ -13,6 +14,7 @@ patchesStrategicMerge:
 # - patches/webhook_in_backuplocations.yaml
 # - patches/webhook_in_metadatabackuppolicies.yaml
 # - patches/webhook_in_metadatabackuprecords.yaml
+#- patches/webhook_in_metadatarestores.yaml
 # +kubebuilder:scaffold:crdkustomizewebhookpatch
 
 # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix.
@@ -20,6 +22,7 @@ patchesStrategicMerge:
 # - patches/cainjection_in_backuplocations.yaml
 # - patches/cainjection_in_metadatabackuppolicies.yaml
 # - patches/cainjection_in_metadatabackuprecords.yaml
+#- patches/cainjection_in_metadatarestores.yaml
 # +kubebuilder:scaffold:crdkustomizecainjectionpatch
 
 # the following config is for teaching kustomize how to do kustomization for CRDs.
diff --git a/kubedr/config/crd/patches/cainjection_in_metadatarestores.yaml b/kubedr/config/crd/patches/cainjection_in_metadatarestores.yaml
new file mode 100644
index 0000000..2718196
--- /dev/null
+++ b/kubedr/config/crd/patches/cainjection_in_metadatarestores.yaml
@@ -0,0 +1,8 @@
+# The following patch adds a directive for certmanager to inject CA into the CRD
+# CRD conversion requires k8s 1.13 or later.
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
+  name: metadatarestores.kubedr.catalogicsoftware.com
diff --git a/kubedr/config/crd/patches/webhook_in_metadatarestores.yaml b/kubedr/config/crd/patches/webhook_in_metadatarestores.yaml
new file mode 100644
index 0000000..d66ee69
--- /dev/null
+++ b/kubedr/config/crd/patches/webhook_in_metadatarestores.yaml
@@ -0,0 +1,17 @@
+# The following patch enables conversion webhook for CRD
+# CRD conversion requires k8s 1.13 or later.
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  name: metadatarestores.kubedr.catalogicsoftware.com
+spec:
+  conversion:
+    strategy: Webhook
+    webhookClientConfig:
+      # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
+      # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
+      caBundle: Cg==
+      service:
+        namespace: system
+        name: webhook-service
+        path: /convert
diff --git a/kubedr/config/samples/kubedr_v1alpha1_metadatarestore.yaml b/kubedr/config/samples/kubedr_v1alpha1_metadatarestore.yaml
new file mode 100644
index 0000000..ab97ba2
--- /dev/null
+++ b/kubedr/config/samples/kubedr_v1alpha1_metadatarestore.yaml
@@ -0,0 +1,7 @@
+apiVersion: kubedr.catalogicsoftware.com/v1alpha1
+kind: MetadataRestore
+metadata:
+  name: metadatarestore-sample
+spec:
+  mbrName: mbr-4c1223d6
+  pvcName: restore-target-pvc
diff --git a/kubedr/controllers/backuplocation_controller.go b/kubedr/controllers/backuplocation_controller.go
index 8dcb4f2..7489022 100644
--- a/kubedr/controllers/backuplocation_controller.go
+++ b/kubedr/controllers/backuplocation_controller.go
@@ -212,9 +212,10 @@ func (r *BackupLocationReconciler) Reconcile(req ctrl.Request) (ctrl.Result, err
 		return ctrl.Result{}, err
 	}
 
-	log.Info("Creating a new Pod", "Pod.Namespace", initPod.Namespace, "Pod.Name", initPod.Name)
+	log.Info("Starting a new Pod", "Pod.Namespace", initPod.Namespace, "Pod.Name", initPod.Name)
 	err = r.Create(ctx, initPod)
 	if err != nil {
+		r.Log.Error(err, "Error in starting init pod")
 		r.setStatus(&backupLoc, "Failed", err.Error())
 		return ctrl.Result{}, err
 	}
diff --git a/kubedr/controllers/metadatabackuppolicy_controller.go b/kubedr/controllers/metadatabackuppolicy_controller.go
index f9b7a21..a0c2079 100644
--- a/kubedr/controllers/metadatabackuppolicy_controller.go
+++ b/kubedr/controllers/metadatabackuppolicy_controller.go
@@ -203,6 +203,7 @@ func (r *MetadataBackupPolicyReconciler) processSpecAndStatus(policy *kubedrv1al
 }
 
 func (r *MetadataBackupPolicyReconciler) setStatus(policy *kubedrv1alpha1.MetadataBackupPolicy) {
+	policy.Status.MBRName = ""
 	policy.Status.BackupStatus = "Initializing"
 	policy.Status.BackupTime = metav1.Now().String()
 
diff --git a/kubedr/controllers/metadatarestore_controller.go b/kubedr/controllers/metadatarestore_controller.go
new file mode 100644
index 0000000..76b3c49
--- /dev/null
+++ b/kubedr/controllers/metadatarestore_controller.go
@@ -0,0 +1,306 @@
+/*
+Copyright 2020 Catalogic Software
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/go-logr/logr"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	kubedrv1alpha1 "kubedr/api/v1alpha1"
+)
+
+// MetadataRestoreReconciler reconciles a MetadataRestore object
+type MetadataRestoreReconciler struct {
+	client.Client
+	Log    logr.Logger
+	Scheme *runtime.Scheme
+}
+
+func (r *MetadataRestoreReconciler) setStatus(mr *kubedrv1alpha1.MetadataRestore, status string, errmsg string) {
+	mr.Status.ObservedGeneration = mr.ObjectMeta.Generation
+
+	mr.Status.RestoreStatus = status
+	mr.Status.RestoreErrorMessage = errmsg
+	mr.Status.RestoreTime = metav1.Now().String()
+
+	r.Log.Info("Updating status...")
+	if err := r.Status().Update(context.Background(), mr); err != nil {
+		r.Log.Error(err, "unable to update MetadataRestore status")
+	}
+}
+
+// +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatarestores,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatarestores/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatabackuprecords/status,verbs=get
+// +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=backuplocations/status,verbs=get
+// +kubebuilder:rbac:groups=core,resources=pods,verbs=create;get
+
+/*
+ * Top level Reconcile logic
+ *
+ * - If the generation number hasn't changed, do nothing. We don't want to
+ *   process updates unless the spec has changed.
+ *
+ * - Check if the annotation that indicates that this restore resource was
+ *   already processed is present. If so, there is nothing more to do. If
+ *   not, proceed with the restore logic.
+ *
+ * - There is nothing to do for deletion, so we don't add any finalizers.
+ *
+ * - If there is a previous restore pod for this resource, delete the pod.
+ *
+ * - Create the pod that will restore the data. The kubedrutil "restore"
+ *   command will call restic to restore the data and then set the
+ *   annotation to indicate that this resource has been processed.
+ *
+ * - The "restore" command will also set the status, both in case of
+ *   success and failure.
+ */
+
+// Reconcile is the main entry point called by the framework.
+func (r *MetadataRestoreReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
+	ctx := context.Background()
+
+	var mr kubedrv1alpha1.MetadataRestore
+	if err := r.Get(ctx, req.NamespacedName, &mr); err != nil {
+		if apierrors.IsNotFound(err) {
+			// We'll ignore not-found errors, since they can't be fixed by an
+			// immediate requeue (we'll need to wait for a new notification).
+			r.Log.Info("MetadataRestore (" + req.NamespacedName.Name + ") is not found")
+			return ctrl.Result{}, nil
+		}
+
+		r.Log.Error(err, "unable to fetch MetadataRestore")
+		return ctrl.Result{}, err
+	}
+
+	// Skip if the spec hasn't changed. This check prevents reconcile on
+	// status updates.
+	if mr.Status.ObservedGeneration == mr.ObjectMeta.Generation {
+		r.Log.Info("Skipping reconcile as generation number hasn't changed")
+		return ctrl.Result{}, nil
+	}
+
+	// No deletion logic as we don't really have anything to do during
+	// deletion of a MetadataRestore resource.
+
+	// Check annotations to see if this resource was already processed
+	// and restore was successful.
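+	// A resource that was already processed carries an annotation like
+	// the following (set on success by the restore pod's "kubedrutil
+	// restore" command):
+	//
+	//   metadata:
+	//     annotations:
+	//       restored.annotations.kubedr.catalogicsoftware.com: "true"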
+	restoreAnnotation := "restored.annotations.kubedr.catalogicsoftware.com"
+
+	restored, exists := mr.ObjectMeta.Annotations[restoreAnnotation]
+	if exists && (restored == "true") {
+		// No need to process the resource as restore was done already.
+		r.Log.Info("Restore was already done")
+		return ctrl.Result{}, nil
+	}
+
+	// We are deliberately avoiding any attempt to make the name unique.
+	// The client is in a better position to come up with a unique name.
+	// If we do switch to generating a unique name, we need to make sure
+	// that any previous pods are cleaned up.
+	podName := mr.Name + "-mr"
+
+	// Since we don't generate a unique name for the restore pod, we need
+	// to explicitly check and delete the pod if it already exists.
+	var prevPod corev1.Pod
+	if err := r.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: podName}, &prevPod); err == nil {
+		r.Log.Info("Found a previous restore pod, will delete it and continue...")
+		if err := r.Delete(ctx, &prevPod); ignoreNotFound(err) != nil {
+			r.Log.Error(err, "Error in deleting previous restore pod")
+			return ctrl.Result{}, err
+		}
+	}
+
+	pod, err := r.buildRestorePod(&mr, req.Namespace, podName)
+	if err != nil {
+		r.Log.Error(err, "Error in creating restore pod")
+		if apierrors.IsNotFound(err) {
+			// This shouldn't really happen, but if an invalid MBR is given or
+			// if the backup location inside the MBR is wrong, there is
+			// nothing we can do.
+			r.setStatus(&mr, "Failed", "Error in creating restore pod")
+			return ctrl.Result{}, nil
+		}
+
+		return ctrl.Result{}, err
+	}
+
+	if err := ctrl.SetControllerReference(&mr, pod, r.Scheme); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	r.Log.Info("Starting a new Pod", "Pod.Namespace", pod.Namespace, "Pod.Name", pod.Name)
+	err = r.Create(ctx, pod)
+	if err != nil {
+		r.Log.Error(err, "Error in starting restore pod")
+		r.setStatus(&mr, "Failed", err.Error())
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// SetupWithManager hooks up this controller with the manager.
+func (r *MetadataRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&kubedrv1alpha1.MetadataRestore{}).
+		Complete(r)
+}
+
+func getRepoData(backupLocation *kubedrv1alpha1.BackupLocation) (string, *corev1.SecretKeySelector,
+	*corev1.SecretKeySelector, *corev1.SecretKeySelector) {
+
+	s3EndPoint := "s3:" + backupLocation.Spec.Url + "/" + backupLocation.Spec.BucketName
+
+	accessKey := corev1.SecretKeySelector{}
+	accessKey.Name = backupLocation.Spec.Credentials
+	accessKey.Key = "access_key"
+
+	secretKey := corev1.SecretKeySelector{}
+	secretKey.Name = backupLocation.Spec.Credentials
+	secretKey.Key = "secret_key"
+
+	resticPassword := corev1.SecretKeySelector{}
+	resticPassword.Name = backupLocation.Spec.Credentials
+	resticPassword.Key = "restic_repo_password"
+
+	return s3EndPoint, &accessKey, &secretKey, &resticPassword
+}
+
+func (r *MetadataRestoreReconciler) buildRestorePod(cr *kubedrv1alpha1.MetadataRestore,
+	namespace string, podName string) (*corev1.Pod, error) {
+
+	kubedrUtilImage := os.Getenv("KUBEDR_UTIL_IMAGE")
+	if kubedrUtilImage == "" {
+		// This should really not happen.
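+		// KUBEDR_UTIL_IMAGE is presumably injected by the operator's
+		// deployment manifest; there is no in-code default, hence the
+		// hard failure here.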
+ err := fmt.Errorf("KUBEDR_UTIL_IMAGE is not set") + r.Log.Error(err, "") + return nil, err + } + + mbr := &kubedrv1alpha1.MetadataBackupRecord{} + mbrKey := types.NamespacedName{Namespace: namespace, Name: cr.Spec.MBRName} + if err := r.Get(context.TODO(), mbrKey, mbr); err != nil { + return nil, err + } + + backupLocation := &kubedrv1alpha1.BackupLocation{} + backupLocKey := types.NamespacedName{Namespace: namespace, Name: mbr.Spec.Backuploc} + if err := r.Get(context.TODO(), backupLocKey, backupLocation); err != nil { + return nil, err + } + s3EndPoint, accessKey, secretKey, resticPassword := getRepoData(backupLocation) + + labels := map[string]string{ + "kubedr.type": "restore", + "kubedr.restore-mbr": mbr.Name, + } + + targetDirVolume := corev1.Volume{Name: "restore-target"} + targetDirVolume.PersistentVolumeClaim = &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: cr.Spec.PVCName} + + volumes := []corev1.Volume{ + targetDirVolume, + } + + env := []corev1.EnvVar{ + { + Name: "MY_POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: "AWS_ACCESS_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: accessKey, + }, + }, + { + Name: "AWS_SECRET_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: secretKey, + }, + }, + { + Name: "RESTIC_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: resticPassword, + }, + }, + { + Name: "KDR_MR_NAME", + Value: cr.Name, + }, + { + Name: "RESTIC_REPO", + Value: s3EndPoint, + }, + { + Name: "KDR_RESTORE_DEST", + Value: "/restore", + }, + } + + volumeMounts := []corev1.VolumeMount{ + { + Name: "restore-target", + MountPath: "/restore", + }, + } + + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: cr.Namespace, + Labels: labels, + }, + + Spec: corev1.PodSpec{ + RestartPolicy: "Never", + + Volumes: volumes, + + Containers: []corev1.Container{ + { + Name: cr.Name, + Image: kubedrUtilImage, + VolumeMounts: volumeMounts, + Env: env, + + Args: []string{ + "/usr/local/bin/kubedrutil", "restore", + }, + }, + }, + }, + }, nil +} diff --git a/kubedr/controllers/suite_test.go b/kubedr/controllers/suite_test.go index f10a5aa..667b549 100644 --- a/kubedr/controllers/suite_test.go +++ b/kubedr/controllers/suite_test.go @@ -70,6 +70,9 @@ var _ = BeforeSuite(func(done Done) { err = kubedrv1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = kubedrv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + // +kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) diff --git a/kubedr/main.go b/kubedr/main.go index d6c1878..6a32da3 100644 --- a/kubedr/main.go +++ b/kubedr/main.go @@ -118,6 +118,14 @@ func main() { os.Exit(1) } */ + if err = (&controllers.MetadataRestoreReconciler{ + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("controllers").WithName("MetadataRestore"), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "MetadataRestore") + os.Exit(1) + } // +kubebuilder:scaffold:builder setupLog.Info("starting manager") diff --git a/tests/conftest.py b/tests/conftest.py index 9142368..c0b39d2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,7 +53,10 @@ def _init_apis(self): self.pod_api = kubeclient.PodAPI(self.namespace) self.backuploc_api = kubeclient.BackupLocationAPI(self.namespace) self.mbp_api = 
kubeclient.MetadataBackupPolicyAPI(self.namespace) + self.mr_api = kubeclient.MetadataRestoreAPI(self.namespace) self.secret_api = kubeclient.SecretAPI(self.namespace) + self.pvc_api = kubeclient.PersistentVolumeClaimAPI(self.namespace) + self.pv_api = kubeclient.PersistentVolumeAPI() # This is being set as a global variable so that library code # such as "kubeclient" can easily access the configuration set diff --git a/tests/src/common/kubeclient.py b/tests/src/common/kubeclient.py index bbc745f..a858857 100644 --- a/tests/src/common/kubeclient.py +++ b/tests/src/common/kubeclient.py @@ -19,6 +19,7 @@ def __init__(self, namespace="default"): def create_metadata(self, name): metadata = client.V1ObjectMeta() metadata.name = name + metadata.namespace = self.namespace return metadata @@ -41,7 +42,7 @@ def create(self, name, spec): self.res["metadata"] = {"name": name} self.res["spec"] = spec - self.cr_api.create_namespaced_custom_object( + return self.cr_api.create_namespaced_custom_object( group=self.group, version=self.version, namespace=self.namespace, plural=self.plural, body=self.res) @@ -64,14 +65,43 @@ def create(self, name, data): body.data = data body.metadata = self.create_metadata(name) - body.metadata.namespace = self.namespace - self.v1api.create_namespaced_secret(body.metadata.namespace, body) + return self.v1api.create_namespaced_secret(body.metadata.namespace, body) def delete(self, name): self.v1api.delete_namespaced_secret(name, self.namespace, body=client.V1DeleteOptions()) +class PersistentVolumeAPI(KubeResourceAPI): + def __init__(self): + super().__init__() + + def create(self, name, spec): + body = client.V1PersistentVolume() + body.spec = spec + + body.metadata = self.create_metadata(name) + + return self.v1api.create_persistent_volume(body) + + def delete(self, name): + self.v1api.delete_persistent_volume(name, body=client.V1DeleteOptions()) + +class PersistentVolumeClaimAPI(KubeResourceAPI): + def __init__(self, namespace="default"): + super().__init__(namespace) + + def create(self, name, spec): + body = client.V1PersistentVolumeClaim() + body.spec = spec + + body.metadata = self.create_metadata(name) + + return self.v1api.create_namespaced_persistent_volume_claim(self.namespace, body) + + def delete(self, name): + self.v1api.delete_namespaced_persistent_volume_claim(name, self.namespace, body=client.V1DeleteOptions()) + class PodAPI(KubeResourceAPI): def __init__(self, namespace="default"): super().__init__(namespace) @@ -113,6 +143,12 @@ def __init__(self, namespace="default"): self.kind = "MetadataBackupPolicy" self.plural = "metadatabackuppolicies" +class MetadataRestoreAPI(KubedrV1AlphaResource): + def __init__(self, namespace="default"): + super().__init__(namespace) + self.kind = "MetadataRestore" + self.plural = "metadatarestores" + def create_backuploc_creds(name, access_key, secret_key, restic_password): creds_data = { "access_key": base64.b64encode(access_key.encode("utf-8")).decode("utf-8"), @@ -120,7 +156,7 @@ def create_backuploc_creds(name, access_key, secret_key, restic_password): "restic_repo_password": base64.b64encode(restic_password.encode("utf-8")).decode("utf-8") } secret_api = SecretAPI(namespace="kubedr-system") - secret_api.create(name, creds_data) + return secret_api.create(name, creds_data) def create_etcd_creds(name, ca_crt, client_crt, client_key): creds_data = { @@ -129,7 +165,7 @@ def create_etcd_creds(name, ca_crt, client_crt, client_key): "client.key": base64.b64encode(open(client_key, "rb").read()).decode("utf-8") } secret_api = 
SecretAPI(namespace="kubedr-system") - secret_api.create(name, creds_data) + return secret_api.create(name, creds_data) def wait_for_pod_to_appear(label_selector): num_attempts = conftest.envconfig.wait_for_res_to_appear_num_attempts diff --git a/tests/src/common/util.py b/tests/src/common/util.py index 3d093af..e8967b3 100644 --- a/tests/src/common/util.py +++ b/tests/src/common/util.py @@ -1,8 +1,12 @@ import logging import os +import pprint import tempfile +import time import traceback +from common import kubeclient + def ignore_errors(func): try: func() @@ -15,3 +19,46 @@ def ignore_errors_pred(predicate, func): func() except: logging.error(traceback.format_exc()) + +def timestamp(): + return int(time.time()) + +def create_hostpath_pv(): + pv_api = kubeclient.PersistentVolumeAPI() + pv_name = "{}-{}".format("pv", timestamp()) + pv_dir = tempfile.mkdtemp() + + pv_spec = { + "accessModes": ["ReadWriteOnce"], + "capacity": { + "storage": "2Gi" + }, + "hostPath": { + "path": pv_dir + }, + "persistentVolumeReclaimPolicy": "Delete", + "storageClassName": "standard", + "volumeMode": "Filesystem" + } + + return pv_api.create(pv_name, pv_spec) + +def create_pvc_for_pv(pv): + pprint.pprint(pv) + pvc_api = kubeclient.PersistentVolumeClaimAPI(namespace="kubedr-system") + name = "{}-{}".format("pvc", timestamp()) + + spec = { + "accessModes": ["ReadWriteOnce"], + "resources": { + "requests": { + "storage": pv.spec.capacity["storage"] + } + }, + "volumeMode": "Filesystem", + "volumeName": pv.metadata.name + } + + return pvc_api.create(name, spec) + + diff --git a/tests/src/test_backup.py b/tests/src/test_backup.py index c1d1367..abf36ac 100644 --- a/tests/src/test_backup.py +++ b/tests/src/test_backup.py @@ -1,5 +1,7 @@ +import os import pprint +import shutil import subprocess import time @@ -15,16 +17,24 @@ def log_state(namespace, resdata): # Capture the state before cleaning up resources. This will help in # debugging. print("Output of 'describe all'") + subprocess.call("kubectl describe persistentvolume", shell=True) subprocess.call("kubectl -n {} describe all".format(namespace), shell=True) subprocess.call("kubectl -n {} describe backuplocation".format(namespace), shell=True) subprocess.call("kubectl -n {} describe metadatabackuppolicy".format(namespace), shell=True) subprocess.call("kubectl -n {} describe metadatabackuprecord".format(namespace), shell=True) + subprocess.call("kubectl -n {} describe metadatarestore".format(namespace), shell=True) + subprocess.call("kubectl -n {} describe persistentvolumeclaim".format(namespace), shell=True) print("Output of 'logs'") for pod_name in resdata["pods"]: print("Output of 'logs' for {}".format(pod_name)) subprocess.call("kubectl -n {} logs --all-containers {}".format(namespace, pod_name), shell=True) + if "pv_path" in resdata: + print("contents of PV dir {}".format(resdata["pv_path"])) + cmd = "ls -lR {}".format(resdata["pv_path"]) + subprocess.call(cmd, shell=True) + # "resources" is used to store state as resources are being created. # This allows us to delete all the resources in one place and also # enables deletion even in case of test failures. 
@@ -48,9 +58,17 @@ def resources(globalconfig):
 
     util.ignore_errors(lambda: log_state(globalconfig.namespace, resdata))
 
+    util.ignore_errors_pred("restore_name" in resdata, lambda: globalconfig.mr_api.delete(resdata["restore_name"]))
     util.ignore_errors_pred("backup_name" in resdata, lambda: globalconfig.mbp_api.delete(resdata["backup_name"]))
     util.ignore_errors_pred("etcd_creds" in resdata, lambda: globalconfig.secret_api.delete(resdata["etcd_creds"]))
     util.ignore_errors_pred("backuploc_name" in resdata, lambda: globalconfig.backuploc_api.delete(resdata["backuploc_name"]))
+    util.ignore_errors_pred("pvc_name" in resdata, lambda: globalconfig.pvc_api.delete(resdata["pvc_name"]))
+
+    # The PV should be deleted automatically when the PVC is deleted but, in case
+    # the PVC was never created or to cover any corner cases, delete the PV anyway.
+    util.ignore_errors_pred("pv_name" in resdata, lambda: globalconfig.pv_api.delete(resdata["pv_name"]))
+    util.ignore_errors_pred("pv_path" in resdata, lambda: shutil.rmtree(resdata["pv_path"]))
+
     util.ignore_errors(lambda: globalconfig.secret_api.delete(backuploc_creds))
 
 @pytest.mark.dependency()
@@ -168,6 +186,40 @@ def test_backup_with_certificates(globalconfig, resources):
     policy = do_backup(globalconfig, resources, backup_name, backup_spec)
 
     status = policy["status"]
+    resources["mbr_with_certs"] = status["mbrName"]
     files_total = status["filesChanged"] + status["filesNew"]
 
     assert files_total > 1
+
+@pytest.mark.dependency(depends=["test_backup_with_certificates"])
+def test_restore(globalconfig, resources):
+    pv = util.create_hostpath_pv()
+    resources["pv_name"] = pv.metadata.name
+    resources["pv_path"] = pv.spec.host_path.path
+
+    pvc = util.create_pvc_for_pv(pv)
+    resources["pvc_name"] = pvc.metadata.name
+
+    mr_name = "{}-{}".format("mr", util.timestamp())
+    mr_spec = {
+        "mbrName": resources["mbr_with_certs"],
+        "pvcName": resources["pvc_name"]
+    }
+
+    globalconfig.mr_api.create(mr_name, mr_spec)
+    resources["restore_name"] = mr_name
+
+    label_selector = 'kubedr.type=restore,kubedr.restore-mbr={}'.format(mr_spec["mbrName"])
+    restore_pod = globalconfig.pod_api.get_by_watch(label_selector)
+
+    pod_name = restore_pod.metadata.name
+    resources["pods"].append(pod_name)
+
+    phase = restore_pod.status.phase
+    if phase == "Running" or phase == "Pending":
+        kubeclient.wait_for_pod_to_be_done(pod_name)
+        restore_pod = globalconfig.pod_api.read(pod_name)
+
+    assert restore_pod.status.phase == "Succeeded"
+    assert os.path.exists("{}/data/etcd-snapshot.db".format(resources["pv_path"]))
+    assert os.path.exists("{}/data/certificates".format(resources["pv_path"]))
+    assert os.listdir(resources["pv_path"])
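
For reference, the restore target can be any bound ReadWriteOnce claim
in the same namespace as the MetadataRestore and the MBR; the test
suite creates one along these lines (names, size, and the pre-created
hostPath PV are illustrative):

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: restore-target-pvc
      namespace: kubedr-system
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          storage: 2Gi
      volumeMode: Filesystem
      volumeName: <pre-created hostPath PV>

Once the restore pod succeeds, the volume contains
data/etcd-snapshot.db and, if certificates were part of the backup, a
data/certificates directory (see the assertions in test_restore above).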