Skip to content

Commit

Permalink
Add Instance Stop El Carro. (#323)
Browse files Browse the repository at this point in the history
In this commit we add in a stop/start feature. When an instance is stopped, the sts replicas will be scaled down to 0, the monitoring pod will be deleted, and all services relating to the instance will be deleted.

Change-Id: Ie9e0d6ba17d456d32c3dc8cd59fbe9cc1fad3f42

Co-authored-by: Tabatha Lewis <[email protected]>
  • Loading branch information
tabbyl21 and tabbyl21 authored Mar 6, 2023
1 parent c072f84 commit a8cc6df
Show file tree
Hide file tree
Showing 12 changed files with 230 additions and 12 deletions.
4 changes: 4 additions & 0 deletions common/api/v1alpha1/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ type InstanceSpec struct {
// AdminUser represents the admin user specification
// +optional
AdminUser *AdminUserSpec `json:"adminUser,omitempty"`

// IsStopped is true if an instance is stopped, false otherwise
// +optional
IsStopped *bool `json:"isStopped,omitempty"`
}

// DBLoadBalancerOptions contains customization options for the Kubernetes
Expand Down
5 changes: 5 additions & 0 deletions common/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ spec:
an optional map that allows a customer to specify GCR images different
from those chosen/provided.
type: object
isStopped:
description: IsStopped is true if an instance is stopped, false otherwise
type: boolean
maintenanceWindow:
description: MaintenanceWindow specifies the time windows during which
database downtimes are allowed for maintenance.
Expand Down
1 change: 1 addition & 0 deletions oracle/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ rules:
- services
verbs:
- create
- delete
- get
- list
- patch
Expand Down
1 change: 1 addition & 0 deletions oracle/controllers/instancecontroller/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ go_library(
"@io_k8s_sigs_controller_runtime//pkg/predicate",
"@io_k8s_sigs_controller_runtime//pkg/reconcile",
"@io_k8s_sigs_controller_runtime//pkg/source",
"@io_k8s_utils//pointer",
"@org_golang_google_protobuf//types/known/timestamppb",
],
)
Expand Down
77 changes: 74 additions & 3 deletions oracle/controllers/instancecontroller/instance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (r *InstanceReconciler) Scheme() *runtime.Scheme {
// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=services,verbs=list;watch;get;patch;create
// +kubebuilder:rbac:groups=core,resources=services,verbs=list;watch;get;patch;create;delete
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch
Expand Down Expand Up @@ -125,8 +125,14 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
}
}()

if !inst.DeletionTimestamp.IsZero() {
instanceReadyCond := k8s.FindCondition(inst.Status.Conditions, k8s.Ready)

if IsDeleting(&inst) {
return r.reconcileInstanceDeletion(ctx, req, log)
} else if IsStopped(&inst) && !k8s.ConditionReasonEquals(instanceReadyCond, k8s.InstanceStopped) {
return r.reconcileInstanceStop(ctx, req, log)
} else if !IsStopped(&inst) && k8s.ConditionReasonEquals(instanceReadyCond, k8s.InstanceStopped) {
k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.CreateInProgress, "Restarting Instance")
}

// Add finalizer to clean up underlying objects in case of deletion.
Expand All @@ -144,7 +150,7 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
}
log.Info("common instance", "total allocated disk space across all instance disks [Gi]", diskSpace/1024/1024/1024)

instanceReadyCond := k8s.FindCondition(inst.Status.Conditions, k8s.Ready)
instanceReadyCond = k8s.FindCondition(inst.Status.Conditions, k8s.Ready)
dbInstanceCond := k8s.FindCondition(inst.Status.Conditions, k8s.DatabaseInstanceReady)

if inst.Spec.Mode == commonv1alpha1.Pause {
Expand Down Expand Up @@ -440,6 +446,64 @@ func (r *InstanceReconciler) reconcileInstanceDeletion(ctx context.Context, req
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

func (r *InstanceReconciler) reconcileInstanceStop(ctx context.Context, req ctrl.Request, log logr.Logger) (ctrl.Result, error) {
log.Info("Stopping Instance...", "InstanceName", req.NamespacedName.Name)

var inst v1alpha1.Instance
if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
r.recordEventAndUpdateStatus(ctx, &inst, v1.ConditionFalse, k8s.InstanceStoppingInProgress, "Instance is being stopped", log)
if _, err := r.stopDBStatefulset(ctx, req, log); err != nil {
return ctrl.Result{}, err
}
//delete the monitoring pod
var monitor appsv1.Deployment
if err := r.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: fmt.Sprintf("%s-monitor", req.Name)}, &monitor); err == nil {
if err := r.Delete(ctx, &monitor); err != nil {
log.Error(err, "failed to delete monitoring deployment", "InstanceName", req.Name, "MonitorDeployment", monitor.Name)
return ctrl.Result{}, err
}
} else if !apierrors.IsNotFound(err) { // retry on other errors.
return ctrl.Result{}, err
}
if _, err := r.deleteDBLoadBalancer(ctx, &inst, log); err != nil {
return ctrl.Result{}, err
}
if _, err := r.deleteDBDSVC(ctx, &inst, log); err != nil {
return ctrl.Result{}, err
}
if _, err := r.deleteAgentSVC(ctx, &inst, log); err != nil {
return ctrl.Result{}, err
}

if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
r.recordEventAndUpdateStatus(ctx, &inst, v1.ConditionTrue, k8s.InstanceStopped, "Instance has been stopped", log)

return ctrl.Result{Requeue: false}, nil

}

func (r *InstanceReconciler) shutdownDB(ctx context.Context, inst v1alpha1.Instance) error {
dbClient, closeConn, err := r.DatabaseClientFactory.New(ctx, r, inst.GetNamespace(), inst.Name)
if err != nil {
return err
}
defer closeConn()

_, err = dbClient.BounceDatabase(ctx, &dbdpb.BounceDatabaseRequest{
Operation: dbdpb.BounceDatabaseRequest_SHUTDOWN,
DatabaseName: inst.Spec.CDBName,
Option: "immediate",
})
if err != nil {
return fmt.Errorf("BounceDatabase: error while shutting db: %v", err)
}
return nil
}

// reconcileDatabaseInstance reconciling the underlying database instance.
// Successful state transition for seeded instance:
// nil->BootstrapPending->BootstrapInProgress->CreateFailed/CreateComplete
Expand Down Expand Up @@ -704,3 +768,10 @@ func lroOperationID(opType string, instance *v1alpha1.Instance) string {
return fmt.Sprintf("%s_%s", opType, instance.GetUID())
}
}
func IsStopped(instance *v1alpha1.Instance) bool {
return instance.InstanceSpec().IsStopped != nil && *instance.InstanceSpec().IsStopped == true
}

func IsDeleting(instance *v1alpha1.Instance) bool {
return !instance.GetDeletionTimestamp().IsZero()
}
90 changes: 89 additions & 1 deletion oracle/controllers/instancecontroller/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
storagev1 "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/pointer"

"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -219,6 +220,59 @@ func (r *InstanceReconciler) reconcileMonitoring(ctx context.Context, inst *v1al
return ctrl.Result{RequeueAfter: requeueDuration}, nil
}

func (r *InstanceReconciler) stopDBStatefulset(ctx context.Context, req ctrl.Request, log logr.Logger) (ctrl.Result, error) {
var inst v1alpha1.Instance
if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
sts := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: getSTSName(inst),
Namespace: inst.Namespace,
},
}
if err := r.Get(ctx, client.ObjectKeyFromObject(sts), sts); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get statefulset: %v", err)
}

sts.Spec.Replicas = pointer.Int32(controllers.StoppedReplicaCnt)
baseSTS := &appsv1.StatefulSet{}
sts.DeepCopyInto(baseSTS)
if _, err := ctrl.CreateOrUpdate(ctx, r, baseSTS, func() error {
sts.Spec.DeepCopyInto(&baseSTS.Spec)
return nil
}); err != nil {
log.Error(err, "failed to update the StatefulSet", "sts.Status", sts.Status)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil

}

func (r *InstanceReconciler) deleteAgentSVC(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
if err := r.Delete(ctx, AgentSVC(*inst)); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "failed to delete agent svc", "InstanceName", inst.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}

func (r *InstanceReconciler) deleteDBDSVC(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
if err := r.Delete(ctx, DbDaemonSVC(*inst)); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "failed to delete dbdaemon svc", "InstanceName", inst.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}

func (r *InstanceReconciler) deleteDBLoadBalancer(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
if err := r.Delete(ctx, InstanceLB(*inst)); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "failed to delete load balancer", "InstanceName", inst.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}

// CreateDBLoadBalancer returns the service for the database.
func (r *InstanceReconciler) createDBLoadBalancer(ctx context.Context, inst *v1alpha1.Instance, applyOpts []client.PatchOption) (*corev1.Service, error) {
sourceCidrRanges := []string{"0.0.0.0/0"}
Expand All @@ -228,7 +282,7 @@ func (r *InstanceReconciler) createDBLoadBalancer(ctx context.Context, inst *v1a
var svcAnnotations map[string]string

lbType := corev1.ServiceTypeLoadBalancer
svcNameFull := fmt.Sprintf(controllers.SvcName, inst.Name)
svcNameFull := getSVCName(*inst)
svcAnnotations = utils.LoadBalancerAnnotations(inst.Spec.DBLoadBalancerOptions)

svc := &corev1.Service{
Expand Down Expand Up @@ -1016,3 +1070,37 @@ func isObjectChanged(ctx context.Context, patch client.Patch, obj client.Object)
specOrMetaChanged = len(result) > 0 && !(len(result) == 1 && statusChanged)
return specOrMetaChanged, statusChanged, nil
}

func getSTSName(instance v1alpha1.Instance) string {
return fmt.Sprintf(controllers.StsName, instance.Name)
}

func getSVCName(instance v1alpha1.Instance) string {
return fmt.Sprintf(controllers.SvcName, instance.Name)
}
func InstanceLB(inst v1alpha1.Instance) *corev1.Service {
return &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Namespace: inst.GetNamespace(),
Name: getSVCName(inst),
},
}
}

func AgentSVC(inst v1alpha1.Instance) *corev1.Service {
return &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Namespace: inst.GetNamespace(),
Name: fmt.Sprintf(controllers.AgentSvcName, inst.Name),
},
}
}

func DbDaemonSVC(inst v1alpha1.Instance) *corev1.Service {
return &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Namespace: inst.GetNamespace(),
Name: fmt.Sprintf(controllers.DbdaemonSvcName, inst.Name),
},
}
}
1 change: 1 addition & 0 deletions oracle/controllers/inttest/instancetest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ ginkgo_test(
"@io_k8s_client_go//plugin/pkg/client/auth/gcp",
"@io_k8s_sigs_controller_runtime//pkg/client",
"@io_k8s_sigs_controller_runtime//pkg/envtest/printer",
"@io_k8s_utils//pointer",
],
)

Expand Down
47 changes: 40 additions & 7 deletions oracle/controllers/inttest/instancetest/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ import (
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"

apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/envtest/printer"

Expand Down Expand Up @@ -135,9 +137,46 @@ var _ = Describe("Instance and Database provisioning", func() {
var svc corev1.ServiceList
Expect(k8sClient.List(ctx, &svc, client.InNamespace(namespace))).Should(Succeed())
Expect(len(svc.Items)).Should(Equal(4)) // 2 services (LB, DBDaemon) per instance
By("By checking that the datadisk and log disk get resized")
By("By checking that the Instance can be stopped")
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute)
createdInstance1 := &v1alpha1.Instance{}
testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx,
instKey1,
createdInstance1,
func(obj *client.Object) {
instanceToUpdate := (*obj).(*v1alpha1.Instance)
instanceToUpdate.Spec.IsStopped = pointer.Bool(true)
})
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.InstanceStopped, 5*time.Minute)
By("Checking that the sts replicas were scaled down to 0")
sts1 := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf(controllers.StsName, createdInstance1.Name),
Namespace: namespace,
},
}
testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1)

Expect(*sts1.Spec.Replicas).Should(Equal(int32(controllers.StoppedReplicaCnt)))
By("Checking that the instance can be started")
createdInstance1 = &v1alpha1.Instance{}
testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx,
instKey1,
createdInstance1,
func(obj *client.Object) {
instanceToUpdate := (*obj).(*v1alpha1.Instance)
instanceToUpdate.Spec.IsStopped = pointer.Bool(false)
})

By("Checking that the sts replicas were scaled up to 1")
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute)
testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1)
Expect(*sts1.Spec.Replicas).Should(Equal(int32(controllers.DefaultReplicaCnt)))

By("By checking that the datadisk and log disk get resized")
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute)

createdInstance1 = &v1alpha1.Instance{}
testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx,
instKey1,
createdInstance1,
Expand All @@ -155,12 +194,6 @@ var _ = Describe("Instance and Database provisioning", func() {
//wait for status propagation
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionFalse, k8s.ResizingInProgress, 5*time.Minute)
testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute)
sts1 := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf(controllers.StsName, createdInstance1.Name),
Namespace: namespace,
},
}
testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1)
By("By Checking if DataDisk and Log Disk PVC is changed")
dataDiskPVCName := getPVCName(createdInstance1.Name, sts1.Name, "DataDisk")
Expand Down
4 changes: 3 additions & 1 deletion oracle/controllers/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ const (
podInfoMemRequestSubPath = "request_memory"
dbContainerName = "oracledb"
podInfoVolume = "podinfo"
StoppedReplicaCnt = 0
DefaultReplicaCnt = 1
)

var (
Expand Down Expand Up @@ -162,7 +164,7 @@ func NewConfigMap(inst *v1alpha1.Instance, scheme *runtime.Scheme, cmName string

// NewSts returns the statefulset for the database pod.
func NewSts(sp StsParams, pvcs []corev1.PersistentVolumeClaim, podTemplate corev1.PodTemplateSpec) (*appsv1.StatefulSet, error) {
var replicas int32 = 1
var replicas int32 = DefaultReplicaCnt
sts := &appsv1.StatefulSet{
// It looks like the version needs to be explicitly set to avoid the
// "incorrect version specified in apply patch" error.
Expand Down
4 changes: 4 additions & 0 deletions oracle/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2047,6 +2047,9 @@ spec:
an optional map that allows a customer to specify GCR images different
from those chosen/provided.
type: object
isStopped:
description: IsStopped is true if an instance is stopped, false otherwise
type: boolean
maintenanceWindow:
description: MaintenanceWindow specifies the time windows during which
database downtimes are allowed for maintenance.
Expand Down Expand Up @@ -2974,6 +2977,7 @@ rules:
- services
verbs:
- create
- delete
- get
- list
- patch
Expand Down
Loading

0 comments on commit a8cc6df

Please sign in to comment.