Skip to content

Commit

Permalink
support local pv failover (#394)
Browse files Browse the repository at this point in the history
  • Loading branch information
MegaByte875 authored Nov 12, 2023
1 parent de814ee commit 484a1b0
Show file tree
Hide file tree
Showing 29 changed files with 1,132 additions and 227 deletions.
4 changes: 4 additions & 0 deletions apis/apps/v1alpha1/nebulacluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ func (nc *NebulaCluster) ConcurrentTransfer() bool {
return pointer.BoolDeref(nc.Spec.Storaged.ConcurrentTransfer, false)
}

func (nc *NebulaCluster) IsAutoFailoverEnabled() bool {
return pointer.BoolDeref(nc.Spec.EnableAutoFailover, false)
}

func (nc *NebulaCluster) IsBREnabled() bool {
return pointer.BoolDeref(nc.Spec.EnableBR, false)
}
Expand Down
31 changes: 18 additions & 13 deletions apis/apps/v1alpha1/nebulacluster_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ import (
"strconv"
"strings"

kruisepub "github.com/openkruise/kruise-api/apps/pub"
kruisev1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -201,23 +200,23 @@ func rollingUpdateDone(workloadStatus *WorkloadStatus) bool {
workloadStatus.CurrentRevision == workloadStatus.UpdateRevision
}

func upgradeStatefulSet(sts *appsv1.StatefulSet) (*kruisev1alpha1.StatefulSet, error) {
func upgradeStatefulSet(sts *appsv1.StatefulSet) (*kruisev1beta1.StatefulSet, error) {
data, err := json.Marshal(sts)
if err != nil {
return nil, err
}
newSts := &kruisev1alpha1.StatefulSet{}
newSts := &kruisev1beta1.StatefulSet{}
if err := json.Unmarshal(data, newSts); err != nil {
return nil, err
}

newSts.TypeMeta.APIVersion = kruisev1alpha1.SchemeGroupVersion.String()
newSts.Spec.Template.Spec.ReadinessGates = []corev1.PodReadinessGate{
{
ConditionType: kruisepub.InPlaceUpdateReady,
},
}
newSts.Spec.UpdateStrategy.RollingUpdate.PodUpdatePolicy = kruisev1alpha1.InPlaceIfPossiblePodUpdateStrategyType
newSts.TypeMeta.APIVersion = kruisev1beta1.SchemeGroupVersion.String()
//newSts.Spec.Template.Spec.ReadinessGates = []corev1.PodReadinessGate{
// {
// ConditionType: kruisepub.InPlaceUpdateReady,
// },
//}
newSts.Spec.UpdateStrategy.RollingUpdate.PodUpdatePolicy = kruisev1beta1.RecreatePodUpdateStrategyType

return newSts, nil
}
Expand Down Expand Up @@ -417,7 +416,7 @@ echo "export NODE_ZONE=${NODE_ZONE}" > /node/zone
image = pointer.StringDeref(nc.Spec.AlpineImage, "")
}

return corev1.Container{
container := corev1.Container{
Name: "node-labels",
Image: image,
Command: []string{"/bin/sh", "-c"},
Expand Down Expand Up @@ -451,6 +450,12 @@ echo "export NODE_ZONE=${NODE_ZONE}" > /node/zone
},
},
}

imagePullPolicy := nc.Spec.ImagePullPolicy
if imagePullPolicy != nil {
container.ImagePullPolicy = *imagePullPolicy
}
return container
}

func generateInitContainers(c NebulaClusterComponent) []corev1.Container {
Expand Down Expand Up @@ -705,7 +710,7 @@ func generateWorkload(
if err != nil {
return nil, err
}
case kruisev1alpha1.SchemeGroupVersion.WithKind("StatefulSet").String():
case kruisev1beta1.SchemeGroupVersion.WithKind("StatefulSet").String():
var set *appsv1.StatefulSet
set, err = generateStatefulSet(c, cm)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions apis/apps/v1alpha1/nebulacluster_component.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ type NebulaClusterComponent interface {
GetPhase() ComponentPhase
IsSuspending() bool
IsSuspended() bool
IsAutoFailovering() bool
SetWorkloadStatus(status *WorkloadStatus)
UpdateComponentStatus(status *ComponentStatus)
}
Expand Down
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/nebulacluster_graphd.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,8 @@ func (c *graphdComponent) IsSuspended() bool {
}
return true
}

func (c *graphdComponent) IsAutoFailovering() bool {
//TODO implement me
return false
}
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/nebulacluster_metad.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,3 +424,8 @@ func (c *metadComponent) IsSuspended() bool {
}
return true
}

func (c *metadComponent) IsAutoFailovering() bool {
//TODO implement me
return false
}
12 changes: 12 additions & 0 deletions apis/apps/v1alpha1/nebulacluster_storaged.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,18 @@ func (c *storagedComponent) IsSuspended() bool {
return true
}

func (c *storagedComponent) IsAutoFailovering() bool {
if len(c.nc.Status.Storaged.FailureHosts) == 0 {
return false
}
for _, failureHost := range c.nc.Status.Storaged.FailureHosts {
if !failureHost.ConfirmationTime.IsZero() {
return true
}
}
return false
}

func storageDataVolumeClaims(storageClaims []StorageClaim, componentType string) ([]corev1.PersistentVolumeClaim, error) {
var pvcs []corev1.PersistentVolumeClaim
for i := range storageClaims {
Expand Down
35 changes: 30 additions & 5 deletions apis/apps/v1alpha1/nebulacluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

// NebulaClusterConditionType represents a nebula cluster condition value.
Expand Down Expand Up @@ -95,6 +96,13 @@ type NebulaClusterSpec struct {
// +optional
EnableBR *bool `json:"enableBR,omitempty"`

// Flag to enable/disable auto fail over in use local PV scenario, default false.
// +optional
EnableAutoFailover *bool `json:"enableAutoFailover,omitempty"`

// +optional
FailoverPeriod metav1.Duration `json:"failoverPeriod,omitempty"`

// +optional
LogRotate *LogRotate `json:"logRotate,omitempty"`

Expand Down Expand Up @@ -134,11 +142,13 @@ type ComponentStatus struct {

// StoragedStatus describes the status and version of nebula storaged.
type StoragedStatus struct {
ComponentStatus `json:",inline"`
HostsAdded bool `json:"hostsAdded,omitempty"`
RemovedSpaces []int32 `json:"removedSpaces,omitempty"`
BalancedSpaces []int32 `json:"balancedSpaces,omitempty"`
LastBalanceJob *BalanceJob `json:"lastBalanceJob,omitempty"`
ComponentStatus `json:",inline"`
HostsAdded bool `json:"hostsAdded,omitempty"`
RemovedSpaces []int32 `json:"removedSpaces,omitempty"`
BalancedSpaces []int32 `json:"balancedSpaces,omitempty"`
LastBalanceJob *BalanceJob `json:"lastBalanceJob,omitempty"`
BalancedAfterFailover *bool `json:"balancedAfterFailover,omitempty"`
FailureHosts map[string]StoragedFailureHost `json:"failureHosts,omitempty"`
}

// BalanceJob describes the admin job for balance data.
Expand All @@ -147,6 +157,21 @@ type BalanceJob struct {
JobID int32 `json:"jobID,omitempty"`
}

type EmptyStruct struct{}

// StoragedFailureHost is the storaged failure host information.
type StoragedFailureHost struct {
Host string `json:"host,omitempty"`
PVCSet map[types.UID]EmptyStruct `json:"pvcSet,omitempty"`
HostDeleted bool `json:"hostDeleted,omitempty"`
PodRestarted bool `json:"podRestarted,omitempty"`
PodRebuilt bool `json:"podRebuilt,omitempty"`
NodeDown bool `json:"nodeDown,omitempty"`
CreationTime metav1.Time `json:"creationTime,omitempty"`
ConfirmationTime metav1.Time `json:"confirmationTime,omitempty"`
DeletionTime metav1.Time `json:"deletionTime,omitempty"`
}

// WorkloadStatus describes the status of a specified workload.
type WorkloadStatus struct {
// ObservedGeneration is the most recent generation observed for this Workload. It corresponds to the
Expand Down
59 changes: 59 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions charts/nebula-operator/crds/nebulaclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ spec:
version:
type: string
type: object
enableAutoFailover:
type: boolean
enableBR:
type: boolean
enablePVReclaim:
Expand Down Expand Up @@ -3184,6 +3186,8 @@ spec:
required:
- image
type: object
failoverPeriod:
type: string
graphd:
properties:
affinity:
Expand Down Expand Up @@ -11718,11 +11722,41 @@ spec:
type: integer
storaged:
properties:
balancedAfterFailover:
type: boolean
balancedSpaces:
items:
format: int32
type: integer
type: array
failureHosts:
additionalProperties:
properties:
confirmationTime:
format: date-time
type: string
creationTime:
format: date-time
type: string
deletionTime:
format: date-time
type: string
host:
type: string
hostDeleted:
type: boolean
nodeDown:
type: boolean
podRebuilt:
type: boolean
podRestarted:
type: boolean
pvcSet:
additionalProperties:
type: object
type: object
type: object
type: object
hostsAdded:
type: boolean
lastBalanceJob:
Expand Down
2 changes: 1 addition & 1 deletion cmd/autoscaler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func NewOptions() *Options {
}

func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", true, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", false, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.StringVar(&o.LeaderElection.ResourceNamespace, "leader-elect-resource-namespace", NamespaceNebulaSystem, "The namespace of resource object that is used for locking during leader election.")
flags.DurationVar(&o.LeaderElection.LeaseDuration.Duration, "leader-elect-lease-duration", defaultElectionLeaseDuration.Duration, ""+
"The duration that non-leader candidates will wait after observing a leadership "+
Expand Down
5 changes: 3 additions & 2 deletions cmd/controller-manager/app/controller-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"flag"
"net/http"

kruiseapi "github.com/openkruise/kruise-api/apps/v1alpha1"
kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand Down Expand Up @@ -92,7 +92,8 @@ func Run(ctx context.Context, opts *options.Options) error {
profileflag.ListenAndServe(opts.ProfileOpts)

if opts.EnableKruiseScheme {
utilruntime.Must(kruiseapi.AddToScheme(scheme))
utilruntime.Must(kruisev1beta1.AddToScheme(clientgoscheme.Scheme))
utilruntime.Must(kruisev1beta1.AddToScheme(scheme))
klog.Info("register openkruise scheme")
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/controller-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func NewOptions() *Options {
}

func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", true, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", false, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.StringVar(&o.LeaderElection.ResourceNamespace, "leader-elect-resource-namespace", NamespaceNebulaSystem, "The namespace of resource object that is used for locking during leader election.")
flags.DurationVar(&o.LeaderElection.LeaseDuration.Duration, "leader-elect-lease-duration", defaultElectionLeaseDuration.Duration, ""+
"The duration that non-leader candidates will wait after observing a leadership "+
Expand Down
Loading

0 comments on commit 484a1b0

Please sign in to comment.