Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding pod disruption budget support for envoy proxies #3583

Merged
merged 18 commits into from
Jun 12, 2024
4 changes: 4 additions & 0 deletions api/v1alpha1/envoyproxy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,10 @@ type EnvoyProxyKubernetesProvider struct {
//
// +optional
UseListenerPortAsContainerPort *bool `json:"useListenerPortAsContainerPort,omitempty"`

// EnvoyPDB allows to control the pod disruption budget of an Envoy Proxy.
// +optional
EnvoyPDB *KubernetesPodDisruptionBudgetSpec `json:"envoyPDB,omitempty"`
}

// ProxyLogging defines logging parameters for managed proxies.
Expand Down
9 changes: 9 additions & 0 deletions api/v1alpha1/shared_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,15 @@ const (
StringMatchRegularExpression StringMatchType = "RegularExpression"
)

// KubernetesPodDisruptionBudgetSpec defines Kubernetes PodDisruptionBudget settings of Envoy Proxy Deployment.
// The proxy resource renderer only produces a PodDisruptionBudget when MinAvailable is set;
// when it is left unset, no PodDisruptionBudget is rendered.
type KubernetesPodDisruptionBudgetSpec struct {
// MinAvailable specifies the minimum number of pods that must be available at all times during voluntary disruptions,
// such as node drains or updates. This setting ensures that your envoy proxy maintains a certain level of availability
// and resilience during maintenance operations.
// +optional
MinAvailable *int32 `json:"minAvailable,omitempty"`
}

// KubernetesHorizontalPodAutoscalerSpec defines Kubernetes Horizontal Pod Autoscaler settings of Envoy Proxy Deployment.
// When HPA is enabled, it is recommended that the value in `KubernetesDeploymentSpec.replicas` be removed, otherwise
// Envoy Gateway will revert back to this value every time reconciliation occurs.
Expand Down
25 changes: 25 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -10057,6 +10057,18 @@ spec:
x-kubernetes-validations:
- message: maxReplicas cannot be less than minReplicas
rule: '!has(self.minReplicas) || self.maxReplicas >= self.minReplicas'
envoyPDB:
description: EnvoyPDB allows to control the pod disruption
budget of an Envoy Proxy.
properties:
minAvailable:
description: |-
MinAvailable specifies the minimum number of pods that must be available at all times during voluntary disruptions,
such as node drains or updates. This setting ensures that your envoy proxy maintains a certain level of availability
and resilience during maintenance operations.
format: int32
type: integer
type: object
envoyService:
description: |-
EnvoyService defines the desired state of the Envoy service resource.
Expand Down
2 changes: 2 additions & 0 deletions charts/gateway-helm/templates/infra-manager-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ rules:
- patch
- apiGroups:
- autoscaling
- policy
resources:
- horizontalpodautoscalers
- poddisruptionbudgets
verbs:
- create
- get
Expand Down
10 changes: 10 additions & 0 deletions internal/infrastructure/kubernetes/infra.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/policy/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/envoyproxy/gateway/api/v1alpha1"
Expand All @@ -28,6 +29,7 @@
Deployment() (*appsv1.Deployment, error)
DaemonSet() (*appsv1.DaemonSet, error)
HorizontalPodAutoscaler() (*autoscalingv2.HorizontalPodAutoscaler, error)
PodDisruptionBudget() (*v1.PodDisruptionBudget, error)
}

// Infra manages the creation and deletion of Kubernetes infrastructure
Expand Down Expand Up @@ -79,6 +81,10 @@
return fmt.Errorf("failed to create or update hpa %s/%s: %w", i.Namespace, r.Name(), err)
}

if err := i.createOrUpdatePodDisruptionBudget(ctx, r); err != nil {
return fmt.Errorf("failed to create or update pdb %s/%s: %w", i.Namespace, r.Name(), err)

Check warning on line 85 in internal/infrastructure/kubernetes/infra.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra.go#L85

Added line #L85 was not covered by tests
}

return nil
}

Expand Down Expand Up @@ -108,5 +114,9 @@
return fmt.Errorf("failed to delete hpa %s/%s: %w", i.Namespace, r.Name(), err)
}

if err := i.deletePDB(ctx, r); err != nil {
return fmt.Errorf("failed to delete pdb %s/%s: %w", i.Namespace, r.Name(), err)

Check warning on line 118 in internal/infrastructure/kubernetes/infra.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra.go#L118

Added line #L118 was not covered by tests
}

return nil
}
69 changes: 69 additions & 0 deletions internal/infrastructure/kubernetes/infra_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/envoyproxy/gateway/internal/metrics"
Expand Down Expand Up @@ -167,6 +168,42 @@
return i.Client.ServerSideApply(ctx, daemonSet)
}

// createOrUpdatePodDisruptionBudget applies the PodDisruptionBudget rendered by the
// provided ResourceRender through i.Client.ServerSideApply,
// and deletes the pdb via deletePDB if the renderer returns none.
// It returns the error from rendering, deleting or applying the pdb.
func (i *Infra) createOrUpdatePodDisruptionBudget(ctx context.Context, r ResourceRender) (err error) {
var (
pdb *v1.PodDisruptionBudget
startTime = time.Now()
labels = []metrics.LabelValue{
kindLabel.Value("PDB"),
nameLabel.Value(r.Name()),
namespaceLabel.Value(i.Namespace),
}
)

// Every call is counted as an apply attempt, including calls that end up deleting.
resourceApplyTotal.With(labels...).Increment()

if pdb, err = r.PodDisruptionBudget(); err != nil {
resourceApplyFailed.With(labels...).Increment()
return err

Check warning on line 186 in internal/infrastructure/kubernetes/infra_resource.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra_resource.go#L185-L186

Added lines #L185 - L186 were not covered by tests
}

// When the renderer returns no pdb, delete the object in the kube api server instead.
// This path returns before the deferred metrics hook below is registered, so the
// apply success/failed counters are not touched here.
if pdb == nil {
arkodg marked this conversation as resolved.
Show resolved Hide resolved
return i.deletePDB(ctx, r)
}

// err is a named result so that this hook observes the outcome of ServerSideApply.
defer func() {
if err == nil {
resourceApplyDurationSeconds.With(labels...).Record(time.Since(startTime).Seconds())
resourceApplySuccess.With(labels...).Increment()
} else {
resourceApplyFailed.With(labels...).Increment()

Check warning on line 200 in internal/infrastructure/kubernetes/infra_resource.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra_resource.go#L195-L200

Added lines #L195 - L200 were not covered by tests
}
}()

return i.Client.ServerSideApply(ctx, pdb)

Check warning on line 204 in internal/infrastructure/kubernetes/infra_resource.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra_resource.go#L204

Added line #L204 was not covered by tests
}

// createOrUpdateHPA creates HorizontalPodAutoscaler object in the kube api server based on
// the provided ResourceRender, if it doesn't exist and updates it if it does,
// and delete hpa if not set.
Expand Down Expand Up @@ -431,3 +468,35 @@

return i.Client.Delete(ctx, hpa)
}

// deletePDB deletes the PodDisruptionBudget associated to its renderer, if it exists.
// The object is addressed by the renderer's name in the Infra namespace.
// It returns the error reported by i.Client.Delete.
func (i *Infra) deletePDB(ctx context.Context, r ResourceRender) (err error) {
var (
name, ns = r.Name(), i.Namespace
pdb = &v1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: ns,
Name: name,
},
}
startTime = time.Now()
labels = []metrics.LabelValue{
kindLabel.Value("PDB"),
nameLabel.Value(name),
namespaceLabel.Value(ns),
}
)

resourceDeleteTotal.With(labels...).Increment()

// err is a named result so that this hook observes the outcome of the Delete call below.
defer func() {
if err == nil {
resourceDeleteDurationSeconds.With(labels...).Record(time.Since(startTime).Seconds())
resourceDeleteSuccess.With(labels...).Increment()
} else {
resourceDeleteFailed.With(labels...).Increment()

Check warning on line 497 in internal/infrastructure/kubernetes/infra_resource.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/infra_resource.go#L497

Added line #L497 was not covered by tests
}
}()

return i.Client.Delete(ctx, pdb)
}
35 changes: 35 additions & 0 deletions internal/infrastructure/kubernetes/proxy/resource_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
Expand Down Expand Up @@ -357,6 +358,40 @@
return daemonSet, nil
}

// PodDisruptionBudget returns the PodDisruptionBudget rendered from the EnvoyPDB settings
// of the Kubernetes provider, or nil when EnvoyPDB or its MinAvailable is not set.
// It returns an error if the provider type is not Kubernetes or if r.getLabels fails.
func (r *ResourceRender) PodDisruptionBudget() (*policyv1.PodDisruptionBudget, error) {
provider := r.infra.GetProxyConfig().GetEnvoyProxyProvider()
if provider.Type != egv1a1.ProviderTypeKubernetes {
return nil, fmt.Errorf("invalid provider type %v for Kubernetes infra manager", provider.Type)

Check warning on line 364 in internal/infrastructure/kubernetes/proxy/resource_provider.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/proxy/resource_provider.go#L364

Added line #L364 was not covered by tests
}

// A nil result (with a nil error) tells the caller that no pdb is wanted.
podDisruptionBudget := provider.GetEnvoyProxyKubeProvider().EnvoyPDB
if podDisruptionBudget == nil || podDisruptionBudget.MinAvailable == nil {
return nil, nil

Check warning on line 369 in internal/infrastructure/kubernetes/proxy/resource_provider.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/proxy/resource_provider.go#L369

Added line #L369 was not covered by tests
}

// The labels from r.getLabels are used as the pdb's pod selector.
labels, err := r.getLabels()
if err != nil {
return nil, err

Check warning on line 374 in internal/infrastructure/kubernetes/proxy/resource_provider.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/proxy/resource_provider.go#L374

Added line #L374 was not covered by tests
}

return &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: r.Name(),
Namespace: r.Namespace,
},
TypeMeta: metav1.TypeMeta{
APIVersion: "policy/v1",
Kind: "PodDisruptionBudget",
},
Spec: policyv1.PodDisruptionBudgetSpec{
// MinAvailable was checked to be non-nil above, so the 0 default of ptr.Deref is never used.
MinAvailable: &intstr.IntOrString{IntVal: ptr.Deref(podDisruptionBudget.MinAvailable, 0)},
Selector: &metav1.LabelSelector{
MatchLabels: labels,
},
},
}, nil
}

func (r *ResourceRender) HorizontalPodAutoscaler() (*autoscalingv2.HorizontalPodAutoscaler, error) {
provider := r.infra.GetProxyConfig().GetEnvoyProxyProvider()
if provider.Type != egv1a1.ProviderTypeKubernetes {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
v12 "k8s.io/api/policy/v1"
v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -1220,6 +1221,53 @@ func loadServiceAccount(tc string) (*corev1.ServiceAccount, error) {
return sa, nil
}

// TestPDB renders the PodDisruptionBudget for every case and compares it with the
// expected object stored under testdata/pdb/<caseName>.yaml.
func TestPDB(t *testing.T) {
	cfg, err := config.New()
	require.NoError(t, err)

	type pdbCase struct {
		caseName string
		infra    *ir.Infra
		pdb      *egv1a1.KubernetesPodDisruptionBudgetSpec
		deploy   *egv1a1.KubernetesDeploymentSpec
	}

	testCases := []pdbCase{
		{
			caseName: "default",
			infra:    newTestInfra(),
			pdb: &egv1a1.KubernetesPodDisruptionBudgetSpec{
				MinAvailable: ptr.To(int32(1)),
			},
		},
	}

	for _, tt := range testCases {
		t.Run(tt.caseName, func(t *testing.T) {
			// Start from the default Kubernetes provider and overlay the per-case settings.
			envoyProvider := tt.infra.GetProxyInfra().GetProxyConfig().GetEnvoyProxyProvider()
			envoyProvider.Kubernetes = egv1a1.DefaultEnvoyProxyKubeProvider()
			if tt.deploy != nil {
				envoyProvider.Kubernetes.EnvoyDeployment = tt.deploy
			}
			if tt.pdb != nil {
				envoyProvider.Kubernetes.EnvoyPDB = tt.pdb
			}
			envoyProvider.GetEnvoyProxyKubeProvider()

			render := NewResourceRender(cfg.Namespace, tt.infra.GetProxyInfra(), cfg.EnvoyGateway)

			got, err := render.PodDisruptionBudget()
			require.NoError(t, err)

			want, err := loadPDB(tt.caseName)
			require.NoError(t, err)

			assert.Equal(t, want, got)
		})
	}
}

func TestHorizontalPodAutoscaler(t *testing.T) {
cfg, err := config.New()
require.NoError(t, err)
Expand Down Expand Up @@ -1316,6 +1364,17 @@ func loadHPA(caseName string) (*autoscalingv2.HorizontalPodAutoscaler, error) {
return hpa, nil
}

// loadPDB reads the expected PodDisruptionBudget for the given test case from
// testdata/pdb/<caseName>.yaml.
// It returns an error if the file cannot be read or cannot be unmarshalled into a
// PodDisruptionBudget, so that a broken fixture fails the test instead of being
// silently compared as a partially decoded object.
func loadPDB(caseName string) (*v12.PodDisruptionBudget, error) {
	pdbYAML, err := os.ReadFile(fmt.Sprintf("testdata/pdb/%s.yaml", caseName))
	if err != nil {
		return nil, err
	}

	pdb := &v12.PodDisruptionBudget{}
	if err := yaml.Unmarshal(pdbYAML, pdb); err != nil {
		return nil, fmt.Errorf("unmarshalling pdb testdata %q: %w", caseName, err)
	}
	return pdb, nil
}

func TestOwningGatewayLabelsAbsent(t *testing.T) {
cases := []struct {
caseName string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: envoy-default-37a8eec1
namespace: envoy-gateway-system
spec:
minAvailable: 1
selector:
matchLabels:
app.kubernetes.io/component: proxy
app.kubernetes.io/managed-by: envoy-gateway
app.kubernetes.io/name: envoy
gateway.envoyproxy.io/owning-gateway-name: default
gateway.envoyproxy.io/owning-gateway-namespace: default
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
Expand Down Expand Up @@ -271,3 +272,7 @@
func (r *ResourceRender) HorizontalPodAutoscaler() (*autoscalingv2.HorizontalPodAutoscaler, error) {
return nil, nil
}

// PodDisruptionBudget always returns a nil PodDisruptionBudget and a nil error:
// this ResourceRender does not render a pdb.
func (r *ResourceRender) PodDisruptionBudget() (*v1.PodDisruptionBudget, error) {
return nil, nil

Check warning on line 277 in internal/infrastructure/kubernetes/ratelimit/resource_provider.go

View check run for this annotation

Codecov / codecov/patch

internal/infrastructure/kubernetes/ratelimit/resource_provider.go#L276-L277

Added lines #L276 - L277 were not covered by tests
}
Loading