Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC - Refactor Status Updater to be resource agnostic #1787

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions internal/framework/status2/updater.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
package status2

import (
"context"
"errors"
"slices"
"sync"
"time"

"github.com/go-logr/logr"
apierrors "k8s.io/apimachinery/pkg/api/errors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/nginxinc/nginx-gateway-fabric/internal/framework/conditions"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/controller"
)

// K8sUpdater updates a resource from the k8s API.
// It allows us to mock the client.Reader.Status.Update method.
type K8sUpdater interface {
// Update is from client.StatusClient.SubResourceWriter.
Update(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error
}

type UpdateRequest struct {

Check failure on line 28 in internal/framework/status2/updater.go

View workflow job for this annotation

GitHub Actions / Lint

fieldalignment: struct with 56 pointer bytes could be 48 (govet)
NsName types.NamespacedName
ResourceType client.Object
Setter Setter
}

type Setter func(client.Object) bool

type Updater struct {
client client.Client
logger logr.Logger
}

func NewUpdater(client client.Client, logger logr.Logger) *Updater {
return &Updater{
client: client,
logger: logger,
}
}

func (u *Updater) Update(ctx context.Context, reqs ...UpdateRequest) {
for _, r := range reqs {
u.writeStatuses(ctx, r.NsName, r.ResourceType, r.Setter)
}
}

func (u *Updater) writeStatuses(
ctx context.Context,
nsname types.NamespacedName,
obj client.Object,
statusSetter Setter,
) {
err := wait.ExponentialBackoffWithContext(
ctx,
wait.Backoff{
Duration: time.Millisecond * 200,
Factor: 2,
Jitter: 0.5,
Steps: 4,
Cap: time.Millisecond * 3000,
},
// Function returns true if the condition is satisfied, or an error if the loop should be aborted.
NewRetryUpdateFunc(u.client, u.client.Status(), nsname, obj, u.logger, statusSetter),
)
if err != nil && !errors.Is(err, context.Canceled) {
u.logger.Error(
err,
"Failed to update status",
"namespace", nsname.Namespace,
"name", nsname.Name,
"kind", obj.GetObjectKind().GroupVersionKind().Kind)
}
}

// NewRetryUpdateFunc returns a function which will be used in wait.ExponentialBackoffWithContext.
// The function will attempt to Update a kubernetes resource and will be retried in
// wait.ExponentialBackoffWithContext if an error occurs. Exported for testing purposes.
//
// wait.ExponentialBackoffWithContext will retry if this function returns nil as its error,
// which is what we want if we encounter an error from the functions we call. However,
// the linter will complain if we return nil if an error was found.
//
//nolint:nilerr
func NewRetryUpdateFunc(
getter controller.Getter,
updater K8sUpdater,
nsname types.NamespacedName,
obj client.Object,
logger logr.Logger,
statusSetter func(client.Object) bool,
) func(ctx context.Context) (bool, error) {
return func(ctx context.Context) (bool, error) {
// The function handles errors by reporting them in the logs.
// We need to get the latest version of the resource.
// Otherwise, the Update status API call can fail.
// Note: the default client uses a cache for reads, so we're not making an unnecessary API call here.
// the default is configurable in the Manager options.
if err := getter.Get(ctx, nsname, obj); err != nil {
// apierrors.IsNotFound(err) can happen when the resource is deleted,
// so no need to retry or return an error.
if apierrors.IsNotFound(err) {
return true, nil
}

logger.V(1).Info(
"Encountered error when getting resource to update status",
"error", err,
"namespace", nsname.Namespace,
"name", nsname.Name,
"kind", obj.GetObjectKind().GroupVersionKind().Kind,
)

return false, nil
}

if !statusSetter(obj) {
logger.V(1).Info(
"Skipping status update because there's no change",
"namespace", nsname.Namespace,
"name", nsname.Name,
"kind", obj.GetObjectKind().GroupVersionKind().Kind,
)

return true, nil
}

if err := updater.Update(ctx, obj); err != nil {
logger.V(1).Info(
"Encountered error updating status",
"error", err,
"namespace", nsname.Namespace,
"name", nsname.Name,
"kind", obj.GetObjectKind().GroupVersionKind().Kind,
)

return false, nil
}

return true, nil
}
}

type GroupUpdateRequest struct {
Name string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are we expecting for name? Does each status update require a unique name? update-{idx}? Or is this field used to differentiate between types of status updates (gateway API resources v/s ngf resources).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

each group requires a unique name.

Or is this field used to differentiate between types of status updates (gateway API resources v/s ngf resources).

yep

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use a typed string or an enum? I think a string is too ambiguous

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is up to the client to decide how many groups to use. For this perspective, string looks reasonable to me

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I suggest adding the groups as consts on the client side so that they are documented somewhere. I also recommend describing the purpose of the group name as a comment in the final code. It's not clear what the group name is for.

Request []UpdateRequest
}

type CachingGroupUpdater struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure this is the best name. Yes, the updater is caching, but it is not clear from the name why you would want to use this updater. We are caching so that we can replay the statuses when a leader changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about DelayedGroupUpdater
All updates are delayed until the Updater is disabled.
Note that once Enabled, it cannot be disabled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All updates are delayed until the Updater is disabled.

Is that true? Updates are made immediately if enabled...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it a leader election updater? I also don't know if we need group in the name. The Updater can update statuses for multiple resources too. If I was writing a new controller, how would I decide which Updater to use?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it a leader election updater?

yep. but leader election is a signal to enable it. so that's why there is Enable method. and from that perspective, since the type isn't really coupled to leader election otherwise, I didn't put Leader in its name.

The Updater can update statuses for multiple resources too. If I was writing a new controller, how would I decide which Updater to use?

If the developer wants to update statuses without leader election - Updater (like in provisioner mode)

If the developer uses leader election - GroupUpdater

Note that we introduce Groups because the static mode handler updates statuses separately for:

  • Gateways
  • All Graph resources expect Gateways
  • NginxGateway resource
    ( Could be more groups later)

With having groups for each of those, we can replace the statuses of each group after each update by the handler. This is only needed until the Updater is enabled. We don't really need to store updates after the updater is Enabled - we can update right way. I can update the code to reflect that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense, but I still think the name should reflect the purpose of the interface. CachingGroupUpdater doesn't mean anything in the context of our product. It doesn't indicate that it can be enabled or that it should be used for leader election. I understand not wanting to couple the updater to leader election, but when the purpose of the Updater is to only write status when it's leader, I'm not sure it can be avoided. Even if we leave the name the same, we will have to add comments to the code to indicate that it is safe for leader election.

I also feel that the term group is ambiguous, but I can't think of a better name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you feel the same way about DelayedGroupUpdater?
What about DeferredGroupUpdater?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think DeferredGroupUpdater is a better description of what the updater does, but I still don't think it conveys that it is safe for leader election. I think you'll still end up with a comment.

Naming is hard, and I'm definitely not going to block the PR over this, but I don't see the harm in calling it what it is. I'd feel differently if there were another purpose for this Updater, but it is purpose-built for leader election. Why call it anything else? Coupling isn't always bad when two things are actually related.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about LeaderAwareGroupUpdater?

updater *Updater
lock *sync.Mutex
groups map[string]GroupUpdateRequest
enabled bool
}

func NewCachingGroupUpdater(updater *Updater) *CachingGroupUpdater {
return &CachingGroupUpdater{
updater: updater,
lock: &sync.Mutex{},
groups: make(map[string]GroupUpdateRequest),
}
}

func (u *CachingGroupUpdater) Update(ctx context.Context, update GroupUpdateRequest) {
u.lock.Lock()
defer u.lock.Unlock()

if len(update.Request) == 0 {
delete(u.groups, update.Name)
}

u.groups[update.Name] = update

if !u.enabled {
return
}

u.updater.Update(ctx, update.Request...)
}

func (u *CachingGroupUpdater) Enable(ctx context.Context) {
u.lock.Lock()
defer u.lock.Unlock()

u.enabled = true

for _, update := range u.groups {
u.updater.Update(ctx, update.Request...)
}
}

func ConditionsEqual(prev, cur []v1.Condition) bool {
return slices.EqualFunc(prev, cur, func(c1, c2 v1.Condition) bool {
if c1.ObservedGeneration != c2.ObservedGeneration {
return false
}

if c1.Type != c2.Type {
return false
}

if c1.Status != c2.Status {
return false
}

if c1.Message != c2.Message {
return false
}

return c1.Reason == c2.Reason
})
}

func ConvertConditions(
conds []conditions.Condition,
observedGeneration int64,
transitionTime v1.Time,
) []v1.Condition {
apiConds := make([]v1.Condition, len(conds))

for i := range conds {
apiConds[i] = v1.Condition{
Type: conds[i].Type,
Status: conds[i].Status,
ObservedGeneration: observedGeneration,
LastTransitionTime: transitionTime,
Reason: conds[i].Reason,
Message: conds[i].Message,
}
}

return apiConds
}
35 changes: 24 additions & 11 deletions internal/mode/provisioner/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ import (

"github.com/go-logr/logr"
v1 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"

"github.com/nginxinc/nginx-gateway-fabric/internal/framework/conditions"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/events"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/gatewayclass"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/status"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/status2"
)

// eventHandler ensures each Gateway for the specific GatewayClass has a corresponding Deployment
Expand All @@ -26,7 +28,7 @@ type eventHandler struct {
// provisions maps NamespacedName of Gateway to its corresponding Deployment
provisions map[types.NamespacedName]*v1.Deployment

statusUpdater status.Updater
statusUpdater *status2.Updater
k8sClient client.Client

staticModeDeploymentYAML []byte
Expand All @@ -36,7 +38,7 @@ type eventHandler struct {

func newEventHandler(
gcName string,
statusUpdater status.Updater,
statusUpdater *status2.Updater,
k8sClient client.Client,
staticModeDeploymentYAML []byte,
) *eventHandler {
Expand All @@ -52,9 +54,7 @@ func newEventHandler(
}

func (h *eventHandler) setGatewayClassStatuses(ctx context.Context) {
statuses := status.GatewayAPIStatuses{
GatewayClassStatuses: make(status.GatewayClassStatuses),
}
var reqs []status2.UpdateRequest

var gcExists bool

Expand All @@ -74,17 +74,30 @@ func (h *eventHandler) setGatewayClassStatuses(ctx context.Context) {
supportedVersionConds, _ := gatewayclass.ValidateCRDVersions(h.store.crdMetadata)
conds = append(conds, supportedVersionConds...)

statuses.GatewayClassStatuses[nsname] = status.GatewayClassStatus{
Conditions: conditions.DeduplicateConditions(conds),
ObservedGeneration: gc.Generation,
}
reqs = append(reqs, status2.UpdateRequest{
NsName: nsname,
ResourceType: &gatewayv1.GatewayClass{},
Setter: func(object client.Object) bool {
gcs := gatewayv1.GatewayClassStatus{
Conditions: status2.ConvertConditions(conditions.DeduplicateConditions(conds), gc.Generation, metav1.Now()),
}

if status2.ConditionsEqual(gc.Status.Conditions, gcs.Conditions) {
return false
}

gc.Status = gcs

return true
},
})
}

if !gcExists {
panic(fmt.Errorf("GatewayClass %s must exist", h.gcName))
}

h.statusUpdater.Update(ctx, statuses)
h.statusUpdater.Update(ctx, reqs...)
}

func (h *eventHandler) ensureDeploymentsMatchGateways(ctx context.Context, logger logr.Logger) {
Expand Down
13 changes: 4 additions & 9 deletions internal/mode/provisioner/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/controller/predicate"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/events"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/gatewayclass"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/status"
"github.com/nginxinc/nginx-gateway-fabric/internal/framework/status2"
)

// Config is configuration for the provisioner mode.
Expand Down Expand Up @@ -121,14 +121,9 @@ func StartManager(cfg Config) error {
},
)

statusUpdater := status.NewUpdater(
status.UpdaterConfig{
Client: mgr.GetClient(),
Clock: status.NewRealClock(),
Logger: cfg.Logger.WithName("statusUpdater"),
GatewayClassName: cfg.GatewayClassName,
UpdateGatewayClassStatus: true,
},
statusUpdater := status2.NewUpdater(
mgr.GetClient(),
cfg.Logger.WithName("statusUpdater"),
)

handler := newEventHandler(
Expand Down
Loading
Loading