From dd336e36c016d5111950ed407942f1b3ec5c79ce Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 15 Jul 2024 23:39:04 +0000 Subject: [PATCH 1/8] Use pagination when retrieving etcd snapshot list Signed-off-by: Brad Davidson (cherry picked from commit c2216a62ad92b55feb835e92d55b95e952ecd596) Signed-off-by: Brad Davidson --- pkg/etcd/snapshot.go | 39 +++++++++++++++++++++------------ pkg/etcd/snapshot_controller.go | 24 +++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/pkg/etcd/snapshot.go b/pkg/etcd/snapshot.go index 3fccfe37e868..f2999f093080 100644 --- a/pkg/etcd/snapshot.go +++ b/pkg/etcd/snapshot.go @@ -32,13 +32,16 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/tools/pager" "k8s.io/client-go/util/retry" ) const ( - errorTTL = 24 * time.Hour + errorTTL = 24 * time.Hour + snapshotListPageSize = 20 ) var ( @@ -720,18 +723,20 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { return err } - // List all snapshots matching the selector snapshots := e.config.Runtime.K3s.K3s().V1().ETCDSnapshotFile() - esfList, err := snapshots.List(metav1.ListOptions{LabelSelector: selector.String()}) - if err != nil { - return err - } + snapshotPager := pager.New(pager.SimplePageFunc(func(opts metav1.ListOptions) (k8sruntime.Object, error) { return snapshots.List(opts) })) + snapshotPager.PageSize = snapshotListPageSize + // List all snapshots matching the selector // If a snapshot from Kubernetes was found on disk/s3, it is in sync and we can remove it from the map to sync. // If a snapshot from Kubernetes was not found on disk/s3, is is gone and can be removed from Kubernetes. // The one exception to the last rule is failed snapshots - these must be retained for a period of time. - for _, esf := range esfList.Items { - sfKey := generateETCDSnapshotFileConfigMapKey(esf) + if err := snapshotPager.EachListItem(ctx, metav1.ListOptions{LabelSelector: selector.String()}, func(obj k8sruntime.Object) error { + esf, ok := obj.(*k3s.ETCDSnapshotFile) + if !ok { + return errors.New("failed to convert object to ETCDSnapshotFile") + } + sfKey := generateETCDSnapshotFileConfigMapKey(*esf) logrus.Debugf("Found ETCDSnapshotFile for %s with key %s", esf.Spec.SnapshotName, sfKey) if sf, ok := snapshotFiles[sfKey]; ok && sf.GenerateName() == esf.Name { // exists in both and names match, don't need to sync @@ -741,7 +746,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { if esf.Status.Error != nil && esf.Status.Error.Time != nil { expires := esf.Status.Error.Time.Add(errorTTL) if time.Now().Before(expires) { - continue + return nil } } if ok { @@ -754,6 +759,9 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { logrus.Errorf("Failed to delete ETCDSnapshotFile: %v", err) } } + return nil + }); err != nil { + return err } // Any snapshots remaining in the map from disk/s3 were not found in Kubernetes and need to be created @@ -794,15 +802,18 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { } // List and remove all snapshots stored on nodes that do not match the selector - esfList, err = snapshots.List(metav1.ListOptions{LabelSelector: selector.String()}) - if err != nil { - return err - } + if err := snapshotPager.EachListItem(ctx, metav1.ListOptions{LabelSelector: selector.String()}, func(obj k8sruntime.Object) error { + esf, ok := obj.(*k3s.ETCDSnapshotFile) + if !ok { + return errors.New("failed to convert object to ETCDSnapshotFile") + } - for _, esf := range esfList.Items { if err := snapshots.Delete(esf.Name, &metav1.DeleteOptions{}); err != nil { logrus.Errorf("Failed to delete ETCDSnapshotFile for non-etcd node %s: %v", esf.Spec.NodeName, err) } + return nil + }); err != nil { + return err } // Update our Node object to note the timestamp of the snapshot storages that have been reconciled diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go index 9c62cc9c5022..37939e5c9620 100644 --- a/pkg/etcd/snapshot_controller.go +++ b/pkg/etcd/snapshot_controller.go @@ -9,6 +9,7 @@ import ( "time" apisv1 "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" + k3s "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" "github.com/k3s-io/k3s/pkg/etcd/snapshot" controllersv1 "github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io/v1" "github.com/k3s-io/k3s/pkg/util" @@ -20,7 +21,9 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/tools/pager" "k8s.io/client-go/util/retry" "github.com/sirupsen/logrus" @@ -216,20 +219,25 @@ func (e *etcdSnapshotHandler) reconcile() error { logrus.Infof("Reconciling snapshot ConfigMap data") // Get a list of existing snapshots - snapshotList, err := e.snapshots.List(metav1.ListOptions{}) - if err != nil { - return err - } - snapshots := map[string]*apisv1.ETCDSnapshotFile{} - for i := range snapshotList.Items { - esf := &snapshotList.Items[i] + snapshotPager := pager.New(pager.SimplePageFunc(func(opts metav1.ListOptions) (k8sruntime.Object, error) { return e.snapshots.List(opts) })) + snapshotPager.PageSize = snapshotListPageSize + + if err := snapshotPager.EachListItem(e.ctx, metav1.ListOptions{}, func(obj k8sruntime.Object) error { + esf, ok := obj.(*k3s.ETCDSnapshotFile) + if !ok { + return errors.New("failed to convert object to ETCDSnapshotFile") + } + // Do not create entries for snapshots that have been deleted or do not have extra metadata if !esf.DeletionTimestamp.IsZero() || len(esf.Spec.Metadata) == 0 { - continue + return nil } sfKey := generateETCDSnapshotFileConfigMapKey(*esf) snapshots[sfKey] = esf + return nil + }); err != nil { + return err } snapshotConfigMap, err := e.configmaps.Get(metav1.NamespaceSystem, snapshotConfigMapName, metav1.GetOptions{}) From c8af5f11f2faa048fd6b5f2b8c761a60bf4531c7 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 15 Jul 2024 23:51:16 +0000 Subject: [PATCH 2/8] Update secretsencrypt pagination Make secretsencrypt page size and iteration consistent with other paginators Signed-off-by: Brad Davidson (cherry picked from commit 891e72f90fa7735c64692212fb757b83588484d6) Signed-off-by: Brad Davidson --- pkg/secretsencrypt/controller.go | 46 +++++++++++++++----------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/pkg/secretsencrypt/controller.go b/pkg/secretsencrypt/controller.go index 7fb76f6ce6b7..3a9d7018ec03 100644 --- a/pkg/secretsencrypt/controller.go +++ b/pkg/secretsencrypt/controller.go @@ -2,6 +2,7 @@ package secretsencrypt import ( "context" + "errors" "fmt" "strings" @@ -12,7 +13,6 @@ import ( "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" @@ -29,6 +29,8 @@ const ( secretsProgressEvent string = "SecretsProgress" secretsUpdateCompleteEvent string = "SecretsUpdateComplete" secretsUpdateErrorEvent string = "SecretsUpdateError" + + secretListPageSize = 20 ) type handler struct { @@ -106,7 +108,7 @@ func (h *handler) onChangeNode(nodeName string, node *corev1.Node) (*corev1.Node return node, err } - if err := h.updateSecrets(node); err != nil { + if err := h.updateSecrets(nodeRef); err != nil { h.recorder.Event(nodeRef, corev1.EventTypeWarning, secretsUpdateErrorEvent, err.Error()) return node, err } @@ -206,36 +208,30 @@ func (h *handler) validateReencryptStage(node *corev1.Node, annotation string) ( return true, nil } -func (h *handler) updateSecrets(node *corev1.Node) error { - nodeRef := &corev1.ObjectReference{ - Kind: "Node", - Name: node.Name, - UID: types.UID(node.Name), - Namespace: "", - } +func (h *handler) updateSecrets(nodeRef *corev1.ObjectReference) error { secretPager := pager.New(pager.SimplePageFunc(func(opts metav1.ListOptions) (runtime.Object, error) { - return h.secrets.List("", opts) + return h.secrets.List(metav1.NamespaceAll, opts) })) - secretsList, _, err := secretPager.List(h.ctx, metav1.ListOptions{}) - if err != nil { - return err - } + secretPager.PageSize = secretListPageSize + i := 0 - err = meta.EachListItem(secretsList, func(obj runtime.Object) error { - if secret, ok := obj.(*corev1.Secret); ok { - if _, err := h.secrets.Update(secret); err != nil && !apierrors.IsConflict(err) { - return fmt.Errorf("failed to update secret: %v", err) - } - if i != 0 && i%10 == 0 { - h.recorder.Eventf(nodeRef, corev1.EventTypeNormal, secretsProgressEvent, "reencrypted %d secrets", i) - } - i++ + if err := secretPager.EachListItem(h.ctx, metav1.ListOptions{}, func(obj runtime.Object) error { + secret, ok := obj.(*corev1.Secret) + if !ok { + return errors.New("failed to convert object to Secret") + } + if _, err := h.secrets.Update(secret); err != nil && !apierrors.IsConflict(err) { + return fmt.Errorf("failed to update secret: %v", err) } + if i != 0 && i%10 == 0 { + h.recorder.Eventf(nodeRef, corev1.EventTypeNormal, secretsProgressEvent, "reencrypted %d secrets", i) + } + i++ return nil - }) - if err != nil { + }); err != nil { return err } + h.recorder.Eventf(nodeRef, corev1.EventTypeNormal, secretsUpdateCompleteEvent, "completed reencrypt of %d secrets", i) return nil } From 99bcf53311b65f87fb4ce5ef3bf0c0b2e5f70fbf Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 22 Jul 2024 22:44:27 +0000 Subject: [PATCH 3/8] Cap length of generated name used for servicelb daemonset Signed-off-by: Brad Davidson (cherry picked from commit 21611c566561827eed45a0e81dcbee0699b88380) Signed-off-by: Brad Davidson --- pkg/cloudprovider/servicelb.go | 12 ++++++- pkg/cloudprovider/servicelb_test.go | 49 +++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/pkg/cloudprovider/servicelb.go b/pkg/cloudprovider/servicelb.go index 6d4b4fa33476..e741cfa40fa3 100644 --- a/pkg/cloudprovider/servicelb.go +++ b/pkg/cloudprovider/servicelb.go @@ -697,7 +697,17 @@ func (k *k3s) getPriorityClassName(svc *core.Service) string { // generateName generates a distinct name for the DaemonSet based on the service name and UID func generateName(svc *core.Service) string { - return fmt.Sprintf("svclb-%s-%s", svc.Name, svc.UID[:8]) + name := svc.Name + // ensure that the service name plus prefix and uuid aren't overly long, but + // don't cut the service name at a trailing hyphen. + if len(name) > 48 { + trimlen := 48 + for name[trimlen-1] == '-' { + trimlen-- + } + name = name[0:trimlen] + } + return fmt.Sprintf("svclb-%s-%s", name, svc.UID[:8]) } // ingressToString converts a list of LoadBalancerIngress entries to strings diff --git a/pkg/cloudprovider/servicelb_test.go b/pkg/cloudprovider/servicelb_test.go index 6a25ade126b8..288803ea8194 100644 --- a/pkg/cloudprovider/servicelb_test.go +++ b/pkg/cloudprovider/servicelb_test.go @@ -6,6 +6,8 @@ import ( "testing" core "k8s.io/api/core/v1" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) const ( @@ -111,3 +113,50 @@ func Test_UnitFilterByIPFamily_Ordering(t *testing.T) { t.Errorf("filterByIPFamily() = %+v\nWant = %+v", got, want) } } + +func Test_UnitGenerateName(t *testing.T) { + uid := types.UID("35a5ccb3-4a82-40b7-8d83-cda9582e4251") + tests := []struct { + name string + svc *core.Service + want string + }{ + { + name: "Short name", + svc: &core.Service{ + ObjectMeta: meta.ObjectMeta{ + Name: "a-service", + UID: uid, + }, + }, + want: "svclb-a-service-35a5ccb3", + }, + { + name: "Long name", + svc: &core.Service{ + ObjectMeta: meta.ObjectMeta{ + Name: "a-service-with-a-very-veeeeeery-long-yet-valid-name", + UID: uid, + }, + }, + want: "svclb-a-service-with-a-very-veeeeeery-long-yet-valid-n-35a5ccb3", + }, + { + name: "Long hypenated name", + svc: &core.Service{ + ObjectMeta: meta.ObjectMeta{ + Name: "a-service-with-a-name-with-inconvenient------------hypens", + UID: uid, + }, + }, + want: "svclb-a-service-with-a-name-with-inconvenient-35a5ccb3", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := generateName(tt.svc); got != tt.want { + t.Errorf("generateName() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} From c6aad5e54811ab499dccad1c36480716f8e6592e Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 22 Jul 2024 23:20:49 +0000 Subject: [PATCH 4/8] Fix ipv6 sysctl required by non-ipv6 LoadBalancer service This is a partial revert of 095ecdb0346c038b0c16c39f6f66ad4f67ad10b9, with the workaround moved into klipper-lb. Signed-off-by: Brad Davidson (cherry picked from commit d4c3422a85ccfe2f00218e88050d072df2e50577) Signed-off-by: Brad Davidson --- pkg/cloudprovider/servicelb.go | 24 +++++++++++++----------- scripts/airgap/image-list.txt | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pkg/cloudprovider/servicelb.go b/pkg/cloudprovider/servicelb.go index e741cfa40fa3..c4c053a949b7 100644 --- a/pkg/cloudprovider/servicelb.go +++ b/pkg/cloudprovider/servicelb.go @@ -50,7 +50,7 @@ const ( ) var ( - DefaultLBImage = "rancher/klipper-lb:v0.4.7" + DefaultLBImage = "rancher/klipper-lb:v0.4.9" ) func (k *k3s) Register(ctx context.Context, @@ -435,12 +435,19 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) { return nil, err } sourceRanges := strings.Join(sourceRangesSet.StringSlice(), ",") + securityContext := &core.PodSecurityContext{} for _, ipFamily := range svc.Spec.IPFamilies { - if ipFamily == core.IPv6Protocol && sourceRanges == "0.0.0.0/0" { - // The upstream default load-balancer source range only includes IPv4, even if the service is IPv6-only or dual-stack. - // If using the default range, and IPv6 is enabled, also allow IPv6. - sourceRanges += ",::/0" + switch ipFamily { + case core.IPv4Protocol: + securityContext.Sysctls = append(securityContext.Sysctls, core.Sysctl{Name: "net.ipv4.ip_forward", Value: "1"}) + case core.IPv6Protocol: + securityContext.Sysctls = append(securityContext.Sysctls, core.Sysctl{Name: "net.ipv6.conf.all.forwarding", Value: "1"}) + if sourceRanges == "0.0.0.0/0" { + // The upstream default load-balancer source range only includes IPv4, even if the service is IPv6-only or dual-stack. + // If using the default range, and IPv6 is enabled, also allow IPv6. + sourceRanges += ",::/0" + } } } @@ -476,12 +483,7 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) { PriorityClassName: priorityClassName, ServiceAccountName: "svclb", AutomountServiceAccountToken: utilsptr.To(false), - SecurityContext: &core.PodSecurityContext{ - Sysctls: []core.Sysctl{ - {Name: "net.ipv4.ip_forward", Value: "1"}, - {Name: "net.ipv6.conf.all.forwarding", Value: "1"}, - }, - }, + SecurityContext: securityContext, Tolerations: []core.Toleration{ { Key: util.MasterRoleLabelKey, diff --git a/scripts/airgap/image-list.txt b/scripts/airgap/image-list.txt index 407d1b55a5e3..02240eee0cbf 100644 --- a/scripts/airgap/image-list.txt +++ b/scripts/airgap/image-list.txt @@ -1,5 +1,5 @@ docker.io/rancher/klipper-helm:v0.8.4-build20240523 -docker.io/rancher/klipper-lb:v0.4.7 +docker.io/rancher/klipper-lb:v0.4.9 docker.io/rancher/local-path-provisioner:v0.0.28 docker.io/rancher/mirrored-coredns-coredns:1.10.1 docker.io/rancher/mirrored-library-busybox:1.36.1 From 85129fd946c5a603564f31b78fbc4ea78a4a0094 Mon Sep 17 00:00:00 2001 From: Will Date: Thu, 18 Jul 2024 20:45:19 +0100 Subject: [PATCH 5/8] remove deprecated use of wait functions Signed-off-by: Will (cherry picked from commit e4f3cc7b54ae2be481184c2312644c51f094cf79) Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 6 ++++-- pkg/agent/netpol/netpol.go | 2 +- pkg/agent/tunnel/tunnel.go | 2 +- pkg/cluster/cluster.go | 2 +- pkg/cluster/managed.go | 2 +- pkg/cluster/storage.go | 6 +++--- pkg/etcd/etcd.go | 2 +- pkg/server/router.go | 2 +- pkg/spegel/bootstrap.go | 2 +- pkg/spegel/spegel.go | 2 +- pkg/util/api.go | 4 ++-- 11 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index 05b0cd9b5d57..0e4ed32b3ca9 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -59,6 +59,8 @@ func Get(ctx context.Context, agent cmds.Agent, proxy proxy.Proxy) (*config.Node // does not support jittering, so we instead use wait.JitterUntilWithContext, and cancel // the context on success. ctx, cancel := context.WithCancel(ctx) + defer cancel() + wait.JitterUntilWithContext(ctx, func(ctx context.Context) { agentConfig, err = get(ctx, &agent, proxy) if err != nil { @@ -78,7 +80,7 @@ func KubeProxyDisabled(ctx context.Context, node *config.Node, proxy proxy.Proxy var disabled bool var err error - wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) { + _ = wait.PollUntilContextCancel(ctx, 5*time.Second, true, func(ctx context.Context) (bool, error) { disabled, err = getKubeProxyDisabled(ctx, node, proxy) if err != nil { logrus.Infof("Waiting to retrieve kube-proxy configuration; server is not ready: %v", err) @@ -96,7 +98,7 @@ func APIServers(ctx context.Context, node *config.Node, proxy proxy.Proxy) []str var addresses []string var err error - wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) { + _ = wait.PollUntilContextCancel(ctx, 5*time.Second, true, func(ctx context.Context) (bool, error) { addresses, err = getAPIServers(ctx, node, proxy) if err != nil { logrus.Infof("Failed to retrieve list of apiservers from server: %v", err) diff --git a/pkg/agent/netpol/netpol.go b/pkg/agent/netpol/netpol.go index f09d47d11e5b..5c892a668f36 100644 --- a/pkg/agent/netpol/netpol.go +++ b/pkg/agent/netpol/netpol.go @@ -70,7 +70,7 @@ func Run(ctx context.Context, nodeConfig *config.Node) error { // kube-router netpol requires addresses to be available in the node object. // Wait until the uninitialized taint has been removed, at which point the addresses should be set. // TODO: Replace with non-deprecated PollUntilContextTimeout when our and Kubernetes code migrate to it - if err := wait.PollImmediateInfiniteWithContext(ctx, 2*time.Second, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextCancel(ctx, 2*time.Second, true, func(ctx context.Context) (bool, error) { // Get the node object node, err := client.CoreV1().Nodes().Get(ctx, nodeConfig.AgentConfig.NodeName, metav1.GetOptions{}) if err != nil { diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 479288e0fb28..a5df415c7343 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -181,7 +181,7 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er func (a *agentTunnel) setKubeletPort(ctx context.Context, apiServerReady <-chan struct{}) { <-apiServerReady - wait.PollImmediateWithContext(ctx, time.Second, util.DefaultAPIServerReadyTimeout, func(ctx context.Context) (bool, error) { + wait.PollUntilContextTimeout(ctx, time.Second, util.DefaultAPIServerReadyTimeout, true, func(ctx context.Context) (bool, error) { var readyTime metav1.Time nodeName := os.Getenv("NODE_NAME") node, err := a.client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 66f98ca165d0..365fd3568868 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -123,7 +123,7 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) { } if !c.config.EtcdDisableSnapshots { - wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + _ = wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) { err := c.managedDB.ReconcileSnapshotData(ctx) if err != nil { logrus.Errorf("Failed to record snapshots for cluster: %v", err) diff --git a/pkg/cluster/managed.go b/pkg/cluster/managed.go index b0e6f71861dc..f3065cf1e191 100644 --- a/pkg/cluster/managed.go +++ b/pkg/cluster/managed.go @@ -136,7 +136,7 @@ func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) { return } // We use Poll here instead of Until because we want to wait the interval before running the function. - go wait.PollUntilWithContext(ctx, 30*time.Second, func(ctx context.Context) (bool, error) { + go wait.PollUntilContextCancel(ctx, 30*time.Second, true, func(ctx context.Context) (bool, error) { clientURLs, err := c.managedDB.GetMembersClientURLs(ctx) if err != nil { logrus.Warnf("Failed to get etcd ClientURLs: %v", err) diff --git a/pkg/cluster/storage.go b/pkg/cluster/storage.go index e99766152631..b555ac976349 100644 --- a/pkg/cluster/storage.go +++ b/pkg/cluster/storage.go @@ -39,7 +39,7 @@ func RotateBootstrapToken(ctx context.Context, config *config.Control, oldToken tokenKey := storageKey(normalizedToken) var bootstrapList []client.Value - if err := wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextCancel(ctx, 5*time.Second, true, func(ctx context.Context) (bool, error) { bootstrapList, err = storageClient.List(ctx, "/bootstrap", 0) if err != nil { return false, err @@ -198,7 +198,7 @@ func (c *Cluster) storageBootstrap(ctx context.Context) error { attempts := 0 tokenKey := storageKey(normalizedToken) - return wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) { attempts++ value, saveBootstrap, err := getBootstrapKeyFromStorage(ctx, storageClient, normalizedToken, token) c.saveBootstrap = saveBootstrap @@ -258,7 +258,7 @@ func getBootstrapKeyFromStorage(ctx context.Context, storageClient client.Client var bootstrapList []client.Value var err error - if err := wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextCancel(ctx, 5*time.Second, true, func(ctx context.Context) (bool, error) { bootstrapList, err = storageClient.List(ctx, "/bootstrap", 0) if err != nil { if errors.Is(err, rpctypes.ErrGPRCNotSupportedForLearner) { diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index b4b58ec7a3fa..8410da49ffb7 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -482,7 +482,7 @@ func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) e case <-time.After(30 * time.Second): logrus.Infof("Waiting for container runtime to become ready before joining etcd cluster") case <-e.config.Runtime.ContainerRuntimeReady: - if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) { if err := e.join(ctx, clientAccessInfo); err != nil { // Retry the join if waiting for another member to be promoted, or waiting for peers to connect after promotion if errors.Is(err, rpctypes.ErrTooManyLearners) || errors.Is(err, rpctypes.ErrUnhealthy) { diff --git a/pkg/server/router.go b/pkg/server/router.go index a12215a4748a..1edbe6e785da 100644 --- a/pkg/server/router.go +++ b/pkg/server/router.go @@ -502,7 +502,7 @@ func verifyNode(ctx context.Context, nodeClient coreclient.NodeController, node func ensureSecret(ctx context.Context, config *Config, node *nodeInfo) { runtime := config.ControlConfig.Runtime - wait.PollImmediateUntilWithContext(ctx, time.Second*5, func(ctx context.Context) (bool, error) { + _ = wait.PollUntilContextCancel(ctx, time.Second*5, true, func(ctx context.Context) (bool, error) { if runtime.Core != nil { secretClient := runtime.Core.Core().V1().Secret() // This is consistent with events attached to the node generated by the kubelet diff --git a/pkg/spegel/bootstrap.go b/pkg/spegel/bootstrap.go index 1acfcc29f429..df2c2ac07edb 100644 --- a/pkg/spegel/bootstrap.go +++ b/pkg/spegel/bootstrap.go @@ -103,7 +103,7 @@ func NewServerBootstrapper(controlConfig *config.Control) routing.Bootstrapper { func (s *serverBootstrapper) Run(_ context.Context, id string) error { s.controlConfig.Runtime.ClusterControllerStarts["spegel-p2p"] = func(ctx context.Context) { nodes := s.controlConfig.Runtime.Core.Core().V1().Node() - wait.PollImmediateUntilWithContext(ctx, 1*time.Second, func(ctx context.Context) (bool, error) { + _ = wait.PollUntilContextCancel(ctx, 1*time.Second, true, func(ctx context.Context) (bool, error) { nodeName := os.Getenv("NODE_NAME") if nodeName == "" { return false, nil diff --git a/pkg/spegel/spegel.go b/pkg/spegel/spegel.go index 785a31c915a4..aacdadca84c7 100644 --- a/pkg/spegel/spegel.go +++ b/pkg/spegel/spegel.go @@ -236,7 +236,7 @@ func (c *Config) Start(ctx context.Context, nodeConfig *config.Node) error { // Wait up to 5 seconds for the p2p network to find peers. This will return // immediately if the node is bootstrapping from itself. - wait.PollImmediateWithContext(ctx, time.Second, resolveTimeout, func(_ context.Context) (bool, error) { + _ = wait.PollUntilContextTimeout(ctx, time.Second, resolveTimeout, true, func(_ context.Context) (bool, error) { return router.Ready() }) diff --git a/pkg/util/api.go b/pkg/util/api.go index a22447ad450e..9000ee998fc8 100644 --- a/pkg/util/api.go +++ b/pkg/util/api.go @@ -80,7 +80,7 @@ func WaitForAPIServerReady(ctx context.Context, kubeconfigPath string, timeout t return err } - err = wait.PollImmediateWithContext(ctx, time.Second, timeout, func(ctx context.Context) (bool, error) { + err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, true, func(ctx context.Context) (bool, error) { healthStatus := 0 result := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&healthStatus) if rerr := result.Error(); rerr != nil { @@ -128,7 +128,7 @@ func WaitForRBACReady(ctx context.Context, kubeconfigPath string, timeout time.D reviewFunc = subjectAccessReview(authClient, ra, user, groups) } - err = wait.PollImmediateWithContext(ctx, time.Second, timeout, func(ctx context.Context) (bool, error) { + err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, true, func(ctx context.Context) (bool, error) { status, rerr := reviewFunc(ctx) if rerr != nil { lastErr = rerr From 306f312178a675886b90cf00266f49f5d1fb21c9 Mon Sep 17 00:00:00 2001 From: Will Andrews Date: Thu, 25 Jul 2024 18:37:11 +0100 Subject: [PATCH 6/8] Update pkg/cluster/managed.go Co-authored-by: Derek Nola Signed-off-by: Will Andrews (cherry picked from commit e2179aa957a02d4b357bef9aabb163f043471023) Signed-off-by: Brad Davidson --- pkg/cluster/managed.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cluster/managed.go b/pkg/cluster/managed.go index f3065cf1e191..dc206ff8c396 100644 --- a/pkg/cluster/managed.go +++ b/pkg/cluster/managed.go @@ -136,7 +136,7 @@ func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) { return } // We use Poll here instead of Until because we want to wait the interval before running the function. - go wait.PollUntilContextCancel(ctx, 30*time.Second, true, func(ctx context.Context) (bool, error) { + go wait.PollUntilContextCancel(ctx, 30*time.Second, false, func(ctx context.Context) (bool, error) { clientURLs, err := c.managedDB.GetMembersClientURLs(ctx) if err != nil { logrus.Warnf("Failed to get etcd ClientURLs: %v", err) From 591f912cf9cba2fbd3330e89f4d9758545801af7 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 16 Jul 2024 00:46:43 +0000 Subject: [PATCH 7/8] Wire lasso metrics up to common gatherer Signed-off-by: Brad Davidson (cherry picked from commit e168438d4439a27a89ca462cc8a62495b7473499) Signed-off-by: Brad Davidson --- go.mod | 16 +++++++++------- go.sum | 16 ++++++++-------- pkg/metrics/metrics.go | 6 ++++++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index de33176776bc..b626e0928aa4 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/k3s-io/k3s -go 1.21.11 +go 1.22.0 + +toolchain go1.22.2 replace ( github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.11.0 @@ -134,7 +136,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.19.1 github.com/rancher/dynamiclistener v0.3.6 - github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 + github.com/rancher/lasso v0.0.0-20240724174736-24ab3dbf26f0 github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 github.com/rancher/remotedialer v0.4.1 github.com/rancher/wharfie v0.6.4 @@ -159,8 +161,8 @@ require ( google.golang.org/grpc v1.63.2 gopkg.in/yaml.v2 v2.4.0 inet.af/tcpproxy v0.0.0-20200125044825-b6bb9b5b8252 - k8s.io/api v0.29.3 - k8s.io/apimachinery v0.29.3 + k8s.io/api v0.30.0 + k8s.io/apimachinery v0.30.0 k8s.io/apiserver v0.27.16 k8s.io/cli-runtime v0.22.2 k8s.io/client-go v11.0.1-0.20190409021438-1a26190bd76a+incompatible @@ -302,7 +304,7 @@ require ( github.com/hashicorp/go-version v1.6.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect - github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/huin/goupnp v1.3.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -465,12 +467,12 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect - golang.org/x/mod v0.15.0 // indirect + golang.org/x/mod v0.17.0 // indirect golang.org/x/oauth2 v0.17.0 // indirect golang.org/x/term v0.20.0 // indirect golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.18.0 // indirect + golang.org/x/tools v0.20.0 // indirect golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 // indirect gonum.org/v1/gonum v0.13.0 // indirect diff --git a/go.sum b/go.sum index 55648fbf45fc..bb98463179d9 100644 --- a/go.sum +++ b/go.sum @@ -665,8 +665,8 @@ github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+l github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/arc/v2 v2.0.5 h1:l2zaLDubNhW4XO3LnliVj0GXO3+/CGNJAg1dcN2Fpfw= github.com/hashicorp/golang-lru/arc/v2 v2.0.5/go.mod h1:ny6zBSQZi2JxIeYcv7kt2sH2PXJtirBN7RDhRpxPkxU= -github.com/hashicorp/golang-lru/v2 v2.0.5 h1:wW7h1TG88eUIJ2i69gaE3uNVtEPIagzhGvHgwfx2Vm4= -github.com/hashicorp/golang-lru/v2 v2.0.5/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= @@ -1222,8 +1222,8 @@ github.com/quic-go/webtransport-go v0.6.0 h1:CvNsKqc4W2HljHJnoT+rMmbRJybShZ0YPFD github.com/quic-go/webtransport-go v0.6.0/go.mod h1:9KjU4AEBqEQidGHNDkZrb8CAa1abRaosM2yGOyiikEc= github.com/rancher/dynamiclistener v0.3.6 h1:iAFWeiFNra6tYlt4k+jINrK3hOxZ8mjW2S/9nA6sxKs= github.com/rancher/dynamiclistener v0.3.6/go.mod h1:VqBaJNi+bZmre0+gi+2Jb6jbn7ovHzRueW+M7QhVKsk= -github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 h1:+kige/h8/LnzWgPjB5NUIHz/pWiW/lFpqcTUkN5uulY= -github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29/go.mod h1:kgk9kJVMj9FIrrXU0iyM6u/9Je4bEjPImqswkTVaKsQ= +github.com/rancher/lasso v0.0.0-20240724174736-24ab3dbf26f0 h1:rM56SuQHkXyzPqldBDMdBZ8H4tOzyfbKxMlGyKnTQF8= +github.com/rancher/lasso v0.0.0-20240724174736-24ab3dbf26f0/go.mod h1:A/y3BLQkxZXYD60MNDRwAG9WGxXfvd6Z6gWR/a8wPw8= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 h1:0Kg2SGoMeU1ll4xPi4DE0+qNHLFO/U5MwtK0WrIdK+o= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7/go.mod h1:fsbs0YOsGn1ofPD5p+BuI4qDhbMbSJtTegKt6Ucna+c= github.com/rancher/remotedialer v0.4.1 h1:jwOf2kPRjBBpSFofv1OuZHWaYHeC9Eb6/XgDvbkoTgc= @@ -1553,8 +1553,8 @@ golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8= -golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1709,8 +1709,8 @@ golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= -golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= -golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= +golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= +golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 4ebd1d529251..a769e6a38418 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -8,6 +8,7 @@ import ( "github.com/k3s-io/k3s/pkg/agent/https" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/prometheus/client_golang/prometheus/promhttp" + lassometrics "github.com/rancher/lasso/pkg/metrics" "k8s.io/component-base/metrics/legacyregistry" ) @@ -28,6 +29,11 @@ var DefaultMetrics = &Config{ }, } +func init() { + // ensure that lasso exposes metrics through the same registry used by Kubernetes components + lassometrics.MustRegister(DefaultRegisterer) +} + // Config holds fields for the metrics listener type Config struct { // Router will be called to add the metrics API handler to an existing router. From 848126f37932c8dc017a941e4089ba33c221448b Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 16 Jul 2024 07:51:15 +0000 Subject: [PATCH 8/8] Fix cloudprovider controller name Looking at metrics revealed the cloudprovider controller name was anempty string. k8s.io/cloud-provider/names isn't available until 1.28, so just hardcode it for 1.27. ref: https://github.com/kubernetes/kubernetes/pull/115813 Signed-off-by: Brad Davidson (cherry picked from commit bffdf463e1e1380d13d95b0fdc1e8644a57ec0a3) Signed-off-by: Brad Davidson --- pkg/cloudprovider/servicelb.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/cloudprovider/servicelb.go b/pkg/cloudprovider/servicelb.go index c4c053a949b7..a811bdbb1378 100644 --- a/pkg/cloudprovider/servicelb.go +++ b/pkg/cloudprovider/servicelb.go @@ -26,7 +26,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" - ccmapp "k8s.io/cloud-provider/app" servicehelper "k8s.io/cloud-provider/service/helpers" utilsnet "k8s.io/utils/net" utilsptr "k8s.io/utils/ptr" @@ -40,7 +39,7 @@ var ( daemonsetNodePoolLabel = "svccontroller." + version.Program + ".cattle.io/lbpool" nodeSelectorLabel = "svccontroller." + version.Program + ".cattle.io/nodeselector" priorityAnnotation = "svccontroller." + version.Program + ".cattle.io/priorityclassname" - controllerName = ccmapp.DefaultInitFuncConstructors["service"].InitContext.ClientName + controllerName = "service-lb-controller" ) const (