From 7917434d38d14716b167de810baf74d4e03a3047 Mon Sep 17 00:00:00 2001 From: AhmedGrati Date: Sat, 15 Apr 2023 16:11:59 +0100 Subject: [PATCH] feat: add master resync period configurability This PR adds a config option for setting the NFD API controller resync period. The resync period is only activated when the NodeFeature API has been enabled (with -enable-nodefeature-api). Signed-off-by: AhmedGrati --- cmd/nfd-master/main.go | 8 +++- .../master-config/nfd-master.conf.example | 1 + .../templates/master.yaml | 3 ++ .../helm/node-feature-discovery/values.yaml | 2 + docs/deployment/helm.md | 1 + .../reference/master-commandline-reference.md | 17 +++++++++ .../master-configuration-reference.md | 17 +++++++++ pkg/nfd-master/nfd-api-controller.go | 18 +++++---- pkg/nfd-master/nfd-master-internal_test.go | 8 ++++ pkg/nfd-master/nfd-master.go | 12 +++++- pkg/nfd-worker/nfd-worker.go | 31 ++-------------- pkg/utils/flags.go | 37 +++++++++++++++++++ 12 files changed, 118 insertions(+), 37 deletions(-) diff --git a/cmd/nfd-master/main.go b/cmd/nfd-master/main.go index 340ffcf09e..dba7da4f1b 100644 --- a/cmd/nfd-master/main.go +++ b/cmd/nfd-master/main.go @@ -20,6 +20,7 @@ import ( "flag" "fmt" "os" + "time" "k8s.io/klog/v2" @@ -67,6 +68,8 @@ func main() { args.Overrides.EnableTaints = overrides.EnableTaints case "no-publish": args.Overrides.NoPublish = overrides.NoPublish + case "-resync-period": + args.Overrides.ResyncPeriod = overrides.ResyncPeriod } }) @@ -131,6 +134,7 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs) DenyLabelNs: &utils.StringSetVal{}, ExtraLabelNs: &utils.StringSetVal{}, ResourceLabels: &utils.StringSetVal{}, + ResyncPeriod: &utils.DurationVal{Duration: time.Duration(1) * time.Hour}, } flagset.Var(overrides.ExtraLabelNs, "extra-label-ns", "Comma separated list of allowed extra label namespaces") @@ -145,6 +149,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs) "Comma separated list of denied label namespaces") flagset.Var(overrides.ResourceLabels, "resource-labels", "Comma separated list of labels to be exposed as extended resources. DEPRECATED: use NodeFeatureRule objects instead") - + flagset.Var(overrides.ResyncPeriod, "resync-period", + "Specify the NFD API controller resync period."+ + "It has an effect when the NodeFeature API has been enabled (with -enable-nodefeature-api).") return args, overrides } diff --git a/deployment/components/master-config/nfd-master.conf.example b/deployment/components/master-config/nfd-master.conf.example index 4f5ffffa30..3760e825d0 100644 --- a/deployment/components/master-config/nfd-master.conf.example +++ b/deployment/components/master-config/nfd-master.conf.example @@ -4,3 +4,4 @@ # resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"] # enableTaints: false # labelWhiteList: "foo" +# resyncPeriod: "2h" diff --git a/deployment/helm/node-feature-discovery/templates/master.yaml b/deployment/helm/node-feature-discovery/templates/master.yaml index 943b5ba1bf..96b0ce1689 100644 --- a/deployment/helm/node-feature-discovery/templates/master.yaml +++ b/deployment/helm/node-feature-discovery/templates/master.yaml @@ -104,6 +104,9 @@ spec: {{- if .Values.master.featureRulesController | kindIs "invalid" | not }} - "-featurerules-controller={{ .Values.master.featureRulesController }}" {{- end }} + {{- if .Values.master.resyncPeriod }} + - "-resync-period={{ .Values.master.resyncPeriod }}" + {{- end }} {{- if .Values.tls.enable }} - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt" - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key" diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index fc98f4e928..2822a6de58 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -20,11 +20,13 @@ master: # resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"] # enableTaints: false # labelWhiteList: "foo" + # resyncPeriod: "2h" ### # The TCP port that nfd-master listens for incoming requests. Default: 8080 port: 8080 instance: featureApi: + resyncPeriod: denyLabelNs: [] extraLabelNs: [] resourceLabels: [] diff --git a/docs/deployment/helm.md b/docs/deployment/helm.md index f6961b6868..a2496f8e1c 100644 --- a/docs/deployment/helm.md +++ b/docs/deployment/helm.md @@ -110,6 +110,7 @@ We have introduced the following Chart parameters. | `master.*` | dict | | NFD master deployment configuration | | `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. | | `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments | +| `master.resyncPeriod` | string | | NFD API controller resync period. | | `master.extraLabelNs` | array | [] | List of allowed extra label namespaces | | `master.resourceLabels` | array | [] | List of labels to be registered as extended resources | | `master.enableTaints` | bool | false | Specifies whether to enable or disable node tainting | diff --git a/docs/reference/master-commandline-reference.md b/docs/reference/master-commandline-reference.md index c1260e989c..90ca236a7f 100644 --- a/docs/reference/master-commandline-reference.md +++ b/docs/reference/master-commandline-reference.md @@ -376,3 +376,20 @@ Default: 0 Comma-separated list of `pattern=N` settings for file-filtered logging. Default: *empty* + +### -resync-period + +The `-resync-period` flag specifies the NFD API controller resync period. +The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects, +thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations, +extended resources and taints are in place). +Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature) +CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api). + +Default: 1 hour. + +Example: + +```bash +nfd-master -resync-period=2h +``` diff --git a/docs/reference/master-configuration-reference.md b/docs/reference/master-configuration-reference.md index b11bb4cb8a..5168acefd6 100644 --- a/docs/reference/master-configuration-reference.md +++ b/docs/reference/master-configuration-reference.md @@ -113,3 +113,20 @@ Example: ```yaml labelWhiteList: "foo" ``` + +### resyncPeriod + +The `resyncPeriod` option specifies the NFD API controller resync period. +The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects, +thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations, +extended resources and taints are in place). +Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature) +CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api). + +Default: 1 hour. + +Example: + +```yaml +resyncPeriod=2h +``` diff --git a/pkg/nfd-master/nfd-api-controller.go b/pkg/nfd-master/nfd-api-controller.go index 807f72b483..714037afb9 100644 --- a/pkg/nfd-master/nfd-api-controller.go +++ b/pkg/nfd-master/nfd-api-controller.go @@ -41,7 +41,12 @@ type nfdController struct { updateOneNodeChan chan string } -func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdController, error) { +type nfdApiControllerOptions struct { + disableNodeFeature bool + resyncPeriod time.Duration +} + +func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiControllerOptions) (*nfdController, error) { c := &nfdController{ stopChan: make(chan struct{}, 1), updateAllNodesChan: make(chan struct{}, 1), @@ -49,11 +54,10 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC } nfdClient := nfdclientset.NewForConfigOrDie(config) - - informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, 1*time.Hour) + informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, nfdApiControllerOptions.resyncPeriod) // Add informer for NodeFeature objects - if !disableNodeFeature { + if !nfdApiControllerOptions.disableNodeFeature { featureInformer := informerFactory.Nfd().V1alpha1().NodeFeatures() if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { @@ -83,7 +87,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC AddFunc: func(object interface{}) { key, _ := cache.MetaNamespaceKeyFunc(object) klog.V(2).Infof("NodeFeatureRule %v added", key) - if !disableNodeFeature { + if !nfdApiControllerOptions.disableNodeFeature { c.updateAllNodes() } // else: rules will be processed only when gRPC requests are received @@ -91,7 +95,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC UpdateFunc: func(oldObject, newObject interface{}) { key, _ := cache.MetaNamespaceKeyFunc(newObject) klog.V(2).Infof("NodeFeatureRule %v updated", key) - if !disableNodeFeature { + if !nfdApiControllerOptions.disableNodeFeature { c.updateAllNodes() } // else: rules will be processed only when gRPC requests are received @@ -99,7 +103,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC DeleteFunc: func(object interface{}) { key, _ := cache.MetaNamespaceKeyFunc(object) klog.V(2).Infof("NodeFeatureRule %v deleted", key) - if !disableNodeFeature { + if !nfdApiControllerOptions.disableNodeFeature { c.updateAllNodes() } // else: rules will be processed only when gRPC requests are received diff --git a/pkg/nfd-master/nfd-master-internal_test.go b/pkg/nfd-master/nfd-master-internal_test.go index cf821c8e79..f59fd6d35b 100644 --- a/pkg/nfd-master/nfd-master-internal_test.go +++ b/pkg/nfd-master/nfd-master-internal_test.go @@ -640,24 +640,32 @@ extraLabelNs: ["added.ns.io"] // Update config and verify the effect writeConfig(` extraLabelNs: ["override.ns.io"] +resyncPeriod: '2h' `) So(func() interface{} { return master.config.ExtraLabelNs }, withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"override.ns.io": struct{}{}}) + So(func() interface{} { return master.config.ResyncPeriod.Duration }, + withTimeout, 2*time.Second, ShouldResemble, time.Duration(2)*time.Hour) // Removing config file should get back our defaults err = os.RemoveAll(tmpDir) So(err, ShouldBeNil) So(func() interface{} { return master.config.ExtraLabelNs }, withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{}) + So(func() interface{} { return master.config.ResyncPeriod.Duration }, + withTimeout, 2*time.Second, ShouldResemble, time.Duration(1)*time.Hour) // Re-creating config dir and file should change the config err = os.MkdirAll(configDir, 0755) So(err, ShouldBeNil) writeConfig(` extraLabelNs: ["another.override.ns"] +resyncPeriod: '3m' `) So(func() interface{} { return master.config.ExtraLabelNs }, withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"another.override.ns": struct{}{}}) + So(func() interface{} { return master.config.ResyncPeriod.Duration }, + withTimeout, 2*time.Second, ShouldResemble, time.Duration(3)*time.Minute) }) }) } diff --git a/pkg/nfd-master/nfd-master.go b/pkg/nfd-master/nfd-master.go index 811ad576d9..73d762343b 100644 --- a/pkg/nfd-master/nfd-master.go +++ b/pkg/nfd-master/nfd-master.go @@ -70,6 +70,7 @@ type NFDConfig struct { NoPublish bool ResourceLabels utils.StringSetVal EnableTaints bool + ResyncPeriod utils.DurationVal } // ConfigOverrideArgs are args that override config file options @@ -80,6 +81,7 @@ type ConfigOverrideArgs struct { ResourceLabels *utils.StringSetVal EnableTaints *bool NoPublish *bool + ResyncPeriod *utils.DurationVal } // Args holds command line arguments @@ -96,6 +98,7 @@ type Args struct { Prune bool VerifyNodeName bool Options string + ResyncPeriod utils.DurationVal Overrides ConfigOverrideArgs } @@ -172,6 +175,7 @@ func newDefaultConfig() *NFDConfig { NoPublish: false, ResourceLabels: utils.StringSetVal{}, EnableTaints: false, + ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour}, } } @@ -195,7 +199,10 @@ func (m *nfdMaster) Run() error { return err } klog.Info("starting nfd api controller") - m.nfdController, err = newNfdController(kubeconfig, !m.args.EnableNodeFeatureApi) + m.nfdController, err = newNfdController(kubeconfig, nfdApiControllerOptions{ + disableNodeFeature: !m.args.EnableNodeFeatureApi, + resyncPeriod: m.args.ResyncPeriod.Duration, + }) if err != nil { return fmt.Errorf("failed to initialize CRD controller: %w", err) } @@ -1112,6 +1119,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error { if m.args.Overrides.LabelWhiteList != nil { c.LabelWhiteList = *m.args.Overrides.LabelWhiteList } + if m.args.Overrides.ResyncPeriod != nil { + c.ResyncPeriod = *m.args.Overrides.ResyncPeriod + } m.config = c if !c.NoPublish { diff --git a/pkg/nfd-worker/nfd-worker.go b/pkg/nfd-worker/nfd-worker.go index e7f9a46f53..0be8d0dae1 100644 --- a/pkg/nfd-worker/nfd-worker.go +++ b/pkg/nfd-worker/nfd-worker.go @@ -80,7 +80,7 @@ type coreConfig struct { FeatureSources []string Sources *[]string LabelSources []string - SleepInterval duration + SleepInterval utils.DurationVal } type sourcesConfig map[string]source.Config @@ -127,10 +127,6 @@ type nfdWorker struct { labelSources []source.LabelSource } -type duration struct { - time.Duration -} - // This ticker can represent infinite and normal intervals. type infiniteTicker struct { *time.Ticker @@ -169,7 +165,7 @@ func newDefaultConfig() *NFDConfig { return &NFDConfig{ Core: coreConfig{ LabelWhiteList: utils.RegexpVal{Regexp: *regexp.MustCompile("")}, - SleepInterval: duration{60 * time.Second}, + SleepInterval: utils.DurationVal{Duration: 60 * time.Second}, FeatureSources: []string{"all"}, LabelSources: []string{"all"}, Klog: make(map[string]string), @@ -369,7 +365,7 @@ func (c *coreConfig) sanitize() { if c.SleepInterval.Duration > 0 && c.SleepInterval.Duration < time.Second { klog.Warningf("too short sleep interval specified (%s), forcing to 1s", c.SleepInterval.Duration.String()) - c.SleepInterval = duration{time.Second} + c.SleepInterval = utils.DurationVal{Duration: time.Second} } } @@ -756,27 +752,6 @@ func (m *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) { return c, nil } -// UnmarshalJSON implements the Unmarshaler interface from "encoding/json" -func (d *duration) UnmarshalJSON(data []byte) error { - var v interface{} - if err := json.Unmarshal(data, &v); err != nil { - return err - } - switch val := v.(type) { - case float64: - d.Duration = time.Duration(val) - case string: - var err error - d.Duration, err = time.ParseDuration(val) - if err != nil { - return err - } - default: - return fmt.Errorf("invalid duration %s", data) - } - return nil -} - // UnmarshalJSON implements the Unmarshaler interface from "encoding/json" func (c *sourcesConfig) UnmarshalJSON(data []byte) error { // First do a raw parse to get the per-source data diff --git a/pkg/utils/flags.go b/pkg/utils/flags.go index 47bd3f3df6..ab9379c052 100644 --- a/pkg/utils/flags.go +++ b/pkg/utils/flags.go @@ -23,6 +23,7 @@ import ( "regexp" "sort" "strings" + "time" ) // RegexpVal is a wrapper for regexp command line flags @@ -173,3 +174,39 @@ func NewKlogFlagVal(f *flag.Flag) *KlogFlagVal { type boolFlag interface { IsBoolFlag() bool } + +// DurationVal is a wrapper for handling time.Duration as a command line flag +type DurationVal struct { + time.Duration +} + +// UnmarshalJSON implements the Unmarshaler interface from "encoding/json" +func (d *DurationVal) UnmarshalJSON(data []byte) error { + var v interface{} + if err := json.Unmarshal(data, &v); err != nil { + return err + } + switch val := v.(type) { + case float64: + d.Duration = time.Duration(val) + case string: + var err error + d.Duration, err = time.ParseDuration(val) + if err != nil { + return err + } + default: + return fmt.Errorf("invalid duration %s", data) + } + return nil +} + +// Set implements the flag.Value interface +func (d *DurationVal) Set(val string) error { + m, err := time.ParseDuration(val) + if err != nil { + return err + } + *d = DurationVal{m} + return nil +}