Skip to content

Commit

Permalink
feat: add master resync period configurability
Browse files Browse the repository at this point in the history
This PR adds a config option for setting the NFD API controller resync period.
The resync period is only activated when the NodeFeature API has been
enabled (with -enable-nodefeature-api).

Signed-off-by: AhmedGrati <[email protected]>
  • Loading branch information
TessaIO committed Apr 22, 2023
1 parent 018cd33 commit 6753fde
Show file tree
Hide file tree
Showing 12 changed files with 117 additions and 37 deletions.
8 changes: 7 additions & 1 deletion cmd/nfd-master/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

"k8s.io/klog/v2"

Expand Down Expand Up @@ -67,6 +68,8 @@ func main() {
args.Overrides.EnableTaints = overrides.EnableTaints
case "no-publish":
args.Overrides.NoPublish = overrides.NoPublish
case "-resync-period":
args.Overrides.ResyncPeriod = overrides.ResyncPeriod
}
})

Expand Down Expand Up @@ -131,6 +134,7 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
DenyLabelNs: &utils.StringSetVal{},
ExtraLabelNs: &utils.StringSetVal{},
ResourceLabels: &utils.StringSetVal{},
ResyncPeriod: &utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
flagset.Var(overrides.ExtraLabelNs, "extra-label-ns",
"Comma separated list of allowed extra label namespaces")
Expand All @@ -145,6 +149,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
"Comma separated list of denied label namespaces")
flagset.Var(overrides.ResourceLabels, "resource-labels",
"Comma separated list of labels to be exposed as extended resources. DEPRECATED: use NodeFeatureRule objects instead")

flagset.Var(overrides.ResyncPeriod, "resync-period",
"Specify the NFD API controller resync period."+
"It has an effect when the NodeFeature API has been enabled (with -enable-nodefeature-api).")
return args, overrides
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"
3 changes: 3 additions & 0 deletions deployment/helm/node-feature-discovery/templates/master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ spec:
{{- if .Values.master.featureRulesController | kindIs "invalid" | not }}
- "-featurerules-controller={{ .Values.master.featureRulesController }}"
{{- end }}
{{- if .Values.master.resyncPeriod }}
- "-resync-period={{ .Values.master.resyncPeriod }}"
{{- end }}
{{- if .Values.tls.enable }}
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
Expand Down
2 changes: 2 additions & 0 deletions deployment/helm/node-feature-discovery/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ master:
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"
### <NFD-MASTER-CONF-END-DO-NOT-REMOVE>
# The TCP port that nfd-master listens for incoming requests. Default: 8080
port: 8080
instance:
featureApi:
resyncPeriod:
denyLabelNs: []
extraLabelNs: []
resourceLabels: []
Expand Down
1 change: 1 addition & 0 deletions docs/deployment/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ We have introduced the following Chart parameters.
| `master.*` | dict | | NFD master deployment configuration |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
| `master.resourceLabels` | array | [] | List of labels to be registered as extended resources |
| `master.enableTaints` | bool | false | Specifies whether to enable or disable node tainting |
Expand Down
17 changes: 17 additions & 0 deletions docs/reference/master-commandline-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,20 @@ Default: 0
Comma-separated list of `pattern=N` settings for file-filtered logging.

Default: *empty*

### -resync-period

The `-resync-period` flag specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).

Default: 1 hour.

Example:

```bash
nfd-master -resync-period=2h
```
17 changes: 17 additions & 0 deletions docs/reference/master-configuration-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,20 @@ Example:
```yaml
labelWhiteList: "foo"
```

### resyncPeriod

The `resyncPeriod` option specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).

Default: 1 hour.

Example:

```yaml
resyncPeriod=2h
```
18 changes: 11 additions & 7 deletions pkg/nfd-master/nfd-api-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,23 @@ type nfdController struct {
updateOneNodeChan chan string
}

func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdController, error) {
type NfdApiControllerOptions struct {
disableNodeFeature bool
resyncPeriod time.Duration
}

func newNfdController(config *restclient.Config, nfdApiControllerOptions NfdApiControllerOptions) (*nfdController, error) {
c := &nfdController{
stopChan: make(chan struct{}, 1),
updateAllNodesChan: make(chan struct{}, 1),
updateOneNodeChan: make(chan string),
}

nfdClient := nfdclientset.NewForConfigOrDie(config)

informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, 1*time.Hour)
informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, nfdApiControllerOptions.resyncPeriod)

// Add informer for NodeFeature objects
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
featureInformer := informerFactory.Nfd().V1alpha1().NodeFeatures()
if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
Expand Down Expand Up @@ -83,23 +87,23 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
AddFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v added", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
},
UpdateFunc: func(oldObject, newObject interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(newObject)
klog.V(2).Infof("NodeFeatureRule %v updated", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
},
DeleteFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v deleted", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
Expand Down
8 changes: 8 additions & 0 deletions pkg/nfd-master/nfd-master-internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,24 +640,32 @@ extraLabelNs: ["added.ns.io"]
// Update config and verify the effect
writeConfig(`
extraLabelNs: ["override.ns.io"]
resyncPeriod: '2h'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"override.ns.io": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(2)*time.Hour)

// Removing config file should get back our defaults
err = os.RemoveAll(tmpDir)
So(err, ShouldBeNil)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(1)*time.Hour)

// Re-creating config dir and file should change the config
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
writeConfig(`
extraLabelNs: ["another.override.ns"]
resyncPeriod: '3m'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"another.override.ns": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(3)*time.Minute)
})
})
}
Expand Down
12 changes: 11 additions & 1 deletion pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type NFDConfig struct {
NoPublish bool
ResourceLabels utils.StringSetVal
EnableTaints bool
ResyncPeriod utils.DurationVal
}

// ConfigOverrideArgs are args that override config file options
Expand All @@ -80,6 +81,7 @@ type ConfigOverrideArgs struct {
ResourceLabels *utils.StringSetVal
EnableTaints *bool
NoPublish *bool
ResyncPeriod *utils.DurationVal
}

// Args holds command line arguments
Expand All @@ -96,6 +98,7 @@ type Args struct {
Prune bool
VerifyNodeName bool
Options string
ResyncPeriod utils.DurationVal

Overrides ConfigOverrideArgs
}
Expand Down Expand Up @@ -172,6 +175,7 @@ func newDefaultConfig() *NFDConfig {
NoPublish: false,
ResourceLabels: utils.StringSetVal{},
EnableTaints: false,
ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
}

Expand All @@ -195,7 +199,10 @@ func (m *nfdMaster) Run() error {
return err
}
klog.Info("starting nfd api controller")
m.nfdController, err = newNfdController(kubeconfig, !m.args.EnableNodeFeatureApi)
m.nfdController, err = newNfdController(kubeconfig, NfdApiControllerOptions{
disableNodeFeature: !m.args.EnableNodeFeatureApi,
resyncPeriod: m.args.ResyncPeriod.Duration,
})
if err != nil {
return fmt.Errorf("failed to initialize CRD controller: %w", err)
}
Expand Down Expand Up @@ -1112,6 +1119,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error {
if m.args.Overrides.LabelWhiteList != nil {
c.LabelWhiteList = *m.args.Overrides.LabelWhiteList
}
if m.args.Overrides.ResyncPeriod != nil {
c.ResyncPeriod = *m.args.Overrides.ResyncPeriod
}

m.config = c
if !c.NoPublish {
Expand Down
31 changes: 3 additions & 28 deletions pkg/nfd-worker/nfd-worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ type coreConfig struct {
FeatureSources []string
Sources *[]string
LabelSources []string
SleepInterval duration
SleepInterval utils.DurationVal
}

type sourcesConfig map[string]source.Config
Expand Down Expand Up @@ -127,10 +127,6 @@ type nfdWorker struct {
labelSources []source.LabelSource
}

type duration struct {
time.Duration
}

// This ticker can represent infinite and normal intervals.
type infiniteTicker struct {
*time.Ticker
Expand Down Expand Up @@ -169,7 +165,7 @@ func newDefaultConfig() *NFDConfig {
return &NFDConfig{
Core: coreConfig{
LabelWhiteList: utils.RegexpVal{Regexp: *regexp.MustCompile("")},
SleepInterval: duration{60 * time.Second},
SleepInterval: utils.DurationVal{Duration: 60 * time.Second},
FeatureSources: []string{"all"},
LabelSources: []string{"all"},
Klog: make(map[string]string),
Expand Down Expand Up @@ -369,7 +365,7 @@ func (c *coreConfig) sanitize() {
if c.SleepInterval.Duration > 0 && c.SleepInterval.Duration < time.Second {
klog.Warningf("too short sleep interval specified (%s), forcing to 1s",
c.SleepInterval.Duration.String())
c.SleepInterval = duration{time.Second}
c.SleepInterval = utils.DurationVal{Duration: time.Second}
}
}

Expand Down Expand Up @@ -756,27 +752,6 @@ func (m *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) {
return c, nil
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *duration) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (c *sourcesConfig) UnmarshalJSON(data []byte) error {
// First do a raw parse to get the per-source data
Expand Down
36 changes: 36 additions & 0 deletions pkg/utils/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"regexp"
"sort"
"strings"
"time"
)

// RegexpVal is a wrapper for regexp command line flags
Expand Down Expand Up @@ -173,3 +174,38 @@ func NewKlogFlagVal(f *flag.Flag) *KlogFlagVal {
type boolFlag interface {
IsBoolFlag() bool
}

type DurationVal struct {
time.Duration
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *DurationVal) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}

// Set implements the flag.Value interface
func (d *DurationVal) Set(val string) error {
m, err := time.ParseDuration(val)
if err != nil {
return err
}
*d = DurationVal{m}
return nil
}

0 comments on commit 6753fde

Please sign in to comment.