diff --git a/cmd/nfd-master/main.go b/cmd/nfd-master/main.go index 07c41c50f0..b18223563c 100644 --- a/cmd/nfd-master/main.go +++ b/cmd/nfd-master/main.go @@ -96,6 +96,8 @@ func initFlags(flagset *flag.FlagSet) *master.Args { "NB: the label namespace is omitted i.e. the filter is only applied to the name part after '/'.") flagset.BoolVar(&args.NoPublish, "no-publish", false, "Do not publish feature labels") + flagset.BoolVar(&args.EnableTaints, "enable-taints", false, + "Enable node tainting feature") flagset.BoolVar(&args.FeatureRulesController, "featurerules-controller", true, "Enable controller for NodeFeatureRule objects. Generates node labels based on the rules in these CRs.") flagset.IntVar(&args.Port, "port", 8080, diff --git a/deployment/base/nfd-crds/cr-sample.yaml b/deployment/base/nfd-crds/cr-sample.yaml index dad9405d08..539757209b 100644 --- a/deployment/base/nfd-crds/cr-sample.yaml +++ b/deployment/base/nfd-crds/cr-sample.yaml @@ -7,6 +7,12 @@ spec: # The following feature demonstrates the capabilities of the matchFeatures and # matchAny matchers. - name: "my feature rule" + taints: + - effect: PreferNoSchedule + key: "feature.node.kubernetes.io/special-node" + value: "true" + - effect: NoExecute + key: "feature.node.kubernetes.io/dedicated-node" labels: "my-complex-feature": "my-value" # matchFeatures implements a logical AND over feature matchers. diff --git a/deployment/base/nfd-crds/nodefeaturerule-crd.yaml b/deployment/base/nfd-crds/nodefeaturerule-crd.yaml index e8e6004d13..9a1fc53c83 100644 --- a/deployment/base/nfd-crds/nodefeaturerule-crd.yaml +++ b/deployment/base/nfd-crds/nodefeaturerule-crd.yaml @@ -189,6 +189,35 @@ spec: name: description: Name of the rule. type: string + taints: + description: Taints to create if the rule matches. + items: + description: The node this Taint is attached to has the "effect" + on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. 
The effect of the taint on pods + that do not tolerate the taint. Valid effects are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the + taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + type: array vars: additionalProperties: type: string diff --git a/deployment/helm/node-feature-discovery/crds/nodefeaturerule-crd.yaml b/deployment/helm/node-feature-discovery/crds/nodefeaturerule-crd.yaml index e8e6004d13..9a1fc53c83 100644 --- a/deployment/helm/node-feature-discovery/crds/nodefeaturerule-crd.yaml +++ b/deployment/helm/node-feature-discovery/crds/nodefeaturerule-crd.yaml @@ -189,6 +189,35 @@ spec: name: description: Name of the rule. type: string + taints: + description: Taints to create if the rule matches. + items: + description: The node this Taint is attached to has the "effect" + on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. The effect of the taint on pods + that do not tolerate the taint. Valid effects are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the + taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. 
+ type: string + required: + - effect + - key + type: object + type: array vars: additionalProperties: type: string diff --git a/docs/reference/master-commandline-reference.md b/docs/reference/master-commandline-reference.md index aed7378578..8542742d10 100644 --- a/docs/reference/master-commandline-reference.md +++ b/docs/reference/master-commandline-reference.md @@ -99,6 +99,18 @@ Example: nfd-master -cert-file=/opt/nfd/master.crt -key-file=/opt/nfd/master.key -ca-file=/opt/nfd/ca.crt ``` +### -enable-taints + +The `-enable-taints` flag enables/disables the node tainting feature of NFD. + +Default: *false* + +Example: + +```bash +nfd-master -enable-taints=true +``` + ### -key-file The `-key-file` is one of the three flags (together with `-ca-file` and diff --git a/docs/usage/customization-guide.md b/docs/usage/customization-guide.md index f6ad998a87..a9dba14eb5 100644 --- a/docs/usage/customization-guide.md +++ b/docs/usage/customization-guide.md @@ -30,8 +30,8 @@ labeling: ## NodeFeatureRule custom resource `NodeFeatureRule` objects provide an easy way to create vendor or application -specific labels. It uses a flexible rule-based mechanism for creating labels -based on node feature. +specific labels and taints. It uses a flexible rule-based mechanism for creating +labels and optionally taints based on node features. ### A NodeFeatureRule example @@ -76,6 +76,54 @@ re-labeling delay up to the sleep-interval of nfd-worker (1 minute by default). See [Label rule format](#label-rule-format) for detailed description of available fields and how to write labeling rules. +### NodeFeatureRule tainting feature + +This feature is experimental. + +In some circumstances it is desirable to keep nodes with specialized hardware away from +running general workload and instead leave them for workloads that need the specialized +hardware. One way to achieve this is to taint the nodes with the specialized hardware +and add corresponding toleration to pods that require the special hardware. 
NFD +offers node tainting functionality which is disabled by default. Users can define +one or more custom taints via the `taints` field of the NodeFeatureRule CR. The +same rule-based mechanism is applied here and NFD taints only the nodes that match the rule. + +To enable the tainting feature, the `--enable-taints` flag needs to be set to `true`. +If the flag `--enable-taints` is set to `false` (i.e. disabled), taints defined in +the NodeFeatureRule CR have no effect and will be ignored by the NFD master. + +**NOTE**: Before enabling any taints, make sure to edit nfd-worker daemonset to +tolerate the taints to be created. Otherwise, already running pods that do not +tolerate the taint are evicted immediately from the node including the nfd-worker +pod. + +Example NodeFeatureRule with custom taints: + +```yaml +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureRule +metadata: + name: my-sample-rule-object +spec: + rules: + - name: "my sample taint rule" + taints: + - effect: PreferNoSchedule + key: "feature.node.kubernetes.io/special-node" + value: "true" + - effect: NoExecute + key: "feature.node.kubernetes.io/dedicated-node" + matchFeatures: + - feature: kernel.loadedmodule + matchExpressions: + dummy: {op: Exists} + - feature: kernel.config + matchExpressions: + X86: {op: In, value: ["y"]} +``` + +In this example, if the `my sample taint rule` rule is matched, `feature.node.kubernetes.io/special-node=true:PreferNoSchedule` +and `feature.node.kubernetes.io/dedicated-node:NoExecute` taints are set on the node. ### NodeFeatureRule controller @@ -365,6 +413,15 @@ details. labels specified in the `labels` field will override anything originating from `labelsTemplate`. +### Taints + +*taints* is a list of taint entries and each entry can have `key`, `value` and `effect`, +where the `value` is optional. The effect can be `NoSchedule`, `PreferNoSchedule` +or `NoExecute`. 
To learn more about the meaning of these effects, check out the Kubernetes [documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). + +**NOTE**: the taints field is not available for the custom rules of nfd-worker; it is only +available for NodeFeatureRule objects. + #### Vars The `.vars` field is a map of values (key-value pairs) to store for subsequent diff --git a/pkg/apis/nfd/v1alpha1/annotations_labels.go b/pkg/apis/nfd/v1alpha1/annotations_labels.go index 0dc82a6a9e..7e4cbee250 100644 --- a/pkg/apis/nfd/v1alpha1/annotations_labels.go +++ b/pkg/apis/nfd/v1alpha1/annotations_labels.go @@ -43,4 +43,7 @@ const ( // WorkerVersionAnnotation is the annotation that holds the version of nfd-worker running on the node WorkerVersionAnnotation = AnnotationNs + "/worker.version" + + // NodeTaintsAnnotation is the annotation that holds the taints that nfd-master set on the node + NodeTaintsAnnotation = AnnotationNs + "/taints" ) diff --git a/pkg/apis/nfd/v1alpha1/rule.go b/pkg/apis/nfd/v1alpha1/rule.go index c352b62d7f..d7e3cd20cb 100644 --- a/pkg/apis/nfd/v1alpha1/rule.go +++ b/pkg/apis/nfd/v1alpha1/rule.go @@ -22,8 +22,8 @@ import ( "strings" "text/template" + corev1 "k8s.io/api/core/v1" "k8s.io/klog/v2" - "sigs.k8s.io/node-feature-discovery/pkg/utils" ) @@ -32,6 +32,7 @@ import ( type RuleOutput struct { Labels map[string]string Vars map[string]string + Taints []corev1.Taint } // Execute the rule against a set of input features. @@ -94,9 +95,8 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) { vars[k] = v } - ret := RuleOutput{Labels: labels, Vars: vars} + ret := RuleOutput{Labels: labels, Vars: vars, Taints: r.Taints} utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret) - return ret, nil } diff --git a/pkg/apis/nfd/v1alpha1/types.go b/pkg/apis/nfd/v1alpha1/types.go index b3e4bd6027..c057cddf7e 100644 --- a/pkg/apis/nfd/v1alpha1/types.go +++ b/pkg/apis/nfd/v1alpha1/types.go @@ -17,6 +17,7 @@ limitations under the License. 
package v1alpha1 import ( + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -120,6 +121,10 @@ type Rule struct { // +optional VarsTemplate string `json:"varsTemplate"` + // Taints to create if the rule matches. + // +optional + Taints []corev1.Taint `json:"taints,omitempty"` + // MatchFeatures specifies a set of matcher terms all of which must match. // +optional MatchFeatures FeatureMatcher `json:"matchFeatures"` diff --git a/pkg/apis/nfd/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/nfd/v1alpha1/zz_generated.deepcopy.go index 1196b41394..47513f1335 100644 --- a/pkg/apis/nfd/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/nfd/v1alpha1/zz_generated.deepcopy.go @@ -6,6 +6,7 @@ package v1alpha1 import ( + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -438,6 +439,13 @@ func (in *Rule) DeepCopyInto(out *Rule) { (*out)[key] = val } } + if in.Taints != nil { + in, out := &in.Taints, &out.Taints + *out = make([]v1.Taint, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.MatchFeatures != nil { in, out := &in.MatchFeatures, &out.MatchFeatures *out = make(FeatureMatcher, len(*in)) diff --git a/pkg/nfd-master/nfd-master.go b/pkg/nfd-master/nfd-master.go index 5111d57730..d46f16bd32 100644 --- a/pkg/nfd-master/nfd-master.go +++ b/pkg/nfd-master/nfd-master.go @@ -39,10 +39,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" + label "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" "k8s.io/klog/v2" + controller "k8s.io/kubernetes/pkg/controller" + taintutils "k8s.io/kubernetes/pkg/util/taints" "sigs.k8s.io/node-feature-discovery/pkg/apihelper" nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1" @@ -72,6 +74,7 @@ type Args struct { LabelWhiteList utils.RegexpVal FeatureRulesController bool NoPublish bool + EnableTaints bool Port 
int Prune bool VerifyNodeName bool @@ -294,6 +297,13 @@ func (m *nfdMaster) prune() error { return fmt.Errorf("failed to prune labels from node %q: %v", node.Name, err) } + // Prune taints + err = m.setTaints(cli, []corev1.Taint{}, node.Name) + + if err != nil { + return fmt.Errorf("failed to prune taints from node %q: %v", node.Name, err) + } + // Prune annotations node, err := m.apihelper.GetNode(cli, node.Name) if err != nil { @@ -392,14 +402,13 @@ func verifyNodeName(cert *x509.Certificate, nodeName string) error { err := cert.VerifyHostname(nodeName) if err != nil { - return fmt.Errorf("Certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err) + return fmt.Errorf("certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err) } return nil } // SetLabels implements LabelerServer func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) { - err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName) if err != nil { return &pb.SetLabelsReply{}, err @@ -420,7 +429,9 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se // NOTE: we effectively mangle the request struct by not creating a deep copy of the map rawLabels = r.Labels } - for k, v := range m.crLabels(r) { + crLabels, crTaints := m.processNodeFeatureRule(r) + + for k, v := range crLabels { rawLabels[k] = v } @@ -440,10 +451,101 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se klog.Errorf("failed to advertise labels: %v", err) return &pb.SetLabelsReply{}, err } + + // set taints + var taints []corev1.Taint + if m.args.EnableTaints { + taints = crTaints + } + + // Call setTaints even though the feature flag is disabled. This + // ensures that we delete NFD owned stale taints when flag got + // turned off. 
+ err = m.setTaints(cli, taints, r.NodeName) + if err != nil { + return &pb.SetLabelsReply{}, err + } } return &pb.SetLabelsReply{}, nil } +// setTaints sets node taints and annotations based on the taints passed via +// nodeFeatureRule custom resource. If an empty list of taints is passed, currently +// NFD owned taints and annotations are removed from the node. +func (m *nfdMaster) setTaints(cli *kubernetes.Clientset, taints []corev1.Taint, nodeName string) error { + // Fetch the node object. + node, err := m.apihelper.GetNode(cli, nodeName) + if err != nil { + return err + } + + // De-serialize the taints annotation into corev1.Taint type for comparison below. + oldTaints := []corev1.Taint{} + if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok { + sts := strings.Split(val, ",") + oldTaints, _, err = taintutils.ParseTaints(sts) + if err != nil { + return err + } + } + + // Delete old nfd-managed taints that are not found in the set of new taints. + taintsUpdated := false + newNode := node.DeepCopy() + for _, taintToRemove := range oldTaints { + if taintutils.TaintExists(taints, &taintToRemove) { + continue + } + + newTaints, removed := taintutils.DeleteTaint(newNode.Spec.Taints, &taintToRemove) + if !removed { + klog.V(1).Infof("taint %q already deleted from node", taintToRemove.ToString()) + } + taintsUpdated = taintsUpdated || removed + newNode.Spec.Taints = newTaints + } + + // Add new taints found in the set of new taints. 
+ for _, taint := range taints { + var updated bool + newNode, updated, err = taintutils.AddOrUpdateTaint(newNode, &taint) + if err != nil { + return fmt.Errorf("failed to add %q taint on node %v", taint, node.Name) + } + taintsUpdated = taintsUpdated || updated + } + + if taintsUpdated { + err = controller.PatchNodeTaints(context.TODO(), cli, nodeName, node, newNode) + if err != nil { + return fmt.Errorf("failed to patch the node %v", node.Name) + } + klog.Infof("updated node %q taints", nodeName) + } + + // Update node annotation that holds the taints managed by us + newAnnotations := map[string]string{} + if len(taints) > 0 { + // Serialize the new taints into string and update the annotation + // with that string. + taintStrs := make([]string, 0, len(taints)) + for _, taint := range taints { + taintStrs = append(taintStrs, taint.ToString()) + } + newAnnotations[nfdv1alpha1.NodeTaintsAnnotation] = strings.Join(taintStrs, ",") + } + + patches := createPatches([]string{nfdv1alpha1.NodeTaintsAnnotation}, node.Annotations, newAnnotations, "/metadata/annotations") + if len(patches) > 0 { + err = m.apihelper.PatchNode(cli, node.Name, patches) + if err != nil { + return fmt.Errorf("error while patching node object: %v", err) + } + klog.V(1).Infof("patched node %q annotations for taints", nodeName) + } + return nil +} + func authorizeClient(c context.Context, checkNodeName bool, nodeName string) error { if checkNodeName { // Client authorization. 
@@ -493,20 +595,21 @@ func (m *nfdMaster) UpdateNodeTopology(c context.Context, r *topologypb.NodeTopo return &topologypb.NodeTopologyResponse{}, nil } -func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string { +func (m *nfdMaster) processNodeFeatureRule(r *pb.SetLabelsRequest) (map[string]string, []corev1.Taint) { if m.nfdController == nil { - return nil + return nil, nil } - l := make(map[string]string) - ruleSpecs, err := m.nfdController.ruleLister.List(labels.Everything()) + labels := make(map[string]string) + var taints []corev1.Taint + ruleSpecs, err := m.nfdController.ruleLister.List(label.Everything()) sort.Slice(ruleSpecs, func(i, j int) bool { return ruleSpecs[i].Name < ruleSpecs[j].Name }) if err != nil { klog.Errorf("failed to list NodeFeatureRule resources: %v", err) - return nil + return nil, nil } // Helper struct for rule processing @@ -527,9 +630,9 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string { klog.Errorf("failed to process Rule %q: %v", rule.Name, err) continue } - + taints = append(taints, ruleOut.Taints...) for k, v := range ruleOut.Labels { - l[k] = v + labels[k] = v } // Feed back rule output to features map for subsequent rules to match @@ -538,7 +641,7 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string { } } - return l + return labels, taints } // updateNodeFeatures ensures the Kubernetes node object is up to date,