Skip to content

Commit

Permalink
nfd-master: disallow unprefixed and kubernetes taints
Browse files Browse the repository at this point in the history
Disallow taints having a key with "kubernetes.io/" or "*.kubernetes.io/"
prefix. This is a precaution to protect the user from messing up with
the "official" well-known taints from Kubernetes itself. The only
exception is that the "nfd.node.kubernetes.io/" prefix is allowed.

However, there is one allowed NFD-specific namespace (and its
sub-namespaces) i.e. "feature.node.kubernetes.io" under the
kubernetes.io domain that can be used for NFD-managed taints.

Also disallow unprefixed taint keys. We don't add a default prefix to
unprefixed taints (like we do for labels) from NodeFeatureRules. This is
to prevent unpleasant surprises to users that need to manage matching
tolerations for their workloads.
  • Loading branch information
marquiz committed Apr 6, 2023
1 parent 621823f commit cc6c20f
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 45 deletions.
10 changes: 10 additions & 0 deletions docs/usage/customization-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ re-labeling delay up to the sleep-interval of nfd-worker (1 minute by default).

See [Label rule format](#label-rule-format) for detailed description of
available fields and how to write labeling rules.

### NodeFeatureRule tainting feature

This feature is experimental.
Expand Down Expand Up @@ -209,6 +210,15 @@ spec:
In this example, if the `my sample taint rule` rule is matched, `feature.node.kubernetes.io/pci-0300_1d0f.present=true:NoExecute`
and `feature.node.kubernetes.io/cpu-cpuid.ADX:NoExecute` taints are set on the node.

There are some limitations to the namespace part (i.e. prefix/) of the taint
key:

- `kubernetes.io/` and its sub-namespaces (like `sub.ns.kubernetes.io/`) cannot
generally be used
- the only exception is `feature.node.kubernetes.io/` and its sub-namespaces
(like `sub.ns.feature.node.kubernetes.io`)
- unprefixed keys (like `foo`) keys are disallowed

## Local feature source

NFD-Worker has a special feature source named `local` which is an integration
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/nfd/v1alpha1/annotations_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ const (
// ProfileLabelSubNsSuffix is the suffix for allowed profile label sub-namespaces.
ProfileLabelSubNsSuffix = "." + ProfileLabelNs

// TaintNs is the k8s.io namespace that can be used for NFD-managed taints.
TaintNs = "feature.node.kubernetes.io"

// TaintSubNsSuffix is the suffix for allowed sub-namespaces for NFD-managed taints.
TaintSubNsSuffix = "." + TaintNs

// AnnotationNs namespace for all NFD-related annotations.
AnnotationNs = "nfd.node.kubernetes.io"

Expand Down
24 changes: 23 additions & 1 deletion pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,28 @@ func (m *nfdMaster) filterFeatureLabels(labels Labels) (Labels, ExtendedResource
return outLabels, extendedResources
}

func filterTaints(taints []corev1.Taint) []corev1.Taint {
outTaints := []corev1.Taint{}

for _, taint := range taints {
ns, _ := splitNs(taint.Key)

// Check prefix of the key, filter out disallowed ones
if ns == "" {
klog.Errorf("taint keys without namespace (prefix/) are not allowed. Ignoring taint %v", ns, taint)
continue
}
if ns != nfdv1alpha1.TaintNs && !strings.HasSuffix(ns, nfdv1alpha1.TaintSubNsSuffix) &&
(ns == "kubernetes.io" || strings.HasSuffix(ns, ".kubernetes.io")) {
klog.Errorf("Prefix %q is not allowed for taint key. Ignoring taint %v", ns, taint)
continue
}
outTaints = append(outTaints, taint)
}

return outTaints
}

func verifyNodeName(cert *x509.Certificate, nodeName string) error {
if cert.Subject.CommonName == nodeName {
return nil
Expand Down Expand Up @@ -656,7 +678,7 @@ func (m *nfdMaster) refreshNodeFeatures(cli *kubernetes.Clientset, nodeName stri

var taints []corev1.Taint
if m.config.EnableTaints {
taints = crTaints
taints = filterTaints(crTaints)
}

err := m.updateNodeObject(cli, nodeName, labels, annotations, extendedResources, taints)
Expand Down
13 changes: 10 additions & 3 deletions test/e2e/data/nodefeaturerule-3-updated.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,18 @@ spec:
- name: "e2e-taint-test-1"
taints:
- effect: PreferNoSchedule
key: "nfd.node.kubernetes.io/fake-special-node"
key: "feature.node.kubernetes.io/fake-special-node"
value: "exists"
- effect: NoExecute
key: "nfd.node.kubernetes.io/foo"
key: "feature.node.kubernetes.io/foo"
value: "true"
# The following taints should be filtered out by nfd-master
- effect: PreferNoSchedule
key: "kubernetes.io/denied-1"
- effect: PreferNoSchedule
key: "node.kubernetes.io/denied-2"
- effect: PreferNoSchedule
key: "unprefixed-taint"
matchFeatures:
- feature: "fake.attribute"
matchExpressions:
Expand All @@ -23,7 +30,7 @@ spec:
- name: "e2e-taint-test-2"
taints:
- effect: PreferNoSchedule
key: "nfd.node.kubernetes.io/fake-cpu"
key: "feature.node.kubernetes.io/fake-cpu"
value: "true"
matchFeatures:
- feature: "fake.attribute"
Expand Down
8 changes: 4 additions & 4 deletions test/e2e/data/nodefeaturerule-3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ spec:
- name: "e2e-taint-test-1"
taints:
- effect: PreferNoSchedule
key: "nfd.node.kubernetes.io/fake-special-node"
key: "feature.node.kubernetes.io/fake-special-node"
value: "exists"
- effect: NoExecute
key: "nfd.node.kubernetes.io/fake-dedicated-node"
key: "feature.node.kubernetes.io/fake-dedicated-node"
value: "true"
- effect: "NoExecute"
key: "nfd.node.kubernetes.io/performance-optimized-node"
key: "feature.node.kubernetes.io/performance-optimized-node"
value: "true"
matchFeatures:
- feature: "fake.attribute"
Expand All @@ -26,7 +26,7 @@ spec:
- name: "e2e-taint-test-2"
taints:
- effect: PreferNoSchedule
key: "nfd.node.kubernetes.io/fake-special-cpu"
key: "feature.node.kubernetes.io/fake-special-cpu"
value: "true"
matchFeatures:
- feature: "fake.attribute"
Expand Down
60 changes: 23 additions & 37 deletions test/e2e/node_feature_discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ import (
testpod "sigs.k8s.io/node-feature-discovery/test/e2e/utils/pod"
)

const TestTaintNs = "nfd.node.kubernetes.io"

// cleanupNode deletes all NFD-related metadata from the Node object, i.e.
// labels and annotations
func cleanupNode(cs clientset.Interface) {
Expand Down Expand Up @@ -93,7 +91,7 @@ func cleanupNode(cs clientset.Interface) {

// Remove taints
for _, taint := range node.Spec.Taints {
if strings.HasPrefix(taint.Key, TestTaintNs) {
if strings.HasPrefix(taint.Key, nfdv1alpha1.TaintNs) {
newTaints, removed := taintutils.DeleteTaint(node.Spec.Taints, &taint)
if removed {
node.Spec.Taints = newTaints
Expand Down Expand Up @@ -671,22 +669,22 @@ var _ = SIGDescribe("NFD master and worker", func() {
Context("and nfd-worker and NodeFeatureRules objects deployed", func() {
testTolerations := []corev1.Toleration{
{
Key: "nfd.node.kubernetes.io/fake-special-node",
Key: "feature.node.kubernetes.io/fake-special-node",
Value: "exists",
Effect: "NoExecute",
},
{
Key: "nfd.node.kubernetes.io/fake-dedicated-node",
Key: "feature.node.kubernetes.io/fake-dedicated-node",
Value: "true",
Effect: "NoExecute",
},
{
Key: "nfd.node.kubernetes.io/performance-optimized-node",
Key: "feature.node.kubernetes.io/performance-optimized-node",
Value: "true",
Effect: "NoExecute",
},
{
Key: "nfd.node.kubernetes.io/foo",
Key: "feature.node.kubernetes.io/foo",
Value: "true",
Effect: "NoExecute",
},
Expand Down Expand Up @@ -766,45 +764,45 @@ core:
By("Verifying node taints and annotation from NodeFeatureRules #3")
expectedTaints := []corev1.Taint{
{
Key: "nfd.node.kubernetes.io/fake-special-node",
Key: "feature.node.kubernetes.io/fake-special-node",
Value: "exists",
Effect: "PreferNoSchedule",
},
{
Key: "nfd.node.kubernetes.io/fake-dedicated-node",
Key: "feature.node.kubernetes.io/fake-dedicated-node",
Value: "true",
Effect: "NoExecute",
},
{
Key: "nfd.node.kubernetes.io/performance-optimized-node",
Key: "feature.node.kubernetes.io/performance-optimized-node",
Value: "true",
Effect: "NoExecute",
},
}
expectedAnnotation := map[string]string{
"nfd.node.kubernetes.io/taints": "nfd.node.kubernetes.io/fake-special-node=exists:PreferNoSchedule,nfd.node.kubernetes.io/fake-dedicated-node=true:NoExecute,nfd.node.kubernetes.io/performance-optimized-node=true:NoExecute"}
Expect(waitForNfdNodeTaints(f.ClientSet, expectedTaints)).NotTo(HaveOccurred())
"nfd.node.kubernetes.io/taints": "feature.node.kubernetes.io/fake-special-node=exists:PreferNoSchedule,feature.node.kubernetes.io/fake-dedicated-node=true:NoExecute,feature.node.kubernetes.io/performance-optimized-node=true:NoExecute"}
Expect(waitForNfdNodeTaints(f.ClientSet, expectedTaints, nodes)).NotTo(HaveOccurred())
Expect(waitForNfdNodeAnnotations(f.ClientSet, expectedAnnotation)).NotTo(HaveOccurred())

By("Re-applying NodeFeatureRules #3 with updated taints")
Expect(testutils.UpdateNodeFeatureRulesFromFile(nfdClient, "nodefeaturerule-3-updated.yaml")).NotTo(HaveOccurred())
expectedTaintsUpdated := []corev1.Taint{
{
Key: "nfd.node.kubernetes.io/fake-special-node",
Key: "feature.node.kubernetes.io/fake-special-node",
Value: "exists",
Effect: "PreferNoSchedule",
},
{
Key: "nfd.node.kubernetes.io/foo",
Key: "feature.node.kubernetes.io/foo",
Value: "true",
Effect: "NoExecute",
},
}
expectedAnnotationUpdated := map[string]string{
"nfd.node.kubernetes.io/taints": "nfd.node.kubernetes.io/fake-special-node=exists:PreferNoSchedule,nfd.node.kubernetes.io/foo=true:NoExecute"}
"nfd.node.kubernetes.io/taints": "feature.node.kubernetes.io/fake-special-node=exists:PreferNoSchedule,feature.node.kubernetes.io/foo=true:NoExecute"}

By("Verifying updated node taints and annotation from NodeFeatureRules #3")
Expect(waitForNfdNodeTaints(f.ClientSet, expectedTaintsUpdated)).NotTo(HaveOccurred())
Expect(waitForNfdNodeTaints(f.ClientSet, expectedTaintsUpdated, nodes)).NotTo(HaveOccurred())
Expect(waitForNfdNodeAnnotations(f.ClientSet, expectedAnnotationUpdated)).NotTo(HaveOccurred())

By("Deleting nfd-worker daemonset")
Expand Down Expand Up @@ -942,7 +940,7 @@ func checkForNodeLabels(cli clientset.Interface, expectedNewLabels map[string]k8
}
}

oldLabels := getNodeLabels(oldNodes, node.Name)
oldLabels := getNode(oldNodes, node.Name).Labels
expectedNewLabels := maps.Clone(oldLabels)
maps.Copy(expectedNewLabels, nodeExpected)

Expand All @@ -965,17 +963,17 @@ func checkForNodeLabels(cli clientset.Interface, expectedNewLabels map[string]k8
}

// waitForNfdNodeTaints waits for node to be tainted as expected.
func waitForNfdNodeTaints(cli clientset.Interface, expected []corev1.Taint) error {
func waitForNfdNodeTaints(cli clientset.Interface, expectedNewTaints []corev1.Taint, oldNodes []corev1.Node) error {
poll := func() error {
nodes, err := getNonControlPlaneNodes(cli)
if err != nil {
return err
}
for _, node := range nodes {
taints := nfdTaints(node.Spec.Taints)
if err != nil {
return fmt.Errorf("failed to fetch nfd owned taints for node: %s", node.Name)
}
oldNode := getNode(oldNodes, node.Name)
expected := oldNode.Spec.DeepCopy().Taints
expected = append(expected, expectedNewTaints...)
taints := node.Spec.Taints
if !cmp.Equal(expected, taints) {
return fmt.Errorf("node %q taints do not match expected, diff (expected vs. received): %s", node.Name, cmp.Diff(expected, taints))
}
Expand All @@ -994,18 +992,6 @@ func waitForNfdNodeTaints(cli clientset.Interface, expected []corev1.Taint) erro
return err
}

// nfdTaints returns taints that are owned by the nfd.
func nfdTaints(taints []corev1.Taint) []corev1.Taint {
nfdTaints := []corev1.Taint{}
for _, taint := range taints {
if strings.HasPrefix(taint.Key, TestTaintNs) {
nfdTaints = append(nfdTaints, taint)
}
}

return nfdTaints
}

// getNonControlPlaneNodes gets the nodes that are not tainted for exclusive control-plane usage
func getNonControlPlaneNodes(cli clientset.Interface) ([]corev1.Node, error) {
nodeList, err := cli.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
Expand Down Expand Up @@ -1033,11 +1019,11 @@ func getNonControlPlaneNodes(cli clientset.Interface) ([]corev1.Node, error) {
return out, nil
}

func getNodeLabels(nodes []corev1.Node, nodeName string) map[string]string {
func getNode(nodes []corev1.Node, nodeName string) corev1.Node {
for _, node := range nodes {
if node.Name == nodeName {
return node.Labels
return node
}
}
return nil
return corev1.Node{}
}

0 comments on commit cc6c20f

Please sign in to comment.