From 35c141d5f55f82b391cc2dde52f1daf1ddf9b03f Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Wed, 17 Apr 2024 14:33:01 +0200 Subject: [PATCH] Add NodeFeatureGroup CRD The NodeFeatureGroup is an NFD-specific custom resource that is designed for grouping nodes based on their features. NFD-Master watches for NodeFeatureGroup objects in the cluster and updates the status of the NodeFeatureGroup object with the list of nodes that match the feature group rules. The NodeFeatureGroup rules follow the same syntax as the NodeFeatureRule rules. Signed-off-by: Carlos Eduardo Arango Gutierrez --- Dockerfile_generator | 2 - .../nfd/v1alpha1/fake/fake_nfd_client.go | 4 + .../v1alpha1/fake/fake_nodefeaturegroup.go | 141 ++++++++++ .../typed/nfd/v1alpha1/generated_expansion.go | 2 + .../typed/nfd/v1alpha1/nfd_client.go | 5 + .../typed/nfd/v1alpha1/nodefeaturegroup.go | 195 +++++++++++++ .../informers/externalversions/generic.go | 2 + .../nfd/v1alpha1/interface.go | 7 + .../nfd/v1alpha1/nodefeaturegroup.go | 90 ++++++ .../nfd/v1alpha1/expansion_generated.go | 8 + .../listers/nfd/v1alpha1/nodefeaturegroup.go | 99 +++++++ api/nfd/v1alpha1/register.go | 1 + api/nfd/v1alpha1/types.go | 49 ++++ api/nfd/v1alpha1/zz_generated.deepcopy.go | 135 +++++++++ cmd/nfd-master/main.go | 10 +- deployment/base/nfd-crds/nfd-api-crds.yaml | 257 ++++++++++++++++++ deployment/base/rbac/master-clusterrole.yaml | 8 + .../crds/nfd-api-crds.yaml | 257 ++++++++++++++++++ .../templates/clusterrole.yaml | 8 + .../helm/node-feature-discovery/values.yaml | 1 + docs/reference/feature-gates.md | 9 + docs/usage/custom-resources.md | 22 ++ docs/usage/customization-guide.md | 43 +++ examples/nodefeaturegroup.yaml | 11 + hack/update_codegen.sh | 4 + pkg/apis/nfd/nodefeaturerule/rule.go | 35 +++ pkg/features/features.go | 10 +- pkg/nfd-master/metrics.go | 23 +- pkg/nfd-master/nfd-api-controller.go | 93 +++++-- pkg/nfd-master/nfd-master-internal_test.go | 6 +- pkg/nfd-master/nfd-master.go | 203 
++++++++++++-- pkg/nfd-master/node-updater-pool.go | 131 --------- pkg/nfd-master/updater-pool.go | 194 +++++++++++++ ...ater-pool_test.go => updater-pool_test.go} | 50 ++-- pkg/nfd-worker/nfd-worker_test.go | 1 + test/e2e/data/nodefeaturegroup-1.yaml | 11 + test/e2e/node_feature_discovery_test.go | 85 ++++++ test/e2e/utils/crd.go | 34 +++ test/e2e/utils/rbac.go | 10 + 39 files changed, 2038 insertions(+), 218 deletions(-) create mode 100644 api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nodefeaturegroup.go create mode 100644 api/generated/clientset/versioned/typed/nfd/v1alpha1/nodefeaturegroup.go create mode 100644 api/generated/informers/externalversions/nfd/v1alpha1/nodefeaturegroup.go create mode 100644 api/generated/listers/nfd/v1alpha1/nodefeaturegroup.go create mode 100644 examples/nodefeaturegroup.yaml delete mode 100644 pkg/nfd-master/node-updater-pool.go create mode 100644 pkg/nfd-master/updater-pool.go rename pkg/nfd-master/{node-updater-pool_test.go => updater-pool_test.go} (60%) create mode 100644 test/e2e/data/nodefeaturegroup-1.yaml diff --git a/Dockerfile_generator b/Dockerfile_generator index ffddd02876..a41ebbd9ad 100644 --- a/Dockerfile_generator +++ b/Dockerfile_generator @@ -4,8 +4,6 @@ FROM ${BUILDER_IMAGE} as builder # Install tools RUN go install github.com/vektra/mockery/v2@v2.42.0 && \ go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.14.0 && \ - git clone https://github.com/kubernetes/code-generator -b v0.29.0 --depth 1 && \ - go install k8s.io/code-generator/cmd/go-to-protobuf/...@v0.29.0 && \ go install golang.org/x/tools/cmd/goimports@v0.11.0 && \ go install github.com/golang/protobuf/protoc-gen-go@v1.4.3 diff --git a/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nfd_client.go b/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nfd_client.go index 1a6e91c48c..282a6e717b 100644 --- a/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nfd_client.go +++ 
b/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nfd_client.go @@ -32,6 +32,10 @@ func (c *FakeNfdV1alpha1) NodeFeatures(namespace string) v1alpha1.NodeFeatureInt return &FakeNodeFeatures{c, namespace} } +func (c *FakeNfdV1alpha1) NodeFeatureGroups(namespace string) v1alpha1.NodeFeatureGroupInterface { + return &FakeNodeFeatureGroups{c, namespace} +} + func (c *FakeNfdV1alpha1) NodeFeatureRules() v1alpha1.NodeFeatureRuleInterface { return &FakeNodeFeatureRules{c} } diff --git a/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nodefeaturegroup.go b/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nodefeaturegroup.go new file mode 100644 index 0000000000..c104bb7348 --- /dev/null +++ b/api/generated/clientset/versioned/typed/nfd/v1alpha1/fake/fake_nodefeaturegroup.go @@ -0,0 +1,141 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + "context" + + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" + v1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" +) + +// FakeNodeFeatureGroups implements NodeFeatureGroupInterface +type FakeNodeFeatureGroups struct { + Fake *FakeNfdV1alpha1 + ns string +} + +var nodefeaturegroupsResource = v1alpha1.SchemeGroupVersion.WithResource("nodefeaturegroups") + +var nodefeaturegroupsKind = v1alpha1.SchemeGroupVersion.WithKind("NodeFeatureGroup") + +// Get takes name of the nodeFeatureGroup, and returns the corresponding nodeFeatureGroup object, and an error if there is any. +func (c *FakeNodeFeatureGroups) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + obj, err := c.Fake. + Invokes(testing.NewGetAction(nodefeaturegroupsResource, c.ns, name), &v1alpha1.NodeFeatureGroup{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.NodeFeatureGroup), err +} + +// List takes label and field selectors, and returns the list of NodeFeatureGroups that match those selectors. +func (c *FakeNodeFeatureGroups) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.NodeFeatureGroupList, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewListAction(nodefeaturegroupsResource, nodefeaturegroupsKind, c.ns, opts), &v1alpha1.NodeFeatureGroupList{}) + + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1alpha1.NodeFeatureGroupList{ListMeta: obj.(*v1alpha1.NodeFeatureGroupList).ListMeta} + for _, item := range obj.(*v1alpha1.NodeFeatureGroupList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested nodeFeatureGroups. +func (c *FakeNodeFeatureGroups) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewWatchAction(nodefeaturegroupsResource, c.ns, opts)) + +} + +// Create takes the representation of a nodeFeatureGroup and creates it. Returns the server's representation of the nodeFeatureGroup, and an error, if there is any. +func (c *FakeNodeFeatureGroups) Create(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.CreateOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + obj, err := c.Fake. + Invokes(testing.NewCreateAction(nodefeaturegroupsResource, c.ns, nodeFeatureGroup), &v1alpha1.NodeFeatureGroup{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.NodeFeatureGroup), err +} + +// Update takes the representation of a nodeFeatureGroup and updates it. Returns the server's representation of the nodeFeatureGroup, and an error, if there is any. +func (c *FakeNodeFeatureGroups) Update(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewUpdateAction(nodefeaturegroupsResource, c.ns, nodeFeatureGroup), &v1alpha1.NodeFeatureGroup{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.NodeFeatureGroup), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeNodeFeatureGroups) UpdateStatus(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (*v1alpha1.NodeFeatureGroup, error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateSubresourceAction(nodefeaturegroupsResource, "status", c.ns, nodeFeatureGroup), &v1alpha1.NodeFeatureGroup{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.NodeFeatureGroup), err +} + +// Delete takes name of the nodeFeatureGroup and deletes it. Returns an error if one occurs. +func (c *FakeNodeFeatureGroups) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewDeleteActionWithOptions(nodefeaturegroupsResource, c.ns, name, opts), &v1alpha1.NodeFeatureGroup{}) + + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeNodeFeatureGroups) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewDeleteCollectionAction(nodefeaturegroupsResource, c.ns, listOpts) + + _, err := c.Fake.Invokes(action, &v1alpha1.NodeFeatureGroupList{}) + return err +} + +// Patch applies the patch and returns the patched nodeFeatureGroup. +func (c *FakeNodeFeatureGroups) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NodeFeatureGroup, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewPatchSubresourceAction(nodefeaturegroupsResource, c.ns, name, pt, data, subresources...), &v1alpha1.NodeFeatureGroup{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.NodeFeatureGroup), err +} diff --git a/api/generated/clientset/versioned/typed/nfd/v1alpha1/generated_expansion.go b/api/generated/clientset/versioned/typed/nfd/v1alpha1/generated_expansion.go index 65ac79f6c3..02d3e35186 100644 --- a/api/generated/clientset/versioned/typed/nfd/v1alpha1/generated_expansion.go +++ b/api/generated/clientset/versioned/typed/nfd/v1alpha1/generated_expansion.go @@ -20,4 +20,6 @@ package v1alpha1 type NodeFeatureExpansion interface{} +type NodeFeatureGroupExpansion interface{} + type NodeFeatureRuleExpansion interface{} diff --git a/api/generated/clientset/versioned/typed/nfd/v1alpha1/nfd_client.go b/api/generated/clientset/versioned/typed/nfd/v1alpha1/nfd_client.go index 0a733e9747..37a6f340aa 100644 --- a/api/generated/clientset/versioned/typed/nfd/v1alpha1/nfd_client.go +++ b/api/generated/clientset/versioned/typed/nfd/v1alpha1/nfd_client.go @@ -29,6 +29,7 @@ import ( type NfdV1alpha1Interface interface { RESTClient() rest.Interface NodeFeaturesGetter + NodeFeatureGroupsGetter NodeFeatureRulesGetter } @@ -41,6 +42,10 @@ func (c *NfdV1alpha1Client) NodeFeatures(namespace string) NodeFeatureInterface return newNodeFeatures(c, namespace) } +func (c *NfdV1alpha1Client) NodeFeatureGroups(namespace string) NodeFeatureGroupInterface { + return newNodeFeatureGroups(c, namespace) +} + func (c *NfdV1alpha1Client) NodeFeatureRules() NodeFeatureRuleInterface { return newNodeFeatureRules(c) } diff --git a/api/generated/clientset/versioned/typed/nfd/v1alpha1/nodefeaturegroup.go b/api/generated/clientset/versioned/typed/nfd/v1alpha1/nodefeaturegroup.go new file mode 100644 index 0000000000..a988f3c8e9 --- /dev/null +++ b/api/generated/clientset/versioned/typed/nfd/v1alpha1/nodefeaturegroup.go @@ -0,0 +1,195 @@ +/* +Copyright 2024 The Kubernetes 
Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + "time" + + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" + scheme "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned/scheme" + v1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" +) + +// NodeFeatureGroupsGetter has a method to return a NodeFeatureGroupInterface. +// A group's client should implement this interface. +type NodeFeatureGroupsGetter interface { + NodeFeatureGroups(namespace string) NodeFeatureGroupInterface +} + +// NodeFeatureGroupInterface has methods to work with NodeFeatureGroup resources. 
+type NodeFeatureGroupInterface interface { + Create(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.CreateOptions) (*v1alpha1.NodeFeatureGroup, error) + Update(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (*v1alpha1.NodeFeatureGroup, error) + UpdateStatus(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (*v1alpha1.NodeFeatureGroup, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.NodeFeatureGroup, error) + List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.NodeFeatureGroupList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NodeFeatureGroup, err error) + NodeFeatureGroupExpansion +} + +// nodeFeatureGroups implements NodeFeatureGroupInterface +type nodeFeatureGroups struct { + client rest.Interface + ns string +} + +// newNodeFeatureGroups returns a NodeFeatureGroups +func newNodeFeatureGroups(c *NfdV1alpha1Client, namespace string) *nodeFeatureGroups { + return &nodeFeatureGroups{ + client: c.RESTClient(), + ns: namespace, + } +} + +// Get takes name of the nodeFeatureGroup, and returns the corresponding nodeFeatureGroup object, and an error if there is any. +func (c *nodeFeatureGroups) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + result = &v1alpha1.NodeFeatureGroup{} + err = c.client.Get(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). 
+ Into(result) + return +} + +// List takes label and field selectors, and returns the list of NodeFeatureGroups that match those selectors. +func (c *nodeFeatureGroups) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.NodeFeatureGroupList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1alpha1.NodeFeatureGroupList{} + err = c.client.Get(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested nodeFeatureGroups. +func (c *nodeFeatureGroups) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a nodeFeatureGroup and creates it. Returns the server's representation of the nodeFeatureGroup, and an error, if there is any. +func (c *nodeFeatureGroups) Create(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.CreateOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + result = &v1alpha1.NodeFeatureGroup{} + err = c.client.Post(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(nodeFeatureGroup). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a nodeFeatureGroup and updates it. Returns the server's representation of the nodeFeatureGroup, and an error, if there is any. 
+func (c *nodeFeatureGroups) Update(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + result = &v1alpha1.NodeFeatureGroup{} + err = c.client.Put(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + Name(nodeFeatureGroup.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(nodeFeatureGroup). + Do(ctx). + Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *nodeFeatureGroups) UpdateStatus(ctx context.Context, nodeFeatureGroup *v1alpha1.NodeFeatureGroup, opts v1.UpdateOptions) (result *v1alpha1.NodeFeatureGroup, err error) { + result = &v1alpha1.NodeFeatureGroup{} + err = c.client.Put(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + Name(nodeFeatureGroup.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(nodeFeatureGroup). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the nodeFeatureGroup and deletes it. Returns an error if one occurs. +func (c *nodeFeatureGroups) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + return c.client.Delete(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *nodeFeatureGroups) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Namespace(c.ns). + Resource("nodefeaturegroups"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched nodeFeatureGroup. 
+func (c *nodeFeatureGroups) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NodeFeatureGroup, err error) { + result = &v1alpha1.NodeFeatureGroup{} + err = c.client.Patch(pt). + Namespace(c.ns). + Resource("nodefeaturegroups"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). + Into(result) + return +} diff --git a/api/generated/informers/externalversions/generic.go b/api/generated/informers/externalversions/generic.go index ca1ed50a58..86577fff52 100644 --- a/api/generated/informers/externalversions/generic.go +++ b/api/generated/informers/externalversions/generic.go @@ -55,6 +55,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource // Group=nfd.k8s-sigs.io, Version=v1alpha1 case v1alpha1.SchemeGroupVersion.WithResource("nodefeatures"): return &genericInformer{resource: resource.GroupResource(), informer: f.Nfd().V1alpha1().NodeFeatures().Informer()}, nil + case v1alpha1.SchemeGroupVersion.WithResource("nodefeaturegroups"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Nfd().V1alpha1().NodeFeatureGroups().Informer()}, nil case v1alpha1.SchemeGroupVersion.WithResource("nodefeaturerules"): return &genericInformer{resource: resource.GroupResource(), informer: f.Nfd().V1alpha1().NodeFeatureRules().Informer()}, nil diff --git a/api/generated/informers/externalversions/nfd/v1alpha1/interface.go b/api/generated/informers/externalversions/nfd/v1alpha1/interface.go index d92f42ab00..ea861a472f 100644 --- a/api/generated/informers/externalversions/nfd/v1alpha1/interface.go +++ b/api/generated/informers/externalversions/nfd/v1alpha1/interface.go @@ -26,6 +26,8 @@ import ( type Interface interface { // NodeFeatures returns a NodeFeatureInformer. NodeFeatures() NodeFeatureInformer + // NodeFeatureGroups returns a NodeFeatureGroupInformer. 
+ NodeFeatureGroups() NodeFeatureGroupInformer // NodeFeatureRules returns a NodeFeatureRuleInformer. NodeFeatureRules() NodeFeatureRuleInformer } @@ -46,6 +48,11 @@ func (v *version) NodeFeatures() NodeFeatureInformer { return &nodeFeatureInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} } +// NodeFeatureGroups returns a NodeFeatureGroupInformer. +func (v *version) NodeFeatureGroups() NodeFeatureGroupInformer { + return &nodeFeatureGroupInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + // NodeFeatureRules returns a NodeFeatureRuleInformer. func (v *version) NodeFeatureRules() NodeFeatureRuleInformer { return &nodeFeatureRuleInformer{factory: v.factory, tweakListOptions: v.tweakListOptions} diff --git a/api/generated/informers/externalversions/nfd/v1alpha1/nodefeaturegroup.go b/api/generated/informers/externalversions/nfd/v1alpha1/nodefeaturegroup.go new file mode 100644 index 0000000000..deebcae1f4 --- /dev/null +++ b/api/generated/informers/externalversions/nfd/v1alpha1/nodefeaturegroup.go @@ -0,0 +1,90 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1alpha1 + +import ( + "context" + time "time" + + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" + versioned "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" + internalinterfaces "sigs.k8s.io/node-feature-discovery/api/generated/informers/externalversions/internalinterfaces" + v1alpha1 "sigs.k8s.io/node-feature-discovery/api/generated/listers/nfd/v1alpha1" + nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" +) + +// NodeFeatureGroupInformer provides access to a shared informer and lister for +// NodeFeatureGroups. +type NodeFeatureGroupInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1alpha1.NodeFeatureGroupLister +} + +type nodeFeatureGroupInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewNodeFeatureGroupInformer constructs a new informer for NodeFeatureGroup type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewNodeFeatureGroupInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredNodeFeatureGroupInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredNodeFeatureGroupInformer constructs a new informer for NodeFeatureGroup type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredNodeFeatureGroupInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.NfdV1alpha1().NodeFeatureGroups(namespace).List(context.TODO(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.NfdV1alpha1().NodeFeatureGroups(namespace).Watch(context.TODO(), options) + }, + }, + &nfdv1alpha1.NodeFeatureGroup{}, + resyncPeriod, + indexers, + ) +} + +func (f *nodeFeatureGroupInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredNodeFeatureGroupInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *nodeFeatureGroupInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&nfdv1alpha1.NodeFeatureGroup{}, f.defaultInformer) +} + +func (f *nodeFeatureGroupInformer) Lister() v1alpha1.NodeFeatureGroupLister { + return v1alpha1.NewNodeFeatureGroupLister(f.Informer().GetIndexer()) +} diff --git a/api/generated/listers/nfd/v1alpha1/expansion_generated.go b/api/generated/listers/nfd/v1alpha1/expansion_generated.go index 8236e8f4e1..5890048ee1 100644 --- a/api/generated/listers/nfd/v1alpha1/expansion_generated.go +++ b/api/generated/listers/nfd/v1alpha1/expansion_generated.go @@ -26,6 +26,14 @@ type NodeFeatureListerExpansion interface{} // NodeFeatureNamespaceLister. type NodeFeatureNamespaceListerExpansion interface{} +// NodeFeatureGroupListerExpansion allows custom methods to be added to +// NodeFeatureGroupLister. 
+type NodeFeatureGroupListerExpansion interface{} + +// NodeFeatureGroupNamespaceListerExpansion allows custom methods to be added to +// NodeFeatureGroupNamespaceLister. +type NodeFeatureGroupNamespaceListerExpansion interface{} + // NodeFeatureRuleListerExpansion allows custom methods to be added to // NodeFeatureRuleLister. type NodeFeatureRuleListerExpansion interface{} diff --git a/api/generated/listers/nfd/v1alpha1/nodefeaturegroup.go b/api/generated/listers/nfd/v1alpha1/nodefeaturegroup.go new file mode 100644 index 0000000000..48d87107aa --- /dev/null +++ b/api/generated/listers/nfd/v1alpha1/nodefeaturegroup.go @@ -0,0 +1,99 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" + v1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" +) + +// NodeFeatureGroupLister helps list NodeFeatureGroups. +// All objects returned here must be treated as read-only. +type NodeFeatureGroupLister interface { + // List lists all NodeFeatureGroups in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1alpha1.NodeFeatureGroup, err error) + // NodeFeatureGroups returns an object that can list and get NodeFeatureGroups. 
+ NodeFeatureGroups(namespace string) NodeFeatureGroupNamespaceLister + NodeFeatureGroupListerExpansion +} + +// nodeFeatureGroupLister implements the NodeFeatureGroupLister interface. +type nodeFeatureGroupLister struct { + indexer cache.Indexer +} + +// NewNodeFeatureGroupLister returns a new NodeFeatureGroupLister. +func NewNodeFeatureGroupLister(indexer cache.Indexer) NodeFeatureGroupLister { + return &nodeFeatureGroupLister{indexer: indexer} +} + +// List lists all NodeFeatureGroups in the indexer. +func (s *nodeFeatureGroupLister) List(selector labels.Selector) (ret []*v1alpha1.NodeFeatureGroup, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1alpha1.NodeFeatureGroup)) + }) + return ret, err +} + +// NodeFeatureGroups returns an object that can list and get NodeFeatureGroups. +func (s *nodeFeatureGroupLister) NodeFeatureGroups(namespace string) NodeFeatureGroupNamespaceLister { + return nodeFeatureGroupNamespaceLister{indexer: s.indexer, namespace: namespace} +} + +// NodeFeatureGroupNamespaceLister helps list and get NodeFeatureGroups. +// All objects returned here must be treated as read-only. +type NodeFeatureGroupNamespaceLister interface { + // List lists all NodeFeatureGroups in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1alpha1.NodeFeatureGroup, err error) + // Get retrieves the NodeFeatureGroup from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*v1alpha1.NodeFeatureGroup, error) + NodeFeatureGroupNamespaceListerExpansion +} + +// nodeFeatureGroupNamespaceLister implements the NodeFeatureGroupNamespaceLister +// interface. +type nodeFeatureGroupNamespaceLister struct { + indexer cache.Indexer + namespace string +} + +// List lists all NodeFeatureGroups in the indexer for a given namespace. 
+func (s nodeFeatureGroupNamespaceLister) List(selector labels.Selector) (ret []*v1alpha1.NodeFeatureGroup, err error) { + err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, func(m interface{}) { + ret = append(ret, m.(*v1alpha1.NodeFeatureGroup)) + }) + return ret, err +} + +// Get retrieves the NodeFeatureGroup from the indexer for a given namespace and name. +func (s nodeFeatureGroupNamespaceLister) Get(name string) (*v1alpha1.NodeFeatureGroup, error) { + obj, exists, err := s.indexer.GetByKey(s.namespace + "/" + name) + if err != nil { + return nil, err + } + if !exists { + return nil, errors.NewNotFound(v1alpha1.Resource("nodefeaturegroup"), name) + } + return obj.(*v1alpha1.NodeFeatureGroup), nil +} diff --git a/api/nfd/v1alpha1/register.go b/api/nfd/v1alpha1/register.go index 8c1d41f5b6..9ccfbba25c 100644 --- a/api/nfd/v1alpha1/register.go +++ b/api/nfd/v1alpha1/register.go @@ -42,6 +42,7 @@ func addKnownTypes(scheme *runtime.Scheme) error { scheme.AddKnownTypes(SchemeGroupVersion, &NodeFeature{}, &NodeFeatureRule{}, + &NodeFeatureGroup{}, ) metav1.AddToGroupVersion(scheme, SchemeGroupVersion) return nil diff --git a/api/nfd/v1alpha1/types.go b/api/nfd/v1alpha1/types.go index 66122ba97d..7f0a297086 100644 --- a/api/nfd/v1alpha1/types.go +++ b/api/nfd/v1alpha1/types.go @@ -131,6 +131,55 @@ type NodeFeatureRuleSpec struct { Rules []Rule `json:"rules"` } +// NodeFeatureGroup resource holds Node pools by featureGroup +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Namespaced,shortName=nfg +// +kubebuilder:subresource:status +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +genclient +type NodeFeatureGroup struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec NodeFeatureGroupSpec `json:"spec"` + Status NodeFeatureGroupStatus `json:"status,omitempty"` +} + +// NodeFeatureGroupSpec describes a NodeFeatureGroup object. 
+type NodeFeatureGroupSpec struct { + Rules []GroupRule `json:"featureGroupRules"` +} + +type NodeFeatureGroupStatus struct { + // Nodes is a list of nodes in the cluster that match the featureGroupRules + // +optional + Nodes []string `json:"nodes"` +} + +// NodeFeatureGroupList contains a list of NodeFeatureGroup objects. +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type NodeFeatureGroupList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata"` + + Items []NodeFeatureGroup `json:"items"` +} + +// GroupRule defines a rule for nodegroup filtering. +type GroupRule struct { + // Name of the rule. + Name string `json:"name"` + + // MatchFeatures specifies a set of matcher terms all of which must match. + // +optional + MatchFeatures FeatureMatcher `json:"matchFeatures"` + + // MatchAny specifies a list of matchers one of which must match. + // +optional + MatchAny []MatchAnyElem `json:"matchAny"` +} + // Rule defines a rule for node customization such as labeling. type Rule struct { // Name of the rule. diff --git a/api/nfd/v1alpha1/zz_generated.deepcopy.go b/api/nfd/v1alpha1/zz_generated.deepcopy.go index 94748d2ad2..f1f9e13241 100644 --- a/api/nfd/v1alpha1/zz_generated.deepcopy.go +++ b/api/nfd/v1alpha1/zz_generated.deepcopy.go @@ -171,6 +171,36 @@ func (in *FlagFeatureSet) DeepCopy() *FlagFeatureSet { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *GroupRule) DeepCopyInto(out *GroupRule) { + *out = *in + if in.MatchFeatures != nil { + in, out := &in.MatchFeatures, &out.MatchFeatures + *out = make(FeatureMatcher, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.MatchAny != nil { + in, out := &in.MatchAny, &out.MatchAny + *out = make([]MatchAnyElem, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GroupRule. +func (in *GroupRule) DeepCopy() *GroupRule { + if in == nil { + return nil + } + out := new(GroupRule) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InstanceFeature) DeepCopyInto(out *InstanceFeature) { *out = *in @@ -354,6 +384,111 @@ func (in *NodeFeature) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeFeatureGroup) DeepCopyInto(out *NodeFeatureGroup) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeFeatureGroup. +func (in *NodeFeatureGroup) DeepCopy() *NodeFeatureGroup { + if in == nil { + return nil + } + out := new(NodeFeatureGroup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NodeFeatureGroup) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NodeFeatureGroupList) DeepCopyInto(out *NodeFeatureGroupList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NodeFeatureGroup, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeFeatureGroupList. +func (in *NodeFeatureGroupList) DeepCopy() *NodeFeatureGroupList { + if in == nil { + return nil + } + out := new(NodeFeatureGroupList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NodeFeatureGroupList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeFeatureGroupSpec) DeepCopyInto(out *NodeFeatureGroupSpec) { + *out = *in + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]GroupRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeFeatureGroupSpec. +func (in *NodeFeatureGroupSpec) DeepCopy() *NodeFeatureGroupSpec { + if in == nil { + return nil + } + out := new(NodeFeatureGroupSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeFeatureGroupStatus) DeepCopyInto(out *NodeFeatureGroupStatus) { + *out = *in + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeFeatureGroupStatus. 
+func (in *NodeFeatureGroupStatus) DeepCopy() *NodeFeatureGroupStatus { + if in == nil { + return nil + } + out := new(NodeFeatureGroupStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeFeatureList) DeepCopyInto(out *NodeFeatureList) { *out = *in diff --git a/cmd/nfd-master/main.go b/cmd/nfd-master/main.go index a51309b4a3..f85d2ae44a 100644 --- a/cmd/nfd-master/main.go +++ b/cmd/nfd-master/main.go @@ -110,8 +110,16 @@ func main() { utils.ConfigureGrpcKlog() // Get new NfdMaster instance + kubeconfig, err := utils.GetKubeconfig(args.Kubeconfig) + if err != nil { + klog.ErrorS(err, "failed to get kubeconfig") + os.Exit(1) + } args.GrpcHealthPort = GrpcHealthPort - instance, err := master.NewNfdMaster(master.WithArgs(args)) + instance, err := master.NewNfdMaster( + master.WithArgs(args), + master.WithKubeConfig(kubeconfig), + ) if err != nil { klog.ErrorS(err, "failed to initialize NfdMaster instance") os.Exit(1) diff --git a/deployment/base/nfd-crds/nfd-api-crds.yaml b/deployment/base/nfd-crds/nfd-api-crds.yaml index f0a24e725e..13cbfe4832 100644 --- a/deployment/base/nfd-crds/nfd-api-crds.yaml +++ b/deployment/base/nfd-crds/nfd-api-crds.yaml @@ -117,6 +117,263 @@ spec: --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: nodefeaturegroups.nfd.k8s-sigs.io +spec: + group: nfd.k8s-sigs.io + names: + kind: NodeFeatureGroup + listKind: NodeFeatureGroupList + plural: nodefeaturegroups + shortNames: + - nfg + singular: nodefeaturegroup + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeFeatureGroup resource holds Node pools by featureGroup + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NodeFeatureGroupSpec describes a NodeFeatureGroup object. + properties: + featureGroupRules: + items: + description: GroupRule defines a rule for nodegroup filtering. + properties: + matchAny: + description: MatchAny specifies a list of matchers one of which + must match. + items: + description: MatchAnyElem specifies one sub-matcher of MatchAny. + properties: + matchFeatures: + description: MatchFeatures specifies a set of matcher + terms all of which must match. + items: + description: |- + FeatureMatcherTerm defines requirements against one feature set. All + requirements (specified as MatchExpressions) are evaluated against each + element in the feature set. + properties: + feature: + description: Feature is the name of the feature + set to match against. + type: string + matchExpressions: + additionalProperties: + description: |- + MatchExpression specifies an expression to evaluate against a set of input + values. It contains an operator that is applied when matching the input and + an array of values that the operator evaluates the input against. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. 
Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + description: |- + MatchExpressions is the set of per-element expressions evaluated. These + match against the value of the specified elements. + type: object + matchName: + description: |- + MatchName in an expression that is matched against the name of each + element in the feature set. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + required: + - feature + type: object + type: array + required: + - matchFeatures + type: object + type: array + matchFeatures: + description: MatchFeatures specifies a set of matcher terms + all of which must match. + items: + description: |- + FeatureMatcherTerm defines requirements against one feature set. All + requirements (specified as MatchExpressions) are evaluated against each + element in the feature set. + properties: + feature: + description: Feature is the name of the feature set to + match against. + type: string + matchExpressions: + additionalProperties: + description: |- + MatchExpression specifies an expression to evaluate against a set of input + values. 
It contains an operator that is applied when matching the input and + an array of values that the operator evaluates the input against. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + description: |- + MatchExpressions is the set of per-element expressions evaluated. These + match against the value of the specified elements. + type: object + matchName: + description: |- + MatchName in an expression that is matched against the name of each + element in the feature set. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + required: + - feature + type: object + type: array + name: + description: Name of the rule. 
+ type: string + required: + - name + type: object + type: array + required: + - featureGroupRules + type: object + status: + properties: + nodes: + description: Nodes is a list of nodes in the cluster that match the + featureGroupRules + items: + type: string + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.14.0 diff --git a/deployment/base/rbac/master-clusterrole.yaml b/deployment/base/rbac/master-clusterrole.yaml index e61b4dc065..529f87e383 100644 --- a/deployment/base/rbac/master-clusterrole.yaml +++ b/deployment/base/rbac/master-clusterrole.yaml @@ -18,10 +18,18 @@ rules: resources: - nodefeatures - nodefeaturerules + - nodefeaturegroups verbs: - get - list - watch +- apiGroups: + - nfd.k8s-sigs.io + resources: + - nodefeaturegroups/status + verbs: + - patch + - update - apiGroups: - coordination.k8s.io resources: diff --git a/deployment/helm/node-feature-discovery/crds/nfd-api-crds.yaml b/deployment/helm/node-feature-discovery/crds/nfd-api-crds.yaml index f0a24e725e..13cbfe4832 100644 --- a/deployment/helm/node-feature-discovery/crds/nfd-api-crds.yaml +++ b/deployment/helm/node-feature-discovery/crds/nfd-api-crds.yaml @@ -117,6 +117,263 @@ spec: --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: nodefeaturegroups.nfd.k8s-sigs.io +spec: + group: nfd.k8s-sigs.io + names: + kind: NodeFeatureGroup + listKind: NodeFeatureGroupList + plural: nodefeaturegroups + shortNames: + - nfg + singular: nodefeaturegroup + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeFeatureGroup resource holds Node pools by featureGroup + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of
this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NodeFeatureGroupSpec describes a NodeFeatureGroup object. + properties: + featureGroupRules: + items: + description: GroupRule defines a rule for nodegroup filtering. + properties: + matchAny: + description: MatchAny specifies a list of matchers one of which + must match. + items: + description: MatchAnyElem specifies one sub-matcher of MatchAny. + properties: + matchFeatures: + description: MatchFeatures specifies a set of matcher + terms all of which must match. + items: + description: |- + FeatureMatcherTerm defines requirements against one feature set. All + requirements (specified as MatchExpressions) are evaluated against each + element in the feature set. + properties: + feature: + description: Feature is the name of the feature + set to match against. + type: string + matchExpressions: + additionalProperties: + description: |- + MatchExpression specifies an expression to evaluate against a set of input + values. It contains an operator that is applied when matching the input and + an array of values that the operator evaluates the input against. + properties: + op: + description: Op is the operator to be applied. 
+ enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + description: |- + MatchExpressions is the set of per-element expressions evaluated. These + match against the value of the specified elements. + type: object + matchName: + description: |- + MatchName in an expression that is matched against the name of each + element in the feature set. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + required: + - feature + type: object + type: array + required: + - matchFeatures + type: object + type: array + matchFeatures: + description: MatchFeatures specifies a set of matcher terms + all of which must match. + items: + description: |- + FeatureMatcherTerm defines requirements against one feature set. All + requirements (specified as MatchExpressions) are evaluated against each + element in the feature set. 
+ properties: + feature: + description: Feature is the name of the feature set to + match against. + type: string + matchExpressions: + additionalProperties: + description: |- + MatchExpression specifies an expression to evaluate against a set of input + values. It contains an operator that is applied when matching the input and + an array of values that the operator evaluates the input against. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. + items: + type: string + type: array + required: + - op + type: object + description: |- + MatchExpressions is the set of per-element expressions evaluated. These + match against the value of the specified elements. + type: object + matchName: + description: |- + MatchName in an expression that is matched against the name of each + element in the feature set. + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: |- + Value is the list of values that the operand evaluates the input + against. Value should be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly one element if the + operator is Gt or Lt and exactly two elements if the operator is GtLt. + In other cases Value should contain at least one element. 
+ items: + type: string + type: array + required: + - op + type: object + required: + - feature + type: object + type: array + name: + description: Name of the rule. + type: string + required: + - name + type: object + type: array + required: + - featureGroupRules + type: object + status: + properties: + nodes: + description: Nodes is a list of nodes in the cluster that match the + featureGroupRules + items: + type: string + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.14.0 diff --git a/deployment/helm/node-feature-discovery/templates/clusterrole.yaml b/deployment/helm/node-feature-discovery/templates/clusterrole.yaml index c6adcb543a..f935cfe415 100644 --- a/deployment/helm/node-feature-discovery/templates/clusterrole.yaml +++ b/deployment/helm/node-feature-discovery/templates/clusterrole.yaml @@ -21,10 +21,18 @@ rules: resources: - nodefeatures - nodefeaturerules + - nodefeaturegroups verbs: - get - list - watch +- apiGroups: + - nfd.k8s-sigs.io + resources: + - nodefeaturegroups/status + verbs: + - patch + - update - apiGroups: - coordination.k8s.io resources: diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index e465a9e800..5516ec3a91 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -14,6 +14,7 @@ enableNodeFeatureApi: true featureGates: NodeFeatureAPI: true + NodeFeatureGroupAPI: false priorityClassName: "" diff --git a/docs/reference/feature-gates.md b/docs/reference/feature-gates.md index d3e6ac18b6..ebb5997c3a 100644 --- a/docs/reference/feature-gates.md +++ b/docs/reference/feature-gates.md @@ -18,6 +18,7 @@ The feature gates are set using the `-feature-gates` command line flag or | 
--------------------- | ------- | ------ | ------- | ------ | | `NodeFeatureAPI` | true | Beta | V0.14 | | | `DisableAutoPrefix` | false | Alpha | V0.16 | | +| `NodeFeatureGroupAPI` | false | Alpha | V0.16 | | ## NodeFeatureAPI @@ -27,6 +28,14 @@ server. The Node Feature API is used to expose node-specific hardware and software features to the Kubernetes scheduler. The Node Feature API is a beta feature and is enabled by default. +## NodeFeatureGroupAPI + +The `NodeFeatureGroupAPI` feature gate enables the Node Feature Group API. +When enabled, NFD will register the Node Feature Group API with the Kubernetes API +server. The Node Feature Group API is used to create node groups based on +hardware and software features. The Node Feature Group API is an alpha feature +and is disabled by default. + ## DisableAutoPrefix The `DisableAutoPrefix` feature gate controls the automatic prefixing of names. diff --git a/docs/usage/custom-resources.md b/docs/usage/custom-resources.md index f21b7a534e..68e1b306e7 100644 --- a/docs/usage/custom-resources.md +++ b/docs/usage/custom-resources.md @@ -51,6 +51,28 @@ spec: vendor-xpu-present: "true" ``` +## NodeFeatureGroup + +NodeFeatureGroup is an NFD-specific custom resource that is designed for +grouping nodes based on their features. NFD-Master watches for NodeFeatureGroup +objects in the cluster and updates the status of the NodeFeatureGroup object +with the list of nodes that match the feature group rules. The NodeFeatureGroup +rules follow the same syntax as the NodeFeatureRule rules. 
+ +```yaml +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureGroup +metadata: + name: node-feature-group-1 +spec: + featureGroupRules: + - name: "e2e-attribute-test-1" + matchFeatures: + - feature: kernel.version + matchExpressions: + major: {op: Exists} +``` + ## NodeFeatureRule NodeFeatureRule is an NFD-specific custom resource that is designed for diff --git a/docs/usage/customization-guide.md b/docs/usage/customization-guide.md index 35cc835399..49549f3b95 100644 --- a/docs/usage/customization-guide.md +++ b/docs/usage/customization-guide.md @@ -187,6 +187,49 @@ to specify taints in the NodeFeatureRule object. > not tolerate the taint are evicted immediately from the node including the > nfd-worker pod. +## NodeFeatureGroup custom resource + +`NodeFeatureGroup` objects provide a way to create node groups that share the +same set of features. The `NodeFeatureGroup` object spec consists of a list of +rules that follow the same format as the `NodeFeatureRule` rules, +but the difference in this case is that nodes that match any of the rules in the +`NodeFeatureGroup` will be listed in the `NodeFeatureGroup` status. + +### A NodeFeatureGroup example + +Consider the following referential example: + +```yaml +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureGroup +metadata: + name: node-feature-group-1 +spec: + featureGroupRules: + - name: "e2e-attribute-test-1" + matchFeatures: + - feature: kernel.version + matchExpressions: + major: {op: Exists} +status: + nodes: + - node-1 + - node-2 + - node-3 +``` + +The object specifies a group of nodes for which the +`kernel.version.major` attribute exists. + +Create a `NodeFeatureGroup` with a yaml file: + +```bash +kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/node-feature-discovery/{{ site.release }}/examples/nodefeaturegroup.yaml +``` + +See [Feature rule format](#feature-rule-format) for a detailed description of +available fields and how to write group filtering rules.
+ ## Local feature source NFD-Worker has a special feature source named `local` which is an integration diff --git a/examples/nodefeaturegroup.yaml b/examples/nodefeaturegroup.yaml new file mode 100644 index 0000000000..e7fd276509 --- /dev/null +++ b/examples/nodefeaturegroup.yaml @@ -0,0 +1,11 @@ +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureGroup +metadata: + name: node-feature-group-1 +spec: + featureGroupRules: + - name: "e2e-attribute-test-1" + matchFeatures: + - feature: kernel.version + matchExpressions: + major: {op: Exists} diff --git a/hack/update_codegen.sh b/hack/update_codegen.sh index fd261d3d3b..4b9a9b82b8 100755 --- a/hack/update_codegen.sh +++ b/hack/update_codegen.sh @@ -27,6 +27,10 @@ function cleanup() { "${GO_CMD}" mod tidy } +# Temporary workaround until https://github.com/kubernetes/kubernetes/pull/125051 is merged +# and added to a release. +find api/generated/ -name 'nodefeature*' | xargs rm + trap cleanup EXIT GO_CMD=${1:-go} NFD_ROOT=$(realpath $(dirname ${BASH_SOURCE[0]})/..) diff --git a/pkg/apis/nfd/nodefeaturerule/rule.go b/pkg/apis/nfd/nodefeaturerule/rule.go index 7e9f7fa713..5a689da5d4 100644 --- a/pkg/apis/nfd/nodefeaturerule/rule.go +++ b/pkg/apis/nfd/nodefeaturerule/rule.go @@ -108,6 +108,41 @@ func Execute(r *nfdv1alpha1.Rule, features *nfdv1alpha1.Features) (RuleOutput, e return ret, nil } +// ExecuteGroupRule executes the GroupRule against a set of input features, and returns true if the +// rule matches.
+func ExecuteGroupRule(r *nfdv1alpha1.GroupRule, features *nfdv1alpha1.Features) (bool, error) { + matched := false + if len(r.MatchAny) > 0 { + // Logical OR over the matchAny matchers + for _, matcher := range r.MatchAny { + if isMatch, matches, err := evaluateMatchAnyElem(&matcher, features); err != nil { + return false, err + } else if isMatch { + matched = true + klog.V(4).InfoS("matchAny matched", "ruleName", r.Name, "matchedFeatures", utils.DelayedDumper(matches)) + // there's no need to evaluate other matchers in MatchAny + // One match is enough for MatchAny + break + } + } + if !matched { + return false, nil + } + } + + if len(r.MatchFeatures) > 0 { + if isMatch, _, err := evaluateFeatureMatcher(&r.MatchFeatures, features); err != nil { + return false, err + } else if !isMatch { + klog.V(2).InfoS("rule did not match", "ruleName", r.Name) + return false, nil + } + } + + klog.V(2).InfoS("rule matched", "ruleName", r.Name) + return true, nil +} + func executeLabelsTemplate(r *nfdv1alpha1.Rule, in matchedFeatures, out map[string]string) error { if r.LabelsTemplate == "" { return nil diff --git a/pkg/features/features.go b/pkg/features/features.go index b619aa15d7..bc644d7118 100644 --- a/pkg/features/features.go +++ b/pkg/features/features.go @@ -21,8 +21,9 @@ import ( ) const ( - NodeFeatureAPI featuregate.Feature = "NodeFeatureAPI" - DisableAutoPrefix featuregate.Feature = "DisableAutoPrefix" + NodeFeatureAPI featuregate.Feature = "NodeFeatureAPI" + DisableAutoPrefix featuregate.Feature = "DisableAutoPrefix" + NodeFeatureGroupAPI featuregate.Feature = "NodeFeatureGroupAPI" ) var ( @@ -34,6 +35,7 @@ var ( ) var DefaultNFDFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ - NodeFeatureAPI: {Default: true, PreRelease: featuregate.Beta}, - DisableAutoPrefix: {Default: false, PreRelease: featuregate.Alpha}, + NodeFeatureAPI: {Default: true, PreRelease: featuregate.Beta}, + DisableAutoPrefix: {Default: false, PreRelease: featuregate.Alpha}, + 
NodeFeatureGroupAPI: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/pkg/nfd-master/metrics.go b/pkg/nfd-master/metrics.go index a8afdc3097..20335f6f0a 100644 --- a/pkg/nfd-master/metrics.go +++ b/pkg/nfd-master/metrics.go @@ -23,15 +23,16 @@ import ( // When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names const ( - buildInfoQuery = "nfd_master_build_info" - nodeUpdateRequestsQuery = "nfd_node_update_requests_total" - nodeUpdatesQuery = "nfd_node_updates_total" - nodeUpdateFailuresQuery = "nfd_node_update_failures_total" - nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total" - nodeERsRejectedQuery = "nfd_node_extendedresources_rejected_total" - nodeTaintsRejectedQuery = "nfd_node_taints_rejected_total" - nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds" - nfrProcessingErrorsQuery = "nfd_nodefeaturerule_processing_errors_total" + buildInfoQuery = "nfd_master_build_info" + nodeUpdateRequestsQuery = "nfd_node_update_requests_total" + nodeUpdatesQuery = "nfd_node_updates_total" + nodeFeatureGroupUpdateRequestsQuery = "nfd_node_feature_group_update_requests_total" + nodeUpdateFailuresQuery = "nfd_node_update_failures_total" + nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total" + nodeERsRejectedQuery = "nfd_node_extendedresources_rejected_total" + nodeTaintsRejectedQuery = "nfd_node_taints_rejected_total" + nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds" + nfrProcessingErrorsQuery = "nfd_nodefeaturerule_processing_errors_total" ) var ( @@ -46,6 +47,10 @@ var ( Name: nodeUpdateRequestsQuery, Help: "Number of node update requests processed by the master.", }) + nodeFeatureGroupUpdateRequests = prometheus.NewCounter(prometheus.CounterOpts{ + Name: nodeFeatureGroupUpdateRequestsQuery, + Help: "Number of node feature group update requests processed by the master.", + }) nodeUpdates = prometheus.NewCounter(prometheus.CounterOpts{ Name: nodeUpdatesQuery, Help:
"Number of nodes updated by the master.", diff --git a/pkg/nfd-master/nfd-api-controller.go b/pkg/nfd-master/nfd-api-controller.go index 0ed3fe81c9..055bf0154d 100644 --- a/pkg/nfd-master/nfd-api-controller.go +++ b/pkg/nfd-master/nfd-api-controller.go @@ -35,18 +35,22 @@ import ( ) type nfdController struct { - featureLister nfdlisters.NodeFeatureLister - ruleLister nfdlisters.NodeFeatureRuleLister + featureLister nfdlisters.NodeFeatureLister + ruleLister nfdlisters.NodeFeatureRuleLister + featureGroupLister nfdlisters.NodeFeatureGroupLister stopChan chan struct{} - updateAllNodesChan chan struct{} - updateOneNodeChan chan string + updateAllNodesChan chan struct{} + updateOneNodeChan chan string + updateAllNodeFeatureGroupsChan chan struct{} + updateNodeFeatureGroupChan chan string } type nfdApiControllerOptions struct { - DisableNodeFeature bool - ResyncPeriod time.Duration + DisableNodeFeature bool + DisableNodeFeatureGroup bool + ResyncPeriod time.Duration } func init() { @@ -55,9 +59,11 @@ func init() { func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiControllerOptions) (*nfdController, error) { c := &nfdController{ - stopChan: make(chan struct{}), - updateAllNodesChan: make(chan struct{}, 1), - updateOneNodeChan: make(chan string), + stopChan: make(chan struct{}), + updateAllNodesChan: make(chan struct{}, 1), + updateOneNodeChan: make(chan string), + updateAllNodeFeatureGroupsChan: make(chan struct{}, 1), + updateNodeFeatureGroupChan: make(chan string), } nfdClient := nfdclientset.NewForConfigOrDie(config) @@ -73,16 +79,25 @@ func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiC nfr := obj.(*nfdv1alpha1.NodeFeature) klog.V(2).InfoS("NodeFeature added", "nodefeature", klog.KObj(nfr)) c.updateOneNode("NodeFeature", nfr) + if !nfdApiControllerOptions.DisableNodeFeatureGroup { + c.updateAllNodeFeatureGroups() + } }, UpdateFunc: func(oldObj, newObj interface{}) { nfr := newObj.(*nfdv1alpha1.NodeFeature) 
klog.V(2).InfoS("NodeFeature updated", "nodefeature", klog.KObj(nfr)) c.updateOneNode("NodeFeature", nfr) + if !nfdApiControllerOptions.DisableNodeFeatureGroup { + c.updateAllNodeFeatureGroups() + } }, DeleteFunc: func(obj interface{}) { nfr := obj.(*nfdv1alpha1.NodeFeature) klog.V(2).InfoS("NodeFeature deleted", "nodefeature", klog.KObj(nfr)) c.updateOneNode("NodeFeature", nfr) + if !nfdApiControllerOptions.DisableNodeFeatureGroup { + c.updateAllNodeFeatureGroups() + } }, }); err != nil { return nil, err @@ -91,8 +106,8 @@ func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiC } // Add informer for NodeFeatureRule objects - ruleInformer := informerFactory.Nfd().V1alpha1().NodeFeatureRules() - if _, err := ruleInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + nodeFeatureRuleInformer := informerFactory.Nfd().V1alpha1().NodeFeatureRules() + if _, err := nodeFeatureRuleInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(object interface{}) { klog.V(2).InfoS("NodeFeatureRule added", "nodefeaturerule", klog.KObj(object.(metav1.Object))) if !nfdApiControllerOptions.DisableNodeFeature { @@ -117,7 +132,32 @@ func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiC }); err != nil { return nil, err } - c.ruleLister = ruleInformer.Lister() + c.ruleLister = nodeFeatureRuleInformer.Lister() + + // Add informer for NodeFeatureGroup objects + if !nfdApiControllerOptions.DisableNodeFeatureGroup { + nodeFeatureGroupInformer := informerFactory.Nfd().V1alpha1().NodeFeatureGroups() + if _, err := nodeFeatureGroupInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + nfg := obj.(*nfdv1alpha1.NodeFeatureGroup) + klog.V(2).InfoS("NodeFeatureGroup added", "nodeFeatureGroup", klog.KObj(nfg)) + c.updateNodeFeatureGroup(nfg.Name) + }, + UpdateFunc: func(oldObj, newObj interface{}) { + nfg := newObj.(*nfdv1alpha1.NodeFeatureGroup) + 
klog.V(2).InfoS("NodeFeatureGroup updated", "nodeFeatureGroup", klog.KObj(nfg)) + c.updateNodeFeatureGroup(nfg.Name) + }, + DeleteFunc: func(obj interface{}) { + nfg := obj.(*nfdv1alpha1.NodeFeatureGroup) + klog.V(2).InfoS("NodeFeatureGroup deleted", "nodeFeatureGroup", klog.KObj(nfg)) + c.updateNodeFeatureGroup(nfg.Name) + }, + }); err != nil { + return nil, err + } + c.featureGroupLister = nodeFeatureGroupInformer.Lister() + } // Start informers informerFactory.Start(c.stopChan) @@ -129,15 +169,6 @@ func (c *nfdController) stop() { close(c.stopChan) } -func (c *nfdController) updateOneNode(typ string, obj metav1.Object) { - nodeName, err := getNodeNameForObj(obj) - if err != nil { - klog.ErrorS(err, "failed to determine node name for object", "type", typ, "object", klog.KObj(obj)) - return - } - c.updateOneNodeChan <- nodeName -} - func getNodeNameForObj(obj metav1.Object) (string, error) { nodeName, ok := obj.GetLabels()[nfdv1alpha1.NodeFeatureObjNodeNameLabel] if !ok { @@ -149,9 +180,29 @@ func getNodeNameForObj(obj metav1.Object) (string, error) { return nodeName, nil } +func (c *nfdController) updateOneNode(typ string, obj metav1.Object) { + nodeName, err := getNodeNameForObj(obj) + if err != nil { + klog.ErrorS(err, "failed to determine node name for object", "type", typ, "object", klog.KObj(obj)) + return + } + c.updateOneNodeChan <- nodeName +} + func (c *nfdController) updateAllNodes() { select { case c.updateAllNodesChan <- struct{}{}: default: } } + +func (c *nfdController) updateNodeFeatureGroup(nodeFeatureGroup string) { + c.updateNodeFeatureGroupChan <- nodeFeatureGroup +} + +func (c *nfdController) updateAllNodeFeatureGroups() { + select { + case c.updateAllNodeFeatureGroupsChan <- struct{}{}: + default: + } +} diff --git a/pkg/nfd-master/nfd-master-internal_test.go b/pkg/nfd-master/nfd-master-internal_test.go index 024a0b3913..498070a86b 100644 --- a/pkg/nfd-master/nfd-master-internal_test.go +++ b/pkg/nfd-master/nfd-master-internal_test.go @@ 
-775,10 +775,10 @@ func BenchmarkNfdAPIUpdateAllNodes(b *testing.B) { fakeMaster := newFakeMaster(WithKubernetesClient(fakeCli)) fakeMaster.nfdController = newFakeNfdAPIController(fakenfdclient.NewSimpleClientset()) - nodeUpdaterPool := newNodeUpdaterPool(fakeMaster) - fakeMaster.nodeUpdaterPool = nodeUpdaterPool + updaterPool := newUpdaterPool(fakeMaster) + fakeMaster.updaterPool = updaterPool - nodeUpdaterPool.start(10) + updaterPool.start(10) b.ResetTimer() diff --git a/pkg/nfd-master/nfd-master.go b/pkg/nfd-master/nfd-master.go index 8af903edd4..d5d5a263bb 100644 --- a/pkg/nfd-master/nfd-master.go +++ b/pkg/nfd-master/nfd-master.go @@ -40,6 +40,7 @@ import ( "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/peer" corev1 "k8s.io/api/core/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sLabels "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" @@ -54,6 +55,7 @@ import ( taintutils "k8s.io/kubernetes/pkg/util/taints" "sigs.k8s.io/yaml" + nfdclientset "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/nodefeaturerule" "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/validate" @@ -144,17 +146,18 @@ type NfdMaster interface { type nfdMaster struct { *nfdController - args Args - namespace string - nodeName string - configFilePath string - server *grpc.Server - healthServer *grpc.Server - stop chan struct{} - ready chan struct{} - kubeconfig *restclient.Config - k8sClient k8sclient.Interface - nodeUpdaterPool *nodeUpdaterPool + args Args + namespace string + nodeName string + configFilePath string + server *grpc.Server + healthServer *grpc.Server + stop chan struct{} + ready chan struct{} + kubeconfig *restclient.Config + k8sClient k8sclient.Interface + nfdClient *nfdclientset.Clientset + updaterPool *updaterPool deniedNs config *NFDConfig } @@ 
-211,7 +214,21 @@ func NewNfdMaster(opts ...NfdMasterOption) (NfdMaster, error) { nfd.k8sClient = cli } - nfd.nodeUpdaterPool = newNodeUpdaterPool(nfd) + // nfdClient + if nfd.kubeconfig != nil { + kubeconfig, err := utils.GetKubeconfig(nfd.args.Kubeconfig) + if err != nil { + return nfd, err + } + nfd.kubeconfig = kubeconfig + nfdClient, err := nfdclientset.NewForConfig(nfd.kubeconfig) + if err != nil { + return nfd, err + } + nfd.nfdClient = nfdClient + } + + nfd.updaterPool = newUpdaterPool(nfd) return nfd, nil } @@ -232,6 +249,11 @@ func WithKubernetesClient(cli k8sclient.Interface) NfdMasterOption { return &nfdMasterOpt{f: func(n *nfdMaster) { n.k8sClient = cli }} } +// WithNfdClient forces to use the given NFD client, without initializing one from kubeconfig. +func WithKubeConfig(config *restclient.Config) NfdMasterOption { + return &nfdMasterOpt{f: func(n *nfdMaster) { n.kubeconfig = config }} +} + type nfdMasterOpt struct { f func(*nfdMaster) } @@ -283,7 +305,7 @@ func (m *nfdMaster) Run() error { } } - m.nodeUpdaterPool.start(m.config.NfdApiParallelism) + m.updaterPool.start(m.config.NfdApiParallelism) // Create watcher for config file configWatch, err := utils.CreateFsWatcher(time.Second, m.configFilePath) @@ -354,10 +376,10 @@ func (m *nfdMaster) Run() error { return err } - // Stop the nodeUpdaterPool so that no node updates are underway + // Stop the updaterPool so that no node updates are underway // while we reconfigure the NFD API controller (including the // listers) below - m.nodeUpdaterPool.stop() + m.updaterPool.stop() // restart NFD API controller if m.nfdController != nil { @@ -370,8 +392,8 @@ func (m *nfdMaster) Run() error { return nil } } - // Restart the nodeUpdaterPool - m.nodeUpdaterPool.start(m.config.NfdApiParallelism) + // Restart the updaterPool + m.updaterPool.start(m.config.NfdApiParallelism) // Update all nodes when the configuration changes if m.nfdController != nil && 
nfdfeatures.NFDFeatureGate.Enabled(nfdfeatures.NodeFeatureAPI) && m.args.EnableNodeFeatureApi { @@ -474,6 +496,8 @@ func (m *nfdMaster) nfdAPIUpdateHandler() { // disabled (i.e. NodeFeature API is enabled) updateAll := nfdfeatures.NFDFeatureGate.Enabled(nfdfeatures.NodeFeatureAPI) && m.args.EnableNodeFeatureApi updateNodes := make(map[string]struct{}) + nodeFeatureGroup := make(map[string]struct{}) + updateAllNodeFeatureGroups := false rateLimit := time.After(time.Second) for { select { @@ -481,7 +505,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() { updateAll = true case nodeName := <-m.nfdController.updateOneNodeChan: updateNodes[nodeName] = struct{}{} + case <-m.nfdController.updateAllNodeFeatureGroupsChan: + updateAllNodeFeatureGroups = true + case nodeFeatureGroupName := <-m.nfdController.updateNodeFeatureGroupChan: + nodeFeatureGroup[nodeFeatureGroupName] = struct{}{} case <-rateLimit: + // NodeFeature errUpdateAll := false if updateAll { if err := m.nfdAPIUpdateAllNodes(); err != nil { @@ -490,12 +519,26 @@ func (m *nfdMaster) nfdAPIUpdateHandler() { } } else { for nodeName := range updateNodes { - m.nodeUpdaterPool.addNode(nodeName) + m.updaterPool.addNode(nodeName) + } + } + // NodeFeatureGroup + errUpdateAllNFG := false + if updateAllNodeFeatureGroups { + if err := m.nfdAPIUpdateAllNodeFeatureGroups(); err != nil { + klog.ErrorS(err, "failed to update NodeFeatureGroups") + errUpdateAllNFG = true + } + } else { + for nodeFeatureGroupName := range nodeFeatureGroup { + m.updaterPool.addNodeFeatureGroup(nodeFeatureGroupName) } } // Reset "work queue" and timer updateAll = errUpdateAll + updateAllNodeFeatureGroups = errUpdateAllNFG + nodeFeatureGroup = map[string]struct{}{} updateNodes = map[string]struct{}{} rateLimit = time.After(time.Second) } @@ -515,7 +558,7 @@ func (m *nfdMaster) Stop() { m.nfdController.stop() } - m.nodeUpdaterPool.stop() + m.updaterPool.stop() close(m.stop) } @@ -758,21 +801,24 @@ func (m *nfdMaster) nfdAPIUpdateAllNodes() error { } 
for _, node := range nodes.Items { - m.nodeUpdaterPool.addNode(node.Name) + m.updaterPool.addNode(node.Name) } return nil } -func (m *nfdMaster) nfdAPIUpdateOneNode(cli k8sclient.Interface, node *corev1.Node) error { - if m.nfdController == nil || m.nfdController.featureLister == nil { - return nil +// mergeNodeFeatures merges the features of the given NodeFeature objects into a single NodeFeatureSpec. +func (m *nfdMaster) mergeNodeFeatures(nodeName, api string) (*nfdv1alpha1.NodeFeature, error) { + nodeFeatures := &nfdv1alpha1.NodeFeature{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, } - sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: node.Name}) + sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodeName}) objs, err := m.nfdController.featureLister.List(sel) if err != nil { - return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", node.Name, err) + return nodeFeatures, fmt.Errorf("failed to get NodeFeature resources for node %q: %w", nodeName, err) } // Sort our objects @@ -792,16 +838,14 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(cli k8sclient.Interface, node *corev1.No return objs[i].Namespace < objs[j].Namespace }) - klog.V(1).InfoS("processing of node initiated by NodeFeature API", "nodeName", node.Name) - - features := nfdv1alpha1.NewNodeFeatureSpec() + klog.V(1).InfoS("processing of node initiated by", "API", api, "nodeName", nodeName) if len(objs) > 0 { // Merge in features // // NOTE: changing the rule api to support handle multiple objects instead // of merging would probably perform better with lot less data to copy. 
- features = objs[0].Spec.DeepCopy() + features := objs[0].Spec.DeepCopy() if !nfdfeatures.NFDFeatureGate.Enabled(nfdfeatures.DisableAutoPrefix) && m.config.AutoDefaultNs { features.Labels = addNsToMapKeys(features.Labels, nfdv1alpha1.FeatureLabelNs) } @@ -813,19 +857,114 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(cli k8sclient.Interface, node *corev1.No s.MergeInto(features) } + // Set the merged features to the NodeFeature object + nodeFeatures.Spec = *features + klog.V(4).InfoS("merged nodeFeatureSpecs", "newNodeFeatureSpec", utils.DelayedDumper(features)) } + return nodeFeatures, nil +} + +func (m *nfdMaster) nfdAPIUpdateOneNode(cli k8sclient.Interface, node *corev1.Node) error { + if m.nfdController == nil || m.nfdController.featureLister == nil { + return nil + } + + // Merge all NodeFeature objects into a single NodeFeatureSpec + nodeFeatures, err := m.mergeNodeFeatures(node.Name, "NodeFeature") + if err != nil { + return fmt.Errorf("failed to merge NodeFeature objects for node %q: %w", node.Name, err) + } + // Update node labels et al. This may also mean removing all NFD-owned // labels (et al.), for example in the case no NodeFeature objects are // present. 
- if err := m.refreshNodeFeatures(cli, node, features.Labels, &features.Features); err != nil { + if err := m.refreshNodeFeatures(cli, node, nodeFeatures.Spec.Labels, &nodeFeatures.Spec.Features); err != nil { return err } return nil } +func (m *nfdMaster) nfdAPIUpdateAllNodeFeatureGroups() error { + klog.V(1).InfoS("updating all NodeFeatureGroups") + + nodeFeatureGroups, err := m.nfdController.featureGroupLister.List(k8sLabels.Everything()) + if err != nil { + return fmt.Errorf("failed to get NodeFeatureGroup objects: %w", err) + } + + if len(nodeFeatureGroups) != 0 { + for _, nodeFeatureGroup := range nodeFeatureGroups { + m.updaterPool.nfgQueue.Add(nodeFeatureGroup.Name) + } + } + + return nil +} + +func (m *nfdMaster) nfdAPIUpdateNodeFeatureGroup(nfdClient *nfdclientset.Clientset, nodeFeatureGroup *nfdv1alpha1.NodeFeatureGroup) error { + if m.nfdController == nil || m.nfdController.featureLister == nil { + return nil + } + + // Get all Nodes + nodes, err := getNodes(m.k8sClient) + if err != nil { + return fmt.Errorf("failed to get nodes: %w", err) + } + nodeFeaturesList := make([]*nfdv1alpha1.NodeFeature, 0) + for _, node := range nodes.Items { + // Merge all NodeFeature objects into a single NodeFeatureSpec + nodeFeatures, err := m.mergeNodeFeatures(node.Name, "NodeFeatureGroup") + if err != nil { + return fmt.Errorf("failed to merge NodeFeature objects for node %q: %w", node.Name, err) + } + nodeFeaturesList = append(nodeFeaturesList, nodeFeatures) + klog.InfoS("nodeFeaturesList", "nodeFeaturesList", utils.DelayedDumper(nodeFeaturesList)) + } + + // Execute rules and create matching groups + nodePool := make([]string, 0) + nodeGroupValidator := make(map[string]bool) + for _, rule := range nodeFeatureGroup.Spec.Rules { + for _, feature := range nodeFeaturesList { + match, err := nodefeaturerule.ExecuteGroupRule(&rule, &feature.Spec.Features) + if err != nil { + return fmt.Errorf("failed to evaluate rule %q: %w", rule.Name, err) + } + + if match { + 
klog.InfoS("node matched rule", "nodeName", feature.Name, "ruleName", rule.Name) + system := feature.Spec.Features.Attributes["system.name"] + nodeName := system.Elements["nodename"] + if _, ok := nodeGroupValidator[nodeName]; !ok { + nodePool = append(nodePool, nodeName) + nodeGroupValidator[nodeName] = true + } + } + } + } + + // Update the NodeFeatureGroup object with the updated featureGroupRules + nodeFeatureGroupUpdated := nodeFeatureGroup.DeepCopy() + nodeFeatureGroupUpdated.Status.Nodes = nodePool + + if !apiequality.Semantic.DeepEqual(nodeFeatureGroup, nodeFeatureGroupUpdated) { + klog.InfoS("updating NodeFeatureGroup object", "nodeFeatureGroup", klog.KObj(nodeFeatureGroup)) + nodeFeatureGroupUpdated, err = nfdClient.NfdV1alpha1().NodeFeatureGroups(m.namespace).UpdateStatus(context.TODO(), nodeFeatureGroupUpdated, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update NodeFeatureGroup object: %w", err) + } + klog.V(4).InfoS("NodeFeatureGroup object updated", "nodeFeatureGroup", utils.DelayedDumper(nodeFeatureGroupUpdated)) + } else { + klog.V(1).InfoS("no changes in NodeFeatureGroup, object is up to date", "nodeFeatureGroup", klog.KObj(nodeFeatureGroup)) + } + + return nil +} + // filterExtendedResources filters extended resources and returns a map // of valid extended resources. 
func (m *nfdMaster) filterExtendedResources(features *nfdv1alpha1.Features, extendedResources ExtendedResources) ExtendedResources { @@ -1433,6 +1572,10 @@ func getNode(cli k8sclient.Interface, nodeName string) (*corev1.Node, error) { return cli.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) } +func getNodeFeatureGroup(cli nfdclientset.Interface, namespace, name string) (*nfdv1alpha1.NodeFeatureGroup, error) { + return cli.NfdV1alpha1().NodeFeatureGroups(namespace).Get(context.TODO(), name, metav1.GetOptions{}) +} + func getNodes(cli k8sclient.Interface) (*corev1.NodeList, error) { return cli.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) } diff --git a/pkg/nfd-master/node-updater-pool.go b/pkg/nfd-master/node-updater-pool.go deleted file mode 100644 index 587c7adae7..0000000000 --- a/pkg/nfd-master/node-updater-pool.go +++ /dev/null @@ -1,131 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package nfdmaster - -import ( - "sync" - "time" - - "golang.org/x/time/rate" - apierrors "k8s.io/apimachinery/pkg/api/errors" - k8sclient "k8s.io/client-go/kubernetes" - "k8s.io/client-go/util/workqueue" - "k8s.io/klog/v2" -) - -type nodeUpdaterPool struct { - queue workqueue.RateLimitingInterface - sync.RWMutex - - wg sync.WaitGroup - nfdMaster *nfdMaster -} - -func newNodeUpdaterPool(nfdMaster *nfdMaster) *nodeUpdaterPool { - return &nodeUpdaterPool{ - nfdMaster: nfdMaster, - wg: sync.WaitGroup{}, - } -} - -func (u *nodeUpdaterPool) processNodeUpdateRequest(cli k8sclient.Interface, queue workqueue.RateLimitingInterface) bool { - n, quit := queue.Get() - if quit { - return false - } - nodeName := n.(string) - - defer queue.Done(nodeName) - - nodeUpdateRequests.Inc() - - // Check if node exists - if node, err := getNode(cli, nodeName); apierrors.IsNotFound(err) { - klog.InfoS("node not found, skip update", "nodeName", nodeName) - } else if err := u.nfdMaster.nfdAPIUpdateOneNode(cli, node); err != nil { - if n := queue.NumRequeues(nodeName); n < 15 { - klog.InfoS("retrying node update", "nodeName", nodeName, "lastError", err, "numRetries", n) - } else { - klog.ErrorS(err, "node update failed, queuing for retry ", "nodeName", nodeName, "numRetries", n) - // Count only long-failing attempts - nodeUpdateFailures.Inc() - } - queue.AddRateLimited(nodeName) - return true - } - queue.Forget(nodeName) - return true -} - -func (u *nodeUpdaterPool) runNodeUpdater(queue workqueue.RateLimitingInterface) { - var cli k8sclient.Interface - if u.nfdMaster.kubeconfig != nil { - // For normal execution, initialize a separate api client for each updater - cli = k8sclient.NewForConfigOrDie(u.nfdMaster.kubeconfig) - } else { - // For tests, re-use the api client from nfd-master - cli = u.nfdMaster.k8sClient - } - for u.processNodeUpdateRequest(cli, queue) { - } - u.wg.Done() -} - -func (u *nodeUpdaterPool) start(parallelism int) { - u.Lock() - defer u.Unlock() - - if u.queue != 
nil && !u.queue.ShuttingDown() { - klog.InfoS("the NFD master node updater pool is already running.") - return - } - - klog.InfoS("starting the NFD master node updater pool", "parallelism", parallelism) - - // Create ratelimiter. Mimic workqueue.DefaultControllerRateLimiter() but - // with modified per-item (node) rate limiting parameters. - rl := workqueue.NewMaxOfRateLimiter( - workqueue.NewItemExponentialFailureRateLimiter(50*time.Millisecond, 100*time.Second), - &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, - ) - u.queue = workqueue.NewRateLimitingQueue(rl) - - for i := 0; i < parallelism; i++ { - u.wg.Add(1) - go u.runNodeUpdater(u.queue) - } -} - -func (u *nodeUpdaterPool) stop() { - u.Lock() - defer u.Unlock() - - if u.queue == nil || u.queue.ShuttingDown() { - klog.InfoS("the NFD master node updater pool is not running.") - return - } - - klog.InfoS("stopping the NFD master node updater pool") - u.queue.ShutDown() - u.wg.Wait() -} - -func (u *nodeUpdaterPool) addNode(nodeName string) { - u.RLock() - defer u.RUnlock() - u.queue.Add(nodeName) -} diff --git a/pkg/nfd-master/updater-pool.go b/pkg/nfd-master/updater-pool.go new file mode 100644 index 0000000000..fec6027137 --- /dev/null +++ b/pkg/nfd-master/updater-pool.go @@ -0,0 +1,194 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package nfdmaster + +import ( + "sync" + "time" + + "golang.org/x/time/rate" + apierrors "k8s.io/apimachinery/pkg/api/errors" + k8sclient "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + nfdclientset "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" + nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" + "sigs.k8s.io/node-feature-discovery/pkg/features" +) + +type updaterPool struct { + queue workqueue.RateLimitingInterface + nfgQueue workqueue.RateLimitingInterface + sync.RWMutex + + wg sync.WaitGroup + nfgWg sync.WaitGroup + nfdMaster *nfdMaster +} + +func newUpdaterPool(nfdMaster *nfdMaster) *updaterPool { + return &updaterPool{ + nfdMaster: nfdMaster, + wg: sync.WaitGroup{}, + } +} + +func (u *updaterPool) processNodeUpdateRequest(cli k8sclient.Interface, queue workqueue.RateLimitingInterface) bool { + n, quit := queue.Get() + if quit { + return false + } + nodeName := n.(string) + + defer queue.Done(nodeName) + + nodeUpdateRequests.Inc() + + // Check if node exists + if node, err := getNode(cli, nodeName); apierrors.IsNotFound(err) { + klog.InfoS("node not found, skip update", "nodeName", nodeName) + } else if err := u.nfdMaster.nfdAPIUpdateOneNode(cli, node); err != nil { + if n := queue.NumRequeues(nodeName); n < 15 { + klog.InfoS("retrying node update", "nodeName", nodeName, "lastError", err, "numRetries", n) + } else { + klog.ErrorS(err, "node update failed, queuing for retry ", "nodeName", nodeName, "numRetries", n) + // Count only long-failing attempts + nodeUpdateFailures.Inc() + } + queue.AddRateLimited(nodeName) + return true + } + queue.Forget(nodeName) + return true +} + +func (u *updaterPool) runNodeUpdater(queue workqueue.RateLimitingInterface) { + var cli k8sclient.Interface + if u.nfdMaster.kubeconfig != nil { + // For normal execution, initialize a separate api client for each updater + cli = k8sclient.NewForConfigOrDie(u.nfdMaster.kubeconfig) + } else { + // For 
tests, re-use the api client from nfd-master + cli = u.nfdMaster.k8sClient + } + for u.processNodeUpdateRequest(cli, queue) { + } + u.wg.Done() +} + +func (u *updaterPool) processNodeFeatureGroupUpdateRequest(cli nfdclientset.Interface, ngfQueue workqueue.RateLimitingInterface) bool { + nfgName, quit := ngfQueue.Get() + if quit { + return false + } + defer ngfQueue.Done(nfgName) + + nodeFeatureGroupUpdateRequests.Inc() + + // Check if NodeFeatureGroup exists + var nfg *nfdv1alpha1.NodeFeatureGroup + var err error + if nfg, err = getNodeFeatureGroup(cli, u.nfdMaster.namespace, nfgName.(string)); apierrors.IsNotFound(err) { + klog.InfoS("NodeFeatureGroup not found, skip update", "NodeFeatureGroupName", nfgName) + } else if err := u.nfdMaster.nfdAPIUpdateNodeFeatureGroup(u.nfdMaster.nfdClient, nfg); err != nil { + if n := ngfQueue.NumRequeues(nfgName); n < 15 { + klog.InfoS("retrying NodeFeatureGroup update", "nodeFeatureGroup", klog.KObj(nfg), "lastError", err) + } else { + klog.ErrorS(err, "failed to update NodeFeatureGroup, queueing for retry", "nodeFeatureGroup", klog.KObj(nfg), "lastError", err, "numRetries", n) + } + ngfQueue.AddRateLimited(nfgName) + return true + } + + ngfQueue.Forget(nfgName) + return true +} + +func (u *updaterPool) runNodeFeatureGroupUpdater(ngfQueue workqueue.RateLimitingInterface) { + cli := nfdclientset.NewForConfigOrDie(u.nfdMaster.kubeconfig) + for u.processNodeFeatureGroupUpdateRequest(cli, ngfQueue) { + } + u.nfgWg.Done() +} + +func (u *updaterPool) start(parallelism int) { + u.Lock() + defer u.Unlock() + + if u.queue != nil && !u.queue.ShuttingDown() { + klog.InfoS("the NFD master updater pool is already running.") + return + } + + if u.nfgQueue != nil && !u.nfgQueue.ShuttingDown() { + klog.InfoS("the NFD master node feature group updater pool is already running.") + return + } + + klog.InfoS("starting the NFD master updater pool", "parallelism", parallelism) + + // Create ratelimiter. 
Mimic workqueue.DefaultControllerRateLimiter() but + // with modified per-item (node) rate limiting parameters. + rl := workqueue.NewMaxOfRateLimiter( + workqueue.NewItemExponentialFailureRateLimiter(50*time.Millisecond, 100*time.Second), + &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, + ) + u.queue = workqueue.NewRateLimitingQueue(rl) + u.nfgQueue = workqueue.NewRateLimitingQueue(rl) + + for i := 0; i < parallelism; i++ { + u.wg.Add(1) + go u.runNodeUpdater(u.queue) + if features.NFDFeatureGate.Enabled(features.NodeFeatureGroupAPI) { + u.nfgWg.Add(1) + go u.runNodeFeatureGroupUpdater(u.nfgQueue) + } + } +} + +func (u *updaterPool) stop() { + u.Lock() + defer u.Unlock() + + if u.queue == nil || u.queue.ShuttingDown() { + klog.InfoS("the NFD master updater pool is not running.") + return + } + + if u.nfgQueue == nil || u.nfgQueue.ShuttingDown() { + klog.InfoS("the NFD master updater pool is not running.") + return + } + + klog.InfoS("stopping the NFD master updater pool") + u.queue.ShutDown() + u.wg.Wait() + u.nfgQueue.ShutDown() + u.nfgWg.Wait() +} + +func (u *updaterPool) addNode(nodeName string) { + u.RLock() + defer u.RUnlock() + u.queue.Add(nodeName) +} + +func (u *updaterPool) addNodeFeatureGroup(nodeFeatureGroupName string) { + u.RLock() + defer u.RUnlock() + u.nfgQueue.Add(nodeFeatureGroupName) +} diff --git a/pkg/nfd-master/node-updater-pool_test.go b/pkg/nfd-master/updater-pool_test.go similarity index 60% rename from pkg/nfd-master/node-updater-pool_test.go rename to pkg/nfd-master/updater-pool_test.go index a648ee4353..efeab40434 100644 --- a/pkg/nfd-master/node-updater-pool_test.go +++ b/pkg/nfd-master/updater-pool_test.go @@ -26,44 +26,44 @@ import ( fakenfdclient "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned/fake" ) -func newFakeNodeUpdaterPool(nfdMaster *nfdMaster) *nodeUpdaterPool { - return &nodeUpdaterPool{ +func newFakeupdaterPool(nfdMaster *nfdMaster) *updaterPool { + return &updaterPool{ 
nfdMaster: nfdMaster, wg: sync.WaitGroup{}, } } -func TestNodeUpdaterStart(t *testing.T) { +func TestUpdaterStart(t *testing.T) { fakeMaster := newFakeMaster() - nodeUpdaterPool := newFakeNodeUpdaterPool(fakeMaster) + updaterPool := newFakeupdaterPool(fakeMaster) Convey("When starting the node updater pool", t, func() { - nodeUpdaterPool.start(10) - q := nodeUpdaterPool.queue + updaterPool.start(10) + q := updaterPool.queue Convey("Node updater pool queue properties should change", func() { So(q, ShouldNotBeNil) So(q.ShuttingDown(), ShouldBeFalse) }) - nodeUpdaterPool.start(10) + updaterPool.start(10) Convey("Node updater pool queue should not change", func() { - So(nodeUpdaterPool.queue, ShouldEqual, q) + So(updaterPool.queue, ShouldEqual, q) }) }) } func TestNodeUpdaterStop(t *testing.T) { fakeMaster := newFakeMaster() - nodeUpdaterPool := newFakeNodeUpdaterPool(fakeMaster) + updaterPool := newFakeupdaterPool(fakeMaster) - nodeUpdaterPool.start(10) + updaterPool.start(10) Convey("When stoping the node updater pool", t, func() { - nodeUpdaterPool.stop() + updaterPool.stop() Convey("Node updater pool queue should be removed", func() { // Wait for the wg.Done() So(func() interface{} { - return nodeUpdaterPool.queue.ShuttingDown() + return updaterPool.queue.ShuttingDown() }, withTimeout, 2*time.Second, ShouldBeTrue) }) }) @@ -72,15 +72,31 @@ func TestNodeUpdaterStop(t *testing.T) { func TestRunNodeUpdater(t *testing.T) { fakeMaster := newFakeMaster(WithKubernetesClient(fakek8sclient.NewSimpleClientset())) fakeMaster.nfdController = newFakeNfdAPIController(fakenfdclient.NewSimpleClientset()) - nodeUpdaterPool := newFakeNodeUpdaterPool(fakeMaster) + updaterPool := newFakeupdaterPool(fakeMaster) - nodeUpdaterPool.start(10) + updaterPool.start(10) Convey("Queue has no element", t, func() { - So(nodeUpdaterPool.queue.Len(), ShouldEqual, 0) + So(updaterPool.queue.Len(), ShouldEqual, 0) }) - nodeUpdaterPool.queue.Add(testNodeName) + updaterPool.queue.Add(testNodeName) 
Convey("Added element to the queue should be removed", t, func() { - So(func() interface{} { return nodeUpdaterPool.queue.Len() }, + So(func() interface{} { return updaterPool.queue.Len() }, + withTimeout, 2*time.Second, ShouldEqual, 0) + }) +} + +func TestRunNodeFeatureGroupUpdater(t *testing.T) { + fakeMaster := newFakeMaster(WithKubernetesClient(fakek8sclient.NewSimpleClientset())) + fakeMaster.nfdController = newFakeNfdAPIController(fakenfdclient.NewSimpleClientset()) + updaterPool := newFakeupdaterPool(fakeMaster) + + updaterPool.start(10) + Convey("Queue has no element", t, func() { + So(updaterPool.nfgQueue.Len(), ShouldEqual, 0) + }) + updaterPool.nfgQueue.Add(testNodeName) + Convey("Added element to the queue should be removed", t, func() { + So(func() interface{} { return updaterPool.queue.Len() }, withTimeout, 2*time.Second, ShouldEqual, 0) }) } diff --git a/pkg/nfd-worker/nfd-worker_test.go b/pkg/nfd-worker/nfd-worker_test.go index 219098d0e0..c5582d064a 100644 --- a/pkg/nfd-worker/nfd-worker_test.go +++ b/pkg/nfd-worker/nfd-worker_test.go @@ -53,6 +53,7 @@ func setupTest(args *master.Args) testContext { os.Exit(1) } _ = features.NFDMutableFeatureGate.OverrideDefault(features.NodeFeatureAPI, false) + _ = features.NFDMutableFeatureGate.OverrideDefault(features.NodeFeatureGroupAPI, false) m, err := master.NewNfdMaster( master.WithArgs(args), master.WithKubernetesClient(fakeclient.NewSimpleClientset())) diff --git a/test/e2e/data/nodefeaturegroup-1.yaml b/test/e2e/data/nodefeaturegroup-1.yaml new file mode 100644 index 0000000000..96fcc9ced8 --- /dev/null +++ b/test/e2e/data/nodefeaturegroup-1.yaml @@ -0,0 +1,11 @@ +apiVersion: nfd.k8s-sigs.io/v1alpha1 +kind: NodeFeatureGroup +metadata: + name: e2e-test-1 +spec: + featureGroupRules: + - name: "e2e-attribute-test-1" + matchFeatures: + - feature: kernel.version + matchExpressions: + major: {op: Exists} diff --git a/test/e2e/node_feature_discovery_test.go b/test/e2e/node_feature_discovery_test.go index 
12ad9a2754..bd771b9ab0 100644 --- a/test/e2e/node_feature_discovery_test.go +++ b/test/e2e/node_feature_discovery_test.go @@ -35,6 +35,7 @@ import ( resourcev1 "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + sets "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" taintutils "k8s.io/kubernetes/pkg/util/taints" "k8s.io/kubernetes/test/e2e/framework" @@ -181,6 +182,19 @@ func cleanupCRs(ctx context.Context, cli *nfdclient.Clientset, namespace string) }()).NotTo(HaveOccurred()) } } + + // Drop NodeFeatureGroup objects + nfgs, err := cli.NfdV1alpha1().NodeFeatureGroups(namespace).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + + if len(nfgs.Items) != 0 { + By("Deleting NodeFeatureGroup objects from namespace " + namespace) + for _, nfg := range nfgs.Items { + err = cli.NfdV1alpha1().NodeFeatureGroups(namespace).Delete(ctx, nfg.Name, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + } + } // Actual test suite @@ -873,6 +887,70 @@ core: }) }) + // Test NodeFeatureGroups + Context("and NodeFeatureGroups objects deployed", Label("nodefeaturegroup"), func() { + BeforeEach(func(ctx context.Context) { + // We need a NodeFeature from the node, can't be a fake one + if !useNodeFeatureApi { + Skip("NodeFeature API not enabled") + } + // enable the node feature group api + extraMasterPodSpecOpts = []testpod.SpecOption{ + testpod.SpecWithContainerExtraArgs( + "--feature-gates=NodeFeatureGroupAPI=true", + ), + } + }) + It("custom NodeFeatureGroup should be updated", func(ctx context.Context) { + By("Creating nfd-worker config") + cm := testutils.NewConfigMap("nfd-worker-conf", "nfd-worker.conf", ` + core: + sleepInterval: "1s" + `) + _, err := f.ClientSet.CoreV1().ConfigMaps(f.Namespace.Name).Create(ctx, cm, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Creating nfd-worker daemonset") + podSpecOpts := createPodSpecOpts( + 
testpod.SpecWithContainerImage(dockerImage()), + ) + workerDS := testds.NFDWorker(podSpecOpts...) + workerDS, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, workerDS, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("Waiting for worker daemonset pods to be ready") + Expect(testpod.WaitForReady(ctx, f.ClientSet, f.Namespace.Name, workerDS.Spec.Template.Labels["name"], 2)).NotTo(HaveOccurred()) + + nodes, err := getNonControlPlaneNodes(ctx, f.ClientSet) + Expect(err).NotTo(HaveOccurred()) + + targetNodes := make([]string, 0) + for _, node := range nodes { + checkNodeFeatureObject(ctx, node.Name) + if !Expect(node.Name).ToNot(BeEmpty(), "No suitable worker node found") { + continue + } + targetNodes = append(targetNodes, node.Name) + } + + By("Creating NodeFeatureGroups #1") + Expect(testutils.CreateNodeFeatureGroupsFromFile(ctx, nfdClient, f.Namespace.Name, "nodefeaturegroup-1.yaml")).NotTo(HaveOccurred()) + + By("Verifying NodeFeatureGroups #1") + expectedGroup := nfdv1alpha1.NodeFeatureGroup{ + Status: nfdv1alpha1.NodeFeatureGroupStatus{ + Nodes: targetNodes, + }, + } + Eventually(func() bool { + group, err := nfdClient.NfdV1alpha1().NodeFeatureGroups(f.Namespace.Name).Get(ctx, "e2e-test-1", metav1.GetOptions{}) + if err != nil { + return false + } + return areEqualLists(group.Status.Nodes, expectedGroup.Status.Nodes) + }, 5*time.Minute, 5*time.Second).Should(BeTrue()) + }) + }) + Context("and check whether master config passed successfully or not", func() { BeforeEach(func(ctx context.Context) { extraMasterPodSpecOpts = []testpod.SpecOption{ @@ -1044,3 +1122,10 @@ func getNode(nodes []corev1.Node, nodeName string) corev1.Node { } return corev1.Node{} } + +// Function to compare two string slices for equality regardless of order +func areEqualLists(list1, list2 []string) bool { + set1 := sets.NewString(list1...) + set2 := sets.NewString(list2...) 
+ return set1.Equal(set2) +} diff --git a/test/e2e/utils/crd.go b/test/e2e/utils/crd.go index ef4821fd90..3cefe7eef9 100644 --- a/test/e2e/utils/crd.go +++ b/test/e2e/utils/crd.go @@ -123,6 +123,21 @@ func CreateNodeFeatureRulesFromFile(ctx context.Context, cli nfdclientset.Interf return nil } +// CreateNodeFeatureGroupsFromFile creates a NodeFeatureGroup object from a given file located under test data directory. +func CreateNodeFeatureGroupsFromFile(ctx context.Context, cli nfdclientset.Interface, namespace, filename string) error { + objs, err := nodeFeatureGroupsFromFile(filepath.Join(packagePath, "..", "data", filename)) + if err != nil { + return err + } + + for _, obj := range objs { + if _, err = cli.NfdV1alpha1().NodeFeatureGroups(namespace).Create(ctx, obj, metav1.CreateOptions{}); err != nil { + return err + } + } + return nil +} + // UpdateNodeFeatureRulesFromFile updates existing NodeFeatureRule object from a given file located under test data directory. func UpdateNodeFeatureRulesFromFile(ctx context.Context, cli nfdclientset.Interface, filename string) error { objs, err := nodeFeatureRulesFromFile(filepath.Join(packagePath, "..", "data", filename)) @@ -238,6 +253,25 @@ func nodeFeatureRulesFromFile(path string) ([]*nfdv1alpha1.NodeFeatureRule, erro return crs, nil } +func nodeFeatureGroupsFromFile(path string) ([]*nfdv1alpha1.NodeFeatureGroup, error) { + objs, err := apiObjsFromFile(path, nfdscheme.Codecs.UniversalDeserializer()) + if err != nil { + return nil, err + } + + crs := make([]*nfdv1alpha1.NodeFeatureGroup, len(objs)) + + for i, obj := range objs { + var ok bool + crs[i], ok = obj.(*nfdv1alpha1.NodeFeatureGroup) + if !ok { + return nil, fmt.Errorf("unexpected type %t when reading %q", obj, path) + } + } + + return crs, nil +} + func init() { _, thisFile, _, _ := runtime.Caller(0) packagePath = filepath.Dir(thisFile) diff --git a/test/e2e/utils/rbac.go b/test/e2e/utils/rbac.go index c71e372486..902609eec3 100644 --- a/test/e2e/utils/rbac.go 
+++ b/test/e2e/utils/rbac.go @@ -186,6 +186,16 @@ func createClusterRoleMaster(ctx context.Context, cs clientset.Interface) (*rbac Resources: []string{"nodefeatures", "nodefeaturerules"}, Verbs: []string{"get", "list", "watch"}, }, + { + APIGroups: []string{"nfd.k8s-sigs.io"}, + Resources: []string{"nodefeaturegroups"}, + Verbs: []string{"get", "list", "watch", "update"}, + }, + { + APIGroups: []string{"nfd.k8s-sigs.io"}, + Resources: []string{"nodefeaturegroups/status"}, + Verbs: []string{"patch", "update"}, + }, }, } if *openShift {