From 73c1f81fa81c790246d111b1da99de4c7b17106d Mon Sep 17 00:00:00 2001
From: Ivan Kolodiazhnyi
Date: Sat, 23 Mar 2024 18:34:28 +0200
Subject: [PATCH] Configure the RDMA subsystem mode via ib_core module config

---
 api/v1/sriovnetworknodestate_types.go | 8 ++
 api/v1/sriovnetworkpoolconfig_types.go | 4 +
 api/v1/zz_generated.deepcopy.go | 17 +++
 ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++
 ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 +
 controllers/drain_controller.go | 99 +---------------
 controllers/helper.go | 109 +++++++++++++++++-
 .../sriovnetworknodepolicy_controller.go | 7 ++
 ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++
 ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 +
 pkg/consts/constants.go | 3 +
 pkg/daemon/daemon.go | 10 ++
 pkg/daemon/writer.go | 7 ++
 pkg/helper/mock/mock_helper.go | 29 +++++
 .../internal/lib/netlink/mock/mock_netlink.go | 15 +++
 pkg/host/internal/lib/netlink/netlink.go | 7 ++
 pkg/host/internal/network/network.go | 31 +++++
 pkg/host/internal/network/network_test.go | 31 +++++
 pkg/host/mock/mock_host.go | 29 +++++
 pkg/host/types/interfaces.go | 4 +
 pkg/utils/cluster.go | 3 +-
 21 files changed, 357 insertions(+), 104 deletions(-)

diff --git a/api/v1/sriovnetworknodestate_types.go b/api/v1/sriovnetworknodestate_types.go
index 4b90d61d2..e5f59d71c 100644
--- a/api/v1/sriovnetworknodestate_types.go
+++ b/api/v1/sriovnetworknodestate_types.go
@@ -27,6 +27,7 @@ import (
 type SriovNetworkNodeStateSpec struct {
 	Interfaces Interfaces `json:"interfaces,omitempty"`
 	Bridges    Bridges    `json:"bridges,omitempty"`
+	System     System     `json:"system,omitempty"`
 }
 
 type Interfaces []Interface
@@ -114,10 +115,17 @@ type OVSUplinkConfigExt struct {
 	Interface OVSInterfaceConfig `json:"interface,omitempty"`
 }
 
+type System struct {
+	// +kubebuilder:validation:Enum=shared;exclusive
+	// RDMA subsystem. Allowed value "shared", "exclusive".
+	RdmaMode string `json:"rdmaMode,omitempty"`
+}
+
 // SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState
 type SriovNetworkNodeStateStatus struct {
 	Interfaces    InterfaceExts `json:"interfaces,omitempty"`
 	Bridges       Bridges       `json:"bridges,omitempty"`
+	System        System        `json:"system,omitempty"`
 	SyncStatus    string        `json:"syncStatus,omitempty"`
 	LastSyncError string        `json:"lastSyncError,omitempty"`
 }
diff --git a/api/v1/sriovnetworkpoolconfig_types.go b/api/v1/sriovnetworkpoolconfig_types.go
index c6e710a99..011ffc7d9 100644
--- a/api/v1/sriovnetworkpoolconfig_types.go
+++ b/api/v1/sriovnetworkpoolconfig_types.go
@@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct {
 	// Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards,
 	// even if maxUnavailable is greater than one.
 	MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
+
+	// +kubebuilder:validation:Enum=shared;exclusive
+	// RDMA subsystem. Allowed value "shared", "exclusive".
+	RdmaMode string `json:"rdmaMode,omitempty"`
 }
 
 type OvsHardwareOffloadConfig struct {
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
index fc9477593..0209c0573 100644
--- a/api/v1/zz_generated.deepcopy.go
+++ b/api/v1/zz_generated.deepcopy.go
@@ -783,6 +783,7 @@ func (in *SriovNetworkNodeStateSpec) DeepCopyInto(out *SriovNetworkNodeStateSpec
 		}
 	}
 	in.Bridges.DeepCopyInto(&out.Bridges)
+	out.System = in.System
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateSpec.
@@ -806,6 +807,7 @@ func (in *SriovNetworkNodeStateStatus) DeepCopyInto(out *SriovNetworkNodeStateSt } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateStatus. @@ -1066,6 +1068,21 @@ func (in *SriovOperatorConfigStatus) DeepCopy() *SriovOperatorConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *System) DeepCopyInto(out *System) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new System. +func (in *System) DeepCopy() *System { + if in == nil { + return nil + } + out := new(System) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TrunkConfig) DeepCopyInto(out *TrunkConfig) { *out = *in diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". 
+ enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index 86da909d8..b96458fa7 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -24,11 +24,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -48,13 +45,6 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var ( - oneNode = intstr.FromInt32(1) - defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ - MaxUnavailable: &oneNode, - NodeSelector: &metav1.LabelSelector{}}} -) - type DrainReconcile struct { client.Client Scheme *runtime.Scheme @@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) ( } func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { - logger := log.FromContext(ctx) - logger.Info("findNodePoolConfig():") - // get all the sriov network pool configs - npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} - err := dr.List(ctx, npcl) - if err != nil { - logger.Error(err, "failed to list sriovNetworkPoolConfig") - return nil, nil, err - } - - selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} - nodesInPools := map[string]interface{}{} - - for _, npc := range npcl.Items { - // we skip hw offload objects - if npc.Spec.OvsHardwareOffloadConfig.Name != "" { - continue - } - - if npc.Spec.NodeSelector == nil { - npc.Spec.NodeSelector = &metav1.LabelSelector{} - } - - selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - if selector.Matches(labels.Set(node.Labels)) { - selectedNpcl = append(selectedNpcl, npc.DeepCopy()) - } - - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", - "machineConfigPoolName", npc, - "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - for _, nodeName := range nodeList.Items { - nodesInPools[nodeName.Name] = nil - } - } - - if len(selectedNpcl) > 1 { - // don't allow the node to be part of multiple pools - err = fmt.Errorf("node is part of more then one pool") - logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) - return nil, nil, err - } else if len(selectedNpcl) == 1 { - // found one pool for our node - logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) - selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) - return nil, nil, err - } - - // list all the nodes that are also part of this pool and return them - nodeList := &corev1.NodeList{} - 
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) - return nil, nil, err - } - - return selectedNpcl[0], nodeList.Items, nil - } else { - // in this case we get all the nodes and remove the ones that already part of any pool - logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl) - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList) - if err != nil { - logger.Error(err, "failed to list all the nodes") - return nil, nil, err - } - - defaultNodeLists := []corev1.Node{} - for _, nodeObj := range nodeList.Items { - if _, exist := nodesInPools[nodeObj.Name]; !exist { - defaultNodeLists = append(defaultNodeLists, nodeObj) - } - } - return defaultNpcl, defaultNodeLists, nil - } + return findNodePoolConfig(ctx, node, dr.Client) } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/helper.go b/controllers/helper.go index 9ff735473..b90ad44f8 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -30,9 +30,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" kscheme "k8s.io/client-go/kubernetes/scheme" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -47,10 +50,17 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var webhooks = map[string](string){ - constants.InjectorWebHookName: constants.InjectorWebHookPath, - constants.OperatorWebHookName: constants.OperatorWebHookPath, -} +var ( + webhooks = map[string](string){ + constants.InjectorWebHookName: constants.InjectorWebHookPath, + constants.OperatorWebHookName: constants.OperatorWebHookPath, + } + oneNode = intstr.FromInt32(1) + defaultPoolConfig = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ + MaxUnavailable: &oneNode, + NodeSelector: &metav1.LabelSelector{}, + RdmaMode: ""}} +) const ( clusterRoleResourceName = "ClusterRole" @@ -397,3 +407,94 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string] } return nil } + +func findNodePoolConfig(ctx context.Context, node *corev1.Node, c k8sclient.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("FindNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := c.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err + } + + selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return 
nil, nil, err
+		}
+
+		if selector.Matches(labels.Set(node.Labels)) {
+			selectedNpcl = append(selectedNpcl, npc.DeepCopy())
+		}
+
+		nodeList := &corev1.NodeList{}
+		err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector})
+		if err != nil {
+			logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
+				"machineConfigPoolName", npc,
+				"nodeSelector", npc.Spec.NodeSelector)
+			return nil, nil, err
+		}
+
+		for _, nodeName := range nodeList.Items {
+			nodesInPools[nodeName.Name] = nil
+		}
+	}
+
+	if len(selectedNpcl) > 1 {
+		// don't allow the node to be part of multiple pools
+		err = fmt.Errorf("node is part of more than one pool")
+		logger.Error(err, "multiple pools found for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
+		return nil, nil, err
+	} else if len(selectedNpcl) == 1 {
+		// found one pool for our node
+		logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
+		selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
+		if err != nil {
+			logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
+			return nil, nil, err
+		}
+
+		// list all the nodes that are also part of this pool and return them
+		nodeList := &corev1.NodeList{}
+		err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector})
+		if err != nil {
+			logger.Error(err, "failed to list nodes with label selector", "labelSelector", selector)
+			return nil, nil, err
+		}
+
+		return selectedNpcl[0], nodeList.Items, nil
+	} else {
+		// in this case we get all the nodes and remove the ones that are already part of any pool
+		logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig)
+		nodeList := &corev1.NodeList{}
+		err = c.List(ctx, nodeList)
+		if err != nil {
+			logger.Error(err, "failed to list all the nodes")
+			return nil, nil, err
+		}
+
+		defaultNodeLists := []corev1.Node{}
+		for _, nodeObj := range nodeList.Items {
+			if _, exist := nodesInPools[nodeObj.Name]; !exist {
+				defaultNodeLists = append(defaultNodeLists, nodeObj)
+			}
+		}
+		return defaultPoolConfig, defaultNodeLists, nil
+	}
+}
diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go
index be46880b7..1d2811fac 100644
--- a/controllers/sriovnetworknodepolicy_controller.go
+++ b/controllers/sriovnetworknodepolicy_controller.go
@@ -272,6 +272,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
 		ns.Name = node.Name
 		ns.Namespace = vars.Namespace
 		j, _ := json.Marshal(ns)
+		netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client)
+		if err != nil {
+			log.Log.Error(err, "syncAllSriovNetworkNodeStates(): failed to get SriovNetworkPoolConfig for the current node")
+		}
+		if netPoolConfig != nil {
+			ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode
+		}
 		logger.V(2).Info("SriovNetworkNodeState CR", "content", j)
 		if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil {
 			logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name)
diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml
index c5bf230c3..31ddf3bf1 100644
--- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml
+++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f7025c90d..66a5ad2b5 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -54,6 +54,9 @@ const ( VdpaTypeVirtio = "virtio" VdpaTypeVhost = "vhost" + RdmaSubsystemModeShared = "shared" + RdmaSubsystemModeExclusive = "exclusive" + ClusterTypeOpenshift = "openshift" ClusterTypeKubernetes = "kubernetes" diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ff7f326dc..0867685dc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -429,6 +429,16 @@ func (dn *Daemon) nodeStateSyncHandler() error { reqReboot = reqReboot || r } + if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode { + err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem") + return err + } + reqReboot = true + reqDrain = true + } + // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 09d06d8f9..60d4e8d91 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -118,6 +118,7 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { log.Log.V(2).Info("pollNicStatus()") var iface []sriovnetworkv1.InterfaceExt var bridges sriovnetworkv1.Bridges + var rdmaMode string var err error if vars.PlatformType == consts.VirtualOpenStack { @@ -138,8 +139,14 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { } } + rdmaMode, err = w.hostHelper.DiscoverRDMASubsystem() + if err != nil { + return err + } + w.status.Interfaces = iface w.status.Bridges = bridges + w.status.System.RdmaMode = rdmaMode return nil } diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index 432d741be..b413ecdee 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -294,6 
+294,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostHelpersInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -1044,6 +1059,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. func (m *MockHostHelpersInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 5b3bcc790..758346a3f 100644 --- a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -145,6 +145,21 @@ func (mr *MockNetlinkLibMockRecorder) DevlinkSetDeviceParam(bus, device, param, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DevlinkSetDeviceParam", reflect.TypeOf((*MockNetlinkLib)(nil).DevlinkSetDeviceParam), bus, device, param, cmode, value) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockNetlinkLib) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockNetlinkLibMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockNetlinkLib)(nil).DiscoverRDMASubsystem)) +} + // IsLinkAdminStateUp mocks base method. 
func (m *MockNetlinkLib) IsLinkAdminStateUp(link netlink.Link) bool { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index ed063834e..7d857921d 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,6 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) } type libWrapper struct{} @@ -185,3 +187,8 @@ func (w *libWrapper) RdmaLinkByName(name string) (*netlink.RdmaLink, error) { func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } + +// DiscoverRDMASubsystem returns RDMA subsystem mode +func (w *libWrapper) DiscoverRDMASubsystem() (string, error) { + return netlink.RdmaSystemGetNetnsMode() +} diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index ef85ad24a..940c4b248 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -429,3 +429,34 @@ func (n *network) GetPciAddressFromInterfaceName(interfaceName string) (string, log.Log.V(2).Info("GetPciAddressFromInterfaceName(): result", "interface", interfaceName, "pci address", pciAddress) return pciAddress, nil } + +func (n *network) DiscoverRDMASubsystem() (string, error) { + log.Log.Info("DiscoverRDMASubsystem(): retrieving RDMA subsystem mode") + subsystem, err := n.netlinkLib.DiscoverRDMASubsystem() + + if err != nil { + log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") + return "", err + } + + return subsystem, nil +} + +func (n *network) SetRDMASubsystem(mode string) error { + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + + modeValue := 1 + if mode == "exclusive" { + modeValue = 0 + } + config := fmt.Sprintf("options ib_core netns_mode=%d\n", modeValue) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "ib_core.conf") + err := os.WriteFile(path, []byte(config), 0644) + + if err != nil { + log.Log.Error(err, "SetRDMASubsystem(): failed to write ib_core config") + return fmt.Errorf("failed to write ib_core config: %v", err) + } + + return nil +} diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 19eb3f438..51c56b875 100644 --- a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -283,4 +283,35 @@ var _ = Describe("Network", func() { Expect(pci).To(Equal("0000:3b:00.0")) }) }) + Context("DiscoverRDMASubsystem", func() { + It("Should get RDMA Subsystem using netlink", func() { + netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil) + + pci, err := n.DiscoverRDMASubsystem() + Expect(err).NotTo(HaveOccurred()) + Expect(pci).To(Equal("shared")) + }) + }) + Context("SetRDMASubsystem", func() { + It("Should set RDMA Subsystem shared mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=1\n") + }) + It("Should set RDMA Subsystem exclusive mode", func() { + 
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=0\n") + }) + }) }) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 5ebed46aa..095d270a9 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -264,6 +264,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostManagerInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -859,6 +874,20 @@ func (mr *MockHostManagerInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostManagerInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. 
func (m *MockHostManagerInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index c6e0c8faf..6844ee5ae 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -90,6 +90,10 @@ type NetworkInterface interface { GetNetDevLinkAdminState(ifaceName string) string // GetPciAddressFromInterfaceName parses sysfs to get pci address of an interface by name GetPciAddressFromInterfaceName(interfaceName string) (string, error) + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) + // SetRDMASubsystem changes RDMA subsystem mode + SetRDMASubsystem(mode string) error } type ServiceInterface interface { diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e07..c5f1f333a 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -5,13 +5,12 @@ import ( "fmt" "os" - "sigs.k8s.io/controller-runtime/pkg/log" - configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" )
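
As a usage sketch of the new field (assuming the CRD schema added above; the object name, namespace, and node-selector labels below are illustrative), a pool of nodes can be switched to exclusive RDMA mode with a SriovNetworkPoolConfig such as:

apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkPoolConfig
metadata:
  name: rdma-exclusive-workers            # illustrative name
  namespace: sriov-network-operator       # assumed operator namespace; adjust to the deployment
spec:
  nodeSelector:
    matchLabels:
      node-role.kubernetes.io/worker: ""  # illustrative node selector
  maxUnavailable: 1
  rdmaMode: exclusive                     # "shared" or "exclusive", per the enum added in this patch

The operator copies rdmaMode from the matching pool into each node's SriovNetworkNodeState spec; when the reported status differs from the spec, the config daemon writes "options ib_core netns_mode=0" (netns_mode=1 for shared) to the host's /etc/modprobe.d/ib_core.conf and requests a drain and reboot so the new mode takes effect when ib_core is loaded at boot.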