From cb947ae856e0dc8a421611ee2de6b4d46062e92a Mon Sep 17 00:00:00 2001
From: Justin Riley
Date: Tue, 12 Sep 2023 16:20:58 -0400
Subject: [PATCH 1/2] add operators for nvidia gpus

This adds the following bundles to the nerc-ocp-prod overlay:

- node-feature-discovery (used by gpu operator to find GPU nodes)
- nvidia-gpu-operator
---
 cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
index c286d0c9..eaf8ec19 100644
--- a/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
+++ b/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
@@ -16,6 +16,8 @@ resources:
 - ../../bundles/amq-streams-operator
 - ../../bundles/openshift-pipelines-operator
 - ../../bundles/rhods-operator
+- ../../bundles/node-feature-discovery
+- ../../bundles/nvidia-gpu-operator
 - feature/odf
 - feature/custom-routes
 - ../../base/core/namespaces/openshift-gitops

From d62ca0b352b2f8c0f862cc659199c82a714bb4fa Mon Sep 17 00:00:00 2001
From: Justin Riley
Date: Thu, 14 Sep 2023 14:22:25 -0400
Subject: [PATCH 2/2] add nfd config for prod cluster

Needed to configure the node feature discovery instance
---
 .../overlays/nerc-ocp-prod/kustomization.yaml |  1 +
 .../nodefeaturediscoveries/kustomization.yaml |  4 ++
 .../nodefeaturediscoveries/nfd-instance.yaml  | 60 +++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/kustomization.yaml
 create mode 100644 cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/nfd-instance.yaml

diff --git a/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
index eaf8ec19..cf86d161 100644
--- a/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
+++ b/cluster-scope/overlays/nerc-ocp-prod/kustomization.yaml
@@ -31,6 +31,7 @@ resources:
 - certificates
 - consolelinks
 - odhdashboardconfigs
+- nodefeaturediscoveries
 
 components:
 - ../../components/argocd-skip-dryrun
diff --git a/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/kustomization.yaml
new file mode 100644
index 00000000..9aaa34ca
--- /dev/null
+++ b/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - nfd-instance.yaml
diff --git a/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/nfd-instance.yaml b/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/nfd-instance.yaml
new file mode 100644
index 00000000..e2715ae0
--- /dev/null
+++ b/cluster-scope/overlays/nerc-ocp-prod/nodefeaturediscoveries/nfd-instance.yaml
@@ -0,0 +1,60 @@
+apiVersion: nfd.openshift.io/v1
+kind: NodeFeatureDiscovery
+metadata:
+  name: nfd-instance
+  namespace: openshift-nfd
+spec:
+  customConfig:
+    configData: ""
+  instance: ""
+  operand:
+    image: registry.redhat.io/openshift4/ose-node-feature-discovery:v4.11
+    imagePullPolicy: Always
+    servicePort: 0
+  topologyupdater: false
+  workerConfig:
+    configData: |
+      core:
+        sleepInterval: 60s
+      sources:
+        cpu:
+          cpuid:
+          # NOTE: whitelist has priority over blacklist
+            attributeBlacklist:
+              - "BMI1"
+              - "BMI2"
+              - "CLMUL"
+              - "CMOV"
+              - "CX16"
+              - "ERMS"
+              - "F16C"
+              - "HTT"
+              - "LZCNT"
+              - "MMX"
+              - "MMXEXT"
+              - "NX"
+              - "POPCNT"
+              - "RDRAND"
+              - "RDSEED"
+              - "RDTSCP"
+              - "SGX"
+              - "SSE"
+              - "SSE2"
+              - "SSE3"
+              - "SSE4.1"
+              - "SSE4.2"
+              - "SSSE3"
+            attributeWhitelist:
+        kernel:
+          configOpts:
+            - "NO_HZ"
+            - "X86"
+            - "DMI"
+        pci:
+          deviceClassWhitelist:
+            - "0200"
+            - "0300"
+            - "0302"
+          deviceLabelFields:
+            - "vendor"
+            - "class"