diff --git a/.wordlist-md b/.wordlist-md index fa185fbd..85baa39c 100644 --- a/.wordlist-md +++ b/.wordlist-md @@ -19,12 +19,14 @@ ClusterTask CodeReady DNS Dev +DevSpaces DevWorkspace DevWorkspaces Devfile DotNET Eventing FullAccess +GPUs Gi GitOps HTPasswd @@ -101,6 +103,7 @@ arn aws canada ceph +che checluster cicd cli @@ -112,10 +115,12 @@ configmap datasource deployable dev +devfile devspaces devworkspace devworkspaces disableNameSuffixHash +dn dns dotnet ec @@ -180,6 +185,7 @@ prometheus redhat redistributions repo +rhel rhpds runtime sagemaker @@ -194,8 +200,10 @@ sublicense tekton templating thanos +traefik truly vSphere +vscode vsphere wordlist workspaces diff --git a/devspaces/NOTES.md b/devspaces/NOTES.md new file mode 100644 index 00000000..11a1c830 --- /dev/null +++ b/devspaces/NOTES.md @@ -0,0 +1,34 @@ +# General Notes + +## Key images + +Init containers + +``` +# che / vscode image +registry.redhat.io/devspaces/code-rhel8 +# init container copies bins to `/checode` + +# project clone +registry.redhat.io/devworkspace/devworkspace-project-clone-rhel8 +``` + +Other containers + +``` +# che gateway +registry.redhat.io/devspaces/traefik-rhel8 + +# developer tools +https://github.com/devfile/developer-images + +# che docs +https://eclipse.dev/che/docs/stable/overview/introduction-to-eclipse-che/ +https://github.com/eclipse/che +``` + +Dashboard / devfile registry + +``` +https://github.com/eclipse-che/che-devfile-registry +``` diff --git a/devspaces/aggregate/overlays/default/kustomization.yaml b/devspaces/aggregate/overlays/default/kustomization.yaml new file mode 100644 index 00000000..713d4b98 --- /dev/null +++ b/devspaces/aggregate/overlays/default/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonAnnotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + +resources: + - ../../../instance/overlays/default + - ../../../operator/overlays/stable diff --git 
a/devspaces/aggregate/overlays/fix-autoscale/kustomization.yaml b/devspaces/aggregate/overlays/fix-autoscale/kustomization.yaml new file mode 100644 index 00000000..1fabfb08 --- /dev/null +++ b/devspaces/aggregate/overlays/fix-autoscale/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonAnnotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + +resources: + - ../../../instance/overlays/timeout-12m + - ../../../operator/overlays/stable diff --git a/devspaces/instance/base/checluster.yaml b/devspaces/instance/base/checluster.yaml new file mode 100644 index 00000000..e5ef05a9 --- /dev/null +++ b/devspaces/instance/base/checluster.yaml @@ -0,0 +1,63 @@ +apiVersion: org.eclipse.che/v2 +kind: CheCluster +metadata: + annotations: + argocd.argoproj.io/sync-wave: "5" + name: devspaces +spec: + components: + cheServer: + debug: false + logLevel: INFO + extraProperties: + CHE_SYSTEM_ADMIN__NAME: 'opentlc-mgr' + dashboard: + headerMessage: + show: false + text: >- + It's time to get your Dev on! 
+ # database: + # credentialsSecretName: postgres-credentials + # externalDb: false + # postgresDb: devspaces + # # BUG: cannot change postgresHostName + # postgresHostName: postgres + # postgresPort: '5432' + # pvc: + # claimSize: 1Gi + devfileRegistry: + # deployment: + # containers: + # - name: devfile-registry + # # image: quay.io/eclipse/che-devfile-registry:next + # image: registry.redhat.io/devspaces/devfileregistry-rhel8:latest + externalDevfileRegistries: + - url: https://eclipse-che.github.io/che-devfile-registry/main + metrics: + enable: true + # pluginRegistry: + # openVSXURL: "https://open-vsx.org" + # openVSXURL: "https://marketplace.visualstudio.com" + containerRegistry: {} + devEnvironments: + startTimeoutSeconds: 180 + secondsOfRunBeforeIdling: -1 + maxNumberOfRunningWorkspacesPerUser: 2 + maxNumberOfWorkspacesPerUser: -1 + containerBuildConfiguration: + openShiftSecurityContextConstraint: container-build + disableContainerBuildCapabilities: true + defaultEditor: che-incubator/che-code/latest + # defaultComponents: + # - container: + # sourceMapping: /projects + # image: registry.redhat.io/devspaces/udi-rhel8:latest + # name: universal-developer-image + defaultNamespace: + autoProvision: true + template: workspace- + secondsOfInactivityBeforeIdling: 1800 + storage: + pvcStrategy: per-user + gitServices: {} + networking: {} diff --git a/devspaces/instance/base/devworkspace-config.yaml b/devspaces/instance/base/devworkspace-config.yaml new file mode 100644 index 00000000..d6343121 --- /dev/null +++ b/devspaces/instance/base/devworkspace-config.yaml @@ -0,0 +1,9 @@ +apiVersion: controller.devfile.io/v1alpha1 +kind: DevWorkspaceOperatorConfig +metadata: + name: devworkspace-config +config: + workspace: + # kludge: allow cluster autoscaling + ignoredUnrecoverableEvents: + - FailedScheduling diff --git a/devspaces/instance/base/kustomization.yaml b/devspaces/instance/base/kustomization.yaml new file mode 100644 index 00000000..178fdc23 --- /dev/null
+++ b/devspaces/instance/base/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonLabels: + component: devspaces + +namespace: devspaces + +resources: + - checluster.yaml + - devworkspace-config.yaml + - namespace.yaml diff --git a/devspaces/instance/base/namespace.yaml b/devspaces/instance/base/namespace.yaml new file mode 100644 index 00000000..2283637e --- /dev/null +++ b/devspaces/instance/base/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: devspaces + annotations: + openshift.io/display-name: "DevSpaces Infra" + argocd.argoproj.io/sync-wave: "0" diff --git a/devspaces/instance/base/network-policy.yaml b/devspaces/instance/base/network-policy.yaml new file mode 100644 index 00000000..38c9c782 --- /dev/null +++ b/devspaces/instance/base/network-policy.yaml @@ -0,0 +1,13 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-from-openshift-devspaces +spec: + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshift-devspaces + podSelector: {} + policyTypes: + - Ingress diff --git a/devspaces/instance/overlays/default/README.md b/devspaces/instance/overlays/default/README.md new file mode 100644 index 00000000..635e9907 --- /dev/null +++ b/devspaces/instance/overlays/default/README.md @@ -0,0 +1,3 @@ +# Dev Spaces + +[OpenShift Dev Spaces Docs](https://access.redhat.com/documentation/en-us/red_hat_openshift_dev_spaces) diff --git a/nvidia-gpu-operator/operator/base/kustomization.yaml b/devspaces/instance/overlays/default/kustomization.yaml similarity index 53% rename from nvidia-gpu-operator/operator/base/kustomization.yaml rename to devspaces/instance/overlays/default/kustomization.yaml index 1e66bd5f..774a422d 100644 --- a/nvidia-gpu-operator/operator/base/kustomization.yaml +++ b/devspaces/instance/overlays/default/kustomization.yaml @@ -2,6 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: 
Kustomization resources: - - namespace.yaml - - operator-group.yaml - - subscription.yaml + - ../../base diff --git a/devspaces/instance/overlays/low-idle/kustomization.yaml b/devspaces/instance/overlays/low-idle/kustomization.yaml new file mode 100644 index 00000000..84a9d4bc --- /dev/null +++ b/devspaces/instance/overlays/low-idle/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: devspaces + +resources: + - ../../base + +patches: + - target: + group: org.eclipse.che + kind: CheCluster + name: devspaces + patch: |- + - op: replace + path: /spec/devEnvironments/secondsOfRunBeforeIdling + value: 300 diff --git a/devspaces/instance/overlays/timeout-12m/kustomization.yaml b/devspaces/instance/overlays/timeout-12m/kustomization.yaml new file mode 100644 index 00000000..134e8cb0 --- /dev/null +++ b/devspaces/instance/overlays/timeout-12m/kustomization.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: devspaces + +resources: + - ../../base + +patches: + - target: + group: org.eclipse.che + kind: CheCluster + name: devspaces + patch: |- + - op: replace + path: /spec/components/dashboard/headerMessage/show + value: true + - op: replace + path: /spec/components/dashboard/headerMessage/text + value: | + Please be patient... GPUs may take up to 12 min to be available! 
+ - op: replace + path: /spec/devEnvironments/startTimeoutSeconds + value: 720 diff --git a/gpu-operator-certified/instance/README.md b/gpu-operator-certified/instance/README.md index e69de29b..e238cc20 100644 --- a/gpu-operator-certified/instance/README.md +++ b/gpu-operator-certified/instance/README.md @@ -0,0 +1,51 @@ +# GPU Notes + +For more info please review the following: + +- [Demo GPUs on OpenShift](https://github.com/redhat-na-ssa/demo-ocp-gpu) + +## Instance Types + +AWS GPU Types: + +Multi-instance GPU (MIG) can be: + +- `p5.48xlarge` - 8 x H100 Tensor Core +- `p4d.24xlarge` - 8 x A100 Tensor Core + +Time-slicing GPU can be any Nvidia type (as documented by Nvidia): + +- P3 - V100 + - `p3.2xlarge` - 1 x V100 + - `p3.8xlarge` - 4 x V100 + - `p3.16xlarge` - 8 x V100 +- P2 - K80 + - `p2.xlarge` - 1 x K80 + - `p2.8xlarge` - 8 x K80 + - `p2.16xlarge` - 16 x K80 +- G5g - T4G + - `g5g.{,2,4,8}xlarge` - 1 x T4G + - `g5g.16xlarge`, `g5g.metal` - 2 x T4G +- G5 - A10G + - `g5.{,2,4,8,16}xlarge` - 1 x A10G + - `g5.{12,24}xlarge` - 4 x A10G + - `g5.48xlarge` - 8 x A10G +- G4dn - T4 + - `g4dn.{,2,4,8,16}xlarge` - 1 x T4 + - `g4dn.12xlarge` - 4 x T4 + - `g4dn.metal` - 8 x T4 +- G3 - M60 + - `g3s.xlarge` - 1 x M60 + - `g3.4xlarge` - 1 x M60 + - `g3.8xlarge` - 2 x M60 + - `g3.16xlarge` - 4 x M60 + +## Links + +- [Docs - AWS GPU Instances](https://aws.amazon.com/ec2/instance-types/#Accelerated_Computing) +- [Docs - Nvidia GPU Operator on OpenShift](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/openshift/contents.html) +- [Docs - Nvidia GPU admin dashboard](https://docs.openshift.com/container-platform/4.11/monitoring/nvidia-gpu-admin-dashboard.html) +- [Docs - MIG support in OCP](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/openshift/mig-ocp.html) +- [Blog - RH Nvidia GPUs on OpenShift](https://cloud.redhat.com/blog/autoscaling-nvidia-gpus-on-red-hat-openshift) +- [Demo - GPU DevSpaces](https://github.com/bkoz/devspaces) +-
[GPU Operator default config map](https://gitlab.com/nvidia/kubernetes/gpu-operator/-/blob/v23.6.1/assets/state-mig-manager/0400_configmap.yaml?ref_type=tags) \ No newline at end of file diff --git a/gpu-operator-certified/instance/base/cluster-policy.yaml b/gpu-operator-certified/instance/base/cluster-policy.yaml index 33712f0f..724b2026 100644 --- a/gpu-operator-certified/instance/base/cluster-policy.yaml +++ b/gpu-operator-certified/instance/base/cluster-policy.yaml @@ -1,4 +1,82 @@ -apiVersion: nvidia.com/v1 kind: ClusterPolicy +apiVersion: nvidia.com/v1 metadata: name: gpu-cluster-policy +spec: + operator: + defaultRuntime: crio + use_ocp_driver_toolkit: true + initContainer: {} + sandboxWorkloads: + enabled: false + defaultWorkload: container + driver: + enabled: true + upgradePolicy: + autoUpgrade: true + drain: + deleteEmptyDir: false + enable: false + force: false + timeoutSeconds: 300 + maxParallelUpgrades: 1 + maxUnavailable: 25% + podDeletion: + deleteEmptyDir: false + force: false + timeoutSeconds: 300 + waitForCompletion: + timeoutSeconds: 0 + repoConfig: + configMapName: '' + certConfig: + name: '' + licensingConfig: + nlsEnabled: false + configMapName: '' + virtualTopology: + config: '' + kernelModuleConfig: + name: '' + dcgmExporter: + enabled: true + config: + name: 'console-plugin-nvidia-gpu' + serviceMonitor: + enabled: true + dcgm: + enabled: true + daemonsets: + updateStrategy: RollingUpdate + rollingUpdate: + maxUnavailable: '1' + devicePlugin: + enabled: true + config: + name: '' + default: '' + gfd: + enabled: true + migManager: + enabled: true + nodeStatusExporter: + enabled: true + mig: + strategy: single + toolkit: + enabled: true + validator: + plugin: + env: + - name: WITH_WORKLOAD + value: 'true' + vgpuManager: + enabled: false + vgpuDeviceManager: + enabled: true + sandboxDevicePlugin: + enabled: true + vfioManager: + enabled: true + gds: + enabled: false diff --git a/gpu-operator-certified/instance/base/device-plugin-config.yaml 
b/gpu-operator-certified/instance/base/device-plugin-config.yaml new file mode 100644 index 00000000..47fa37a7 --- /dev/null +++ b/gpu-operator-certified/instance/base/device-plugin-config.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: device-plugin-config +data: {} diff --git a/gpu-operator-certified/instance/base/kustomization.yaml b/gpu-operator-certified/instance/base/kustomization.yaml index 0dfb6137..eead45d1 100644 --- a/gpu-operator-certified/instance/base/kustomization.yaml +++ b/gpu-operator-certified/instance/base/kustomization.yaml @@ -5,3 +5,4 @@ namespace: nvidia-gpu-operator resources: - cluster-policy.yaml + - device-plugin-config.yaml diff --git a/gpu-operator-certified/instance/overlays/mig-mixed/kustomization.yaml b/gpu-operator-certified/instance/overlays/mig-mixed/kustomization.yaml new file mode 100644 index 00000000..5abb963b --- /dev/null +++ b/gpu-operator-certified/instance/overlays/mig-mixed/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +patches: + - target: + kind: ClusterPolicy + name: gpu-cluster-policy + patch: |- + - op: add + path: /spec/mig/strategy + value: mixed diff --git a/gpu-operator-certified/instance/overlays/mig-single/kustomization.yaml b/gpu-operator-certified/instance/overlays/mig-single/kustomization.yaml new file mode 100644 index 00000000..87472ae9 --- /dev/null +++ b/gpu-operator-certified/instance/overlays/mig-single/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +patches: + - target: + kind: ClusterPolicy + name: gpu-cluster-policy + patch: |- + - op: add + path: /spec/mig/strategy + value: single diff --git a/gpu-operator-certified/instance/overlays/time-slicing-2/kustomization.yaml b/gpu-operator-certified/instance/overlays/time-slicing-2/kustomization.yaml new file mode 100644 index 00000000..1ed4b944 --- /dev/null 
+++ b/gpu-operator-certified/instance/overlays/time-slicing-2/kustomization.yaml @@ -0,0 +1,34 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +patches: + - target: + kind: ClusterPolicy + name: gpu-cluster-policy + patch: |- + - op: add + path: /spec/devicePlugin/config/name + value: device-plugin-config + - op: add + path: /spec/devicePlugin/config/default + value: Tesla-T4 + - op: replace + path: /spec/gfd/enabled + value: true + - target: + kind: ConfigMap + name: device-plugin-config + patch: |- + - op: add + path: /data + value: + Tesla-T4: |- + version: v1 + sharing: + timeSlicing: + resources: + - name: nvidia.com/gpu + replicas: 2 diff --git a/gpu-operator-certified/instance/overlays/time-slicing-4/kustomization.yaml b/gpu-operator-certified/instance/overlays/time-slicing-4/kustomization.yaml new file mode 100644 index 00000000..35fe72ba --- /dev/null +++ b/gpu-operator-certified/instance/overlays/time-slicing-4/kustomization.yaml @@ -0,0 +1,21 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../time-slicing-2 + +patches: + - target: + kind: ConfigMap + name: device-plugin-config + patch: |- + - op: add + path: /data + value: + Tesla-T4: |- + version: v1 + sharing: + timeSlicing: + resources: + - name: nvidia.com/gpu + replicas: 4 diff --git a/gpu-operator-certified/instance/overlays/time-slicing-8-a100/kustomization.yaml b/gpu-operator-certified/instance/overlays/time-slicing-8-a100/kustomization.yaml new file mode 100644 index 00000000..9b9570d1 --- /dev/null +++ b/gpu-operator-certified/instance/overlays/time-slicing-8-a100/kustomization.yaml @@ -0,0 +1,42 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +patches: + - target: + kind: ClusterPolicy + name: gpu-cluster-policy + patch: |- + - op: add + path: /spec/devicePlugin/config/name + value: device-plugin-config + - op: add + path: 
/spec/devicePlugin/config/default + value: A100-SXM4-40GB + - op: replace + path: /spec/gfd/enabled + value: true + - target: + kind: ConfigMap + name: device-plugin-config + patch: |- + - op: add + path: /data + value: + A100-SXM4-40GB: |- + version: v1 + sharing: + timeSlicing: + resources: + - name: nvidia.com/gpu + replicas: 8 + - name: nvidia.com/mig-1g.5gb + replicas: 1 + - name: nvidia.com/mig-2g.10gb + replicas: 2 + - name: nvidia.com/mig-3g.20gb + replicas: 3 + - name: nvidia.com/mig-7g.40gb + replicas: 7 diff --git a/nfd/aggregate/overlays/default/kustomization.yaml b/nfd/aggregate/overlays/default/kustomization.yaml index c666be10..303e9470 100644 --- a/nfd/aggregate/overlays/default/kustomization.yaml +++ b/nfd/aggregate/overlays/default/kustomization.yaml @@ -1,4 +1,3 @@ ---- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/nfd/aggregate/overlays/only-nvidia/kustomization.yaml b/nfd/aggregate/overlays/only-nvidia/kustomization.yaml new file mode 100644 index 00000000..a4d3d612 --- /dev/null +++ b/nfd/aggregate/overlays/only-nvidia/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonAnnotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + +namespace: openshift-nfd + +resources: + - ../../../operator/overlays/stable + - ../../../instance/overlays/only-nvidia diff --git a/nfd/instance/README.md b/nfd/instance/README.md deleted file mode 100644 index 61c6c968..00000000 --- a/nfd/instance/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# OpenShift Node Feature Discovery (NFD) - -Installs a basic nodeFeatureDiscovery instance. - -## Prerequisites - -First, install the [OpenShift NFD Operator](../operator) in your cluster. - -Do not use the `base` directory directly, as you will need to patch the `channel` based on the version of OpenShift you are using, or the version of the operator you want to use. 
- -## Overlays - -The options for this operator are the following *overlays*: -* [default](overlays/default) - -### Default - -[default](overlays/default) configures a basic default configuration for a nodeFeatureDiscovery instance. For more details on customizing the NFD workers, refer to the [docs](https://kubernetes-sigs.github.io/node-feature-discovery/v0.10/advanced/worker-configuration-reference.html). - -## Usage - -If you have cloned the `gitops-catalog` repository, you can install the Storage System by running from the root `gitops-catalog` directory - -``` -oc apply -k nfd/instance/overlays/default -``` - -Or, without cloning: - -``` -oc apply -k https://github.com/redhat-cop/gitops-catalog/nfd/instance/overlays/default -``` - -As part of a different overlay in your own GitOps repo: - -``` -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: - - github.com/redhat-cop/gitops-catalog/nfd/instance/overlays/default?ref=main -``` diff --git a/nfd/instance/base/kustomization.yaml b/nfd/instance/base/kustomization.yaml index 133b643b..309c6ea7 100644 --- a/nfd/instance/base/kustomization.yaml +++ b/nfd/instance/base/kustomization.yaml @@ -1,4 +1,3 @@ ---- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/nfd/instance/base/node-feature-discovery.yaml b/nfd/instance/base/node-feature-discovery.yaml index 9f6c3b84..fa447eb8 100644 --- a/nfd/instance/base/node-feature-discovery.yaml +++ b/nfd/instance/base/node-feature-discovery.yaml @@ -13,8 +13,9 @@ spec: # matchOn: # - nodename: ["special-.*-node-.*"] operand: - image: >- - registry.redhat.io/openshift4/ose-node-feature-discovery@sha256:9c080fc2cd9d9cbca9ec360674e32fe54b3724ec87bedaa513ac3ee71cb14269 + # bug: an image has to be defined otherwise the deployment fails + # bug: this behavior recently changed + image: registry.redhat.io/openshift4/ose-node-feature-discovery:latest servicePort: 12000 workerConfig: configData: | diff --git 
a/nfd/instance/overlays/default/kustomization.yaml b/nfd/instance/overlays/default/kustomization.yaml index ef6e263c..774a422d 100644 --- a/nfd/instance/overlays/default/kustomization.yaml +++ b/nfd/instance/overlays/default/kustomization.yaml @@ -1,4 +1,3 @@ ---- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/nvidia-gpu-operator/operator/overlays/default/kustomization.yaml b/nfd/instance/overlays/only-nvidia/kustomization.yaml similarity index 51% rename from nvidia-gpu-operator/operator/overlays/default/kustomization.yaml rename to nfd/instance/overlays/only-nvidia/kustomization.yaml index c771cd2a..4c67b875 100644 --- a/nvidia-gpu-operator/operator/overlays/default/kustomization.yaml +++ b/nfd/instance/overlays/only-nvidia/kustomization.yaml @@ -6,6 +6,6 @@ resources: patches: - target: - kind: Subscription - name: gpu-operator-certified - path: patch-channel.yaml + group: nfd.openshift.io + kind: NodeFeatureDiscovery + path: patch-node-feature-discovery.yaml diff --git a/nfd/instance/overlays/only-nvidia/patch-node-feature-discovery.yaml b/nfd/instance/overlays/only-nvidia/patch-node-feature-discovery.yaml new file mode 100644 index 00000000..27d2eac1 --- /dev/null +++ b/nfd/instance/overlays/only-nvidia/patch-node-feature-discovery.yaml @@ -0,0 +1,22 @@ +kind: NodeFeatureDiscovery +apiVersion: nfd.openshift.io/v1 +metadata: + name: nfd-instance +spec: + instance: '' + operand: + image: registry.redhat.io/openshift4/ose-node-feature-discovery:latest + servicePort: 12000 + topologyUpdater: false + workerConfig: + configData: | + core: + sleepInterval: 60s + sources: + pci: + deviceClassWhitelist: + - "0200" + - "03" + - "12" + deviceLabelFields: + - "vendor" diff --git a/nvidia-gpu-operator b/nvidia-gpu-operator new file mode 120000 index 00000000..b4c63bff --- /dev/null +++ b/nvidia-gpu-operator @@ -0,0 +1 @@ +gpu-operator-certified/ \ No newline at end of file diff --git a/nvidia-gpu-operator/README.md 
b/nvidia-gpu-operator/README.md deleted file mode 100644 index 0f21b90f..00000000 --- a/nvidia-gpu-operator/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# NVIDIA GPU Operator - -Installs the NVIDIA GPU Operator. - -## Prerequisites - -First, install the [NVIDIA GPU Operator](../operator) in your cluster. - -Do not use the `base` directory directly, as you will need to patch the `channel` based on the version of OpenShift you are using, or the version of the operator you want to use. - -## Overlays - -The options for this operator are the following *overlays*: -* [default](overlays/default) - -### Default - -[default](overlays/default) configures the NVIDIA GPU Operator. - -## Usage - -If you have cloned the `gitops-catalog` repository, you can install the Storage System by running from the root `gitops-catalog` directory - -``` -oc apply -k nvidia-gpu-operator/operator/overlays/default -``` - -Or, without cloning: - -``` -oc apply -k https://github.com/redhat-cop/gitops-catalog/nvidia-gpu-operator/instance/overlays/default -``` - -As part of a different overlay in your own GitOps repo: - -``` -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: - - github.com/redhat-cop/gitops-catalog/nvidia-gpu-operator/instance/overlays/default?ref=main -``` \ No newline at end of file diff --git a/nvidia-gpu-operator/operator/base/namespace.yaml b/nvidia-gpu-operator/operator/base/namespace.yaml deleted file mode 100644 index 9f802932..00000000 --- a/nvidia-gpu-operator/operator/base/namespace.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - annotations: - openshift.io/display-name: "NVIDIA GPU Operator" - labels: - openshift.io/cluster-monitoring: "true" - name: nvidia-gpu-operator diff --git a/nvidia-gpu-operator/operator/base/operator-group.yaml b/nvidia-gpu-operator/operator/base/operator-group.yaml deleted file mode 100644 index 53acfaaf..00000000 --- a/nvidia-gpu-operator/operator/base/operator-group.yaml +++ 
/dev/null @@ -1,8 +0,0 @@ -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: nvidia-gpu-operator-group - namespace: nvidia-gpu-operator -spec: - targetNamespaces: - - nvidia-gpu-operator diff --git a/nvidia-gpu-operator/operator/base/subscription.yaml b/nvidia-gpu-operator/operator/base/subscription.yaml deleted file mode 100644 index 322840eb..00000000 --- a/nvidia-gpu-operator/operator/base/subscription.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: gpu-operator-certified - namespace: nvidia-gpu-operator -spec: - channel: patch-me-see-overlays-dir - installPlanApproval: Automatic - name: gpu-operator-certified - source: certified-operators - sourceNamespace: openshift-marketplace diff --git a/nvidia-gpu-operator/operator/overlays/default/patch-channel.yaml b/nvidia-gpu-operator/operator/overlays/default/patch-channel.yaml deleted file mode 100644 index 6642eb17..00000000 --- a/nvidia-gpu-operator/operator/overlays/default/patch-channel.yaml +++ /dev/null @@ -1,3 +0,0 @@ -- op: replace - path: /spec/channel - value: stable