From dea2041de4f9be0688b414d1e69783679b2d2dd4 Mon Sep 17 00:00:00 2001
From: Kevin Klues
Date: Sun, 16 Jun 2024 21:58:26 +0000
Subject: [PATCH] WIP: Add a POC of an alternate partitioning scheme

Add poc.yaml, which describes two mock A100 GPUs as whole-GPU devices
plus 1g.5gb MIG devices. Devices are built from shared attribute groups,
shared capacity templates and device templates.

Signed-off-by: Kevin Klues
---
 dra-evolution/pkg/api/poc.yaml | 309 +++++++++++++++++++++++++++++++++
 1 file changed, 309 insertions(+)
 create mode 100644 dra-evolution/pkg/api/poc.yaml

diff --git a/dra-evolution/pkg/api/poc.yaml b/dra-evolution/pkg/api/poc.yaml
new file mode 100644
index 0000000..8977a77
--- /dev/null
+++ b/dra-evolution/pkg/api/poc.yaml
@@ -0,0 +1,309 @@
+# Proof-of-concept device inventory for an alternate partitioning scheme.
+# It lists two mock A100 GPUs (namespaces gpu-0 and gpu-1); each is exposed
+# as one whole-GPU device plus seven 1g.5gb MIG devices.
+
+# Attribute groups that device templates include by name.
+sharedAttributeGroups:
+- name: common-attributes
+  attributes:
+  - name: product-name
+    string: Mock NVIDIA A100-SXM4-40GB
+  - name: brand
+    string: Nvidia
+  - name: architecture
+    string: Ampere
+  # Version values are quoted so no parser can retype them as numbers.
+  - name: cuda-compute-capability
+    version: "8.0.0"
+  - name: driver-version
+    version: "550.54.15"
+  - name: cuda-driver-version
+    version: "12.4.0"
+
+# Capacity pools that device templates and devices consume from.
+# NOTE(review): presumably one pool is instantiated per device namespace
+# (i.e. per physical GPU) - confirm against the consuming API.
+sharedCapacityTemplates:
+- name: gpu-shared-resources
+  capacities:
+  - name: memory
+    quantity: 40Gi
+  - name: multiprocessors
+    quantity: "98"
+  - name: copy-engines
+    quantity: "7"
+  - name: decoders
+    quantity: "5"
+  - name: encoders
+    quantity: "0"
+  - name: jpeg-engines
+    quantity: "1"
+  - name: ofa-engines
+    quantity: "1"
+  - name: memory-slices
+    intRange: "0-7"
+
+# Per-device-type defaults; each entry under devices names one of these
+# through deviceTemplateName.
+deviceTemplates:
+# Whole GPU: consumes the pool's full value for every capacity except memory.
+# NOTE(review): the shared memory capacity is not consumed by any template;
+# memory appears to be tracked through memory-slices instead - confirm.
+- name: gpu
+  sharedAttributeGroups:
+  - common-attributes
+  attributes:
+  - name: memory
+    quantity: 40Gi
+  - name: mig-capable
+    bool: true
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: multiprocessors
+      quantity: "98"
+    - name: copy-engines
+      quantity: "7"
+    - name: decoders
+      quantity: "5"
+    - name: encoders
+      quantity: "0"
+    - name: jpeg-engines
+      quantity: "1"
+    - name: ofa-engines
+      quantity: "1"
+    - name: memory-slices
+      intRange: "0-7"
+# 1g.5gb MIG profile: there is no memory-slices entry here; every device
+# that uses this template adds its own.
+- name: mig-1g.5gb
+  sharedAttributeGroups:
+  - common-attributes
+  attributes:
+  - name: mig-profile
+    string: 1g.5gb
+  - name: memory
+    quantity: 4864Mi
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: multiprocessors
+      quantity: "14"
+    - name: copy-engines
+      quantity: "1"
+    - name: decoders
+      quantity: "0"
+    - name: encoders
+      quantity: "0"
+    - name: jpeg-engines
+      quantity: "0"
+    - name: ofa-engines
+      quantity: "0"
+
+# Concrete devices. Device names repeat across namespaces, and a MIG
+# device's parentUUID matches the uuid of the gpu device in its namespace.
+devices:
+# Namespace gpu-0
+- name: gpu
+  namespace: gpu-0
+  deviceTemplateName: gpu
+  attributes:
+  - name: index
+    string: "0"
+  - name: minor
+    string: "0"
+  - name: uuid
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+- name: mig-1g.5gb-0
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:0"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "0"
+- name: mig-1g.5gb-1
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:1"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "1"
+- name: mig-1g.5gb-2
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:2"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "2"
+- name: mig-1g.5gb-3
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:3"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "3"
+- name: mig-1g.5gb-4
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:4"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "4"
+- name: mig-1g.5gb-5
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:5"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "5"
+- name: mig-1g.5gb-6
+  namespace: gpu-0
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "0:6"
+  - name: parentUUID
+    string: GPU-0eaad900-5263-4fd6-b020-f03d30efac31
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "6"
+# Namespace gpu-1
+- name: gpu
+  namespace: gpu-1
+  deviceTemplateName: gpu
+  attributes:
+  - name: index
+    string: "1"
+  - name: minor
+    string: "1"
+  - name: uuid
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+- name: mig-1g.5gb-0
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:0"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "0"
+- name: mig-1g.5gb-1
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:1"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "1"
+- name: mig-1g.5gb-2
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:2"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "2"
+- name: mig-1g.5gb-3
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:3"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "3"
+- name: mig-1g.5gb-4
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:4"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "4"
+- name: mig-1g.5gb-5
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:5"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "5"
+- name: mig-1g.5gb-6
+  namespace: gpu-1
+  deviceTemplateName: mig-1g.5gb
+  attributes:
+  - name: index
+    string: "1:6"
+  - name: parentUUID
+    string: GPU-4404041a-04cf-1ccf-9e70-f139a9b1e23c
+  sharedCapacitiesConsumed:
+  - sharedCapacityTemplateName: gpu-shared-resources
+    capacities:
+    - name: memory-slices
+      intRange: "6"