Introduce InstanceTypes

Signed-off-by: Andrei Kvapil <[email protected]>
aenix-io · Oct 10, 2024 · 7f623c5 · 7f623c5
1 parent 16d3979
commit 7f623c5
Show file tree

Hide file tree

Showing 18 changed files with 4,216 additions and 89 deletions.
diff --git a/packages/apps/kubernetes/Makefile b/packages/apps/kubernetes/Makefile
@@ -65,3 +65,7 @@ image-cluster-autoscaler:
 	echo "$(REGISTRY)/cluster-autoscaler:$(call settag,$(KUBERNETES_PKG_TAG))@$$(yq e '."containerimage.digest"' images/cluster-autoscaler.json -o json -r)" \
 		> images/cluster-autoscaler.tag
 	rm -f images/cluster-autoscaler.json
+
+prefs: # Print the names of the bundled KubeVirt instancetypes and preferences (one shell per line, so each value is echoed where it is captured)
+	INSTANCE_TYPES=$$(yq e '.metadata.name' -o=json -r ../../system/kubevirt-common-instancetypes/templates/instancetypes.yaml) && echo "$${INSTANCE_TYPES}"
+	INSTANCE_PREFS=$$(yq e '.metadata.name' -o=json -r ../../system/kubevirt-common-instancetypes/templates/preferences.yaml) && echo "$${INSTANCE_PREFS}"
diff --git a/packages/apps/kubernetes/README.md b/packages/apps/kubernetes/README.md
@@ -50,3 +50,266 @@ kubectl get secret -n <namespace> kubernetes-<clusterName>-admin-kubeconfig -o g
 | `addons.fluxcd.enabled`              | Enables Flux CD                                                                    | `false` |
 | `addons.fluxcd.valuesOverride`       | Custom values to override                                                          | `{}`    |
 
+## U Series
+
+The U Series is quite neutral and provides resources for
+general purpose applications.
+
+*U* is the abbreviation for "Universal", hinting at the universal
+attitude towards workloads.
+
+VMs of instance types will share physical CPU cores on a
+time-slice basis with other VMs.
+
+### U Series Characteristics
+
+Specific characteristics of this series are:
+- *Burstable CPU performance* - The workload has a baseline compute
+  performance but is permitted to burst beyond this baseline, if
+  excess compute resources are available.
+- *vCPU-To-Memory Ratio (1:4)* - A vCPU-to-Memory ratio of 1:4, for less
+  noise per node.
+
+## O Series
+
+The O Series is based on the U Series, with the only difference
+being that memory is overcommitted.
+
+*O* is the abbreviation for "Overcommitted".
+
+### O Series Characteristics
+
+Specific characteristics of this series are:
+- *Burstable CPU performance* - The workload has a baseline compute
+  performance but is permitted to burst beyond this baseline, if
+  excess compute resources are available.
+- *Overcommitted Memory* - Memory is over-committed in order to achieve
+  a higher workload density.
+- *vCPU-To-Memory Ratio (1:4)* - A vCPU-to-Memory ratio of 1:4, for less
+  noise per node.
+
+## CX Series
+
+The CX Series provides exclusive compute resources for compute
+intensive applications.
+
+*CX* is the abbreviation of "Compute Exclusive".
+
+The exclusive resources are given to the compute threads of the
+VM. In order to ensure this, some additional cores (depending
+on the number of disks and NICs) will be requested to offload
+the IO threading from cores dedicated to the workload.
+In addition, in this series, the NUMA topology of the used
+cores is provided to the VM.
+
+### CX Series Characteristics
+
+Specific characteristics of this series are:
+- *Hugepages* - Hugepages are used in order to improve memory
+  performance.
+- *Dedicated CPU* - Physical cores are exclusively assigned to every
+  vCPU in order to provide fixed and high compute guarantees to the
+  workload.
+- *Isolated emulator threads* - Hypervisor emulator threads are isolated
+  from the vCPUs in order to reduce emulation-related impact on the
+  workload.
+- *vNUMA* - Physical NUMA topology is reflected in the guest in order to
+  optimize guest-side cache utilization.
+- *vCPU-To-Memory Ratio (1:2)* - A vCPU-to-Memory ratio of 1:2.
+
+## GN Series
+
+The GN Series provides instance types intended for VMs with
+NVIDIA GPU resources attached.
+
+*GN* is the abbreviation of "GPU NVIDIA".
+
+This series is intended to be used with VMs consuming GPUs
+provided by the
+[NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator)
+which can be installed on Kubernetes and also is made available
+on OpenShift via OperatorHub.
+
+### GN Series Characteristics
+
+Specific characteristics of this series are:
+- *Has GPUs* - Has GPUs predefined.
+- *Burstable CPU performance* - The workload has a baseline compute
+  performance but is permitted to burst beyond this baseline, if
+  excess compute resources are available.
+- *vCPU-To-Memory Ratio (1:4)* - A vCPU-to-Memory ratio of 1:4, for less
+  noise per node.
+
+## M Series
+
+The M Series provides resources for memory intensive
+applications.
+
+*M* is the abbreviation of "Memory".
+
+### M Series Characteristics
+
+Specific characteristics of this series are:
+- *Hugepages* - Hugepages are used in order to improve memory
+  performance.
+- *Burstable CPU performance* - The workload has a baseline compute
+  performance but is permitted to burst beyond this baseline, if
+  excess compute resources are available.
+- *vCPU-To-Memory Ratio (1:8)* - A vCPU-to-Memory ratio of 1:8, for much
+  less noise per node.
+
+## N Series
+
+The N Series provides resources for network intensive DPDK
+applications, like VNFs.
+
+*N* is the abbreviation for "Network".
+
+This series of instancetypes requires nodes capable
+of running DPDK workloads and being marked with the respective
+node-role.kubevirt.io/worker-dpdk label as such.
+
+### N Series Characteristics
+
+Specific characteristics of this series are:
+- *Hugepages* - Hugepages are used in order to improve memory
+  performance.
+- *Dedicated CPU* - Physical cores are exclusively assigned to every
+  vCPU in order to provide fixed and high compute guarantees to the
+  workload.
+- *Isolated emulator threads* - Hypervisor emulator threads are isolated
+  from the vCPUs in order to reduce emulation-related impact on the
+  workload.
+- *vCPU-To-Memory Ratio (1:2)* - A vCPU-to-Memory ratio of 1:2.
+
+## RT Series
+
+The RT Series provides resources for realtime applications, like Oslat.
+
+*RT* is the abbreviation for "realtime".
+
+This series of instance types requires nodes capable of running
+realtime applications.
+
+### RT Series Characteristics
+
+Specific characteristics of this series are:
+- *Hugepages* - Hugepages are used in order to improve memory
+  performance.
+- *Dedicated CPU* - Physical cores are exclusively assigned to every
+  vCPU in order to provide fixed and high compute guarantees to the
+  workload.
+- *Isolated emulator threads* - Hypervisor emulator threads are isolated
+  from the vCPUs in order to reduce emulation-related impact on the
+  workload.
+- *vCPU-To-Memory Ratio (1:4)* - A vCPU-to-Memory ratio of 1:4 starting from
+  the medium size.
+
+## Development
+
+To get started with customizing or creating your own instancetypes and preferences
+see [DEVELOPMENT.md](./DEVELOPMENT.md).
+
+## Resources
+
+The following instancetype resources (cluster-wide and namespaced) are
+provided by this project:
+
+Name | vCPUs | Memory
+-----|-------|-------
+cx1.2xlarge  |  8  |  16Gi
+cx1.4xlarge  |  16  |  32Gi
+cx1.8xlarge  |  32  |  64Gi
+cx1.large  |  2  |  4Gi
+cx1.medium  |  1  |  2Gi
+cx1.xlarge  |  4  |  8Gi
+gn1.2xlarge  |  8  |  32Gi
+gn1.4xlarge  |  16  |  64Gi
+gn1.8xlarge  |  32  |  128Gi
+gn1.xlarge  |  4  |  16Gi
+m1.2xlarge  |  8  |  64Gi
+m1.4xlarge  |  16  |  128Gi
+m1.8xlarge  |  32  |  256Gi
+m1.large  |  2  |  16Gi
+m1.xlarge  |  4  |  32Gi
+n1.2xlarge  |  16  |  32Gi
+n1.4xlarge  |  32  |  64Gi
+n1.8xlarge  |  64  |  128Gi
+n1.large  |  4  |  8Gi
+n1.medium  |  4  |  4Gi
+n1.xlarge  |  8  |  16Gi
+o1.2xlarge  |  8  |  32Gi
+o1.4xlarge  |  16  |  64Gi
+o1.8xlarge  |  32  |  128Gi
+o1.large  |  2  |  8Gi
+o1.medium  |  1  |  4Gi
+o1.micro  |  1  |  1Gi
+o1.nano  |  1  |  512Mi
+o1.small  |  1  |  2Gi
+o1.xlarge  |  4  |  16Gi
+rt1.2xlarge  |  8  |  32Gi
+rt1.4xlarge  |  16  |  64Gi
+rt1.8xlarge  |  32  |  128Gi
+rt1.large  |  2  |  8Gi
+rt1.medium  |  1  |  4Gi
+rt1.micro  |  1  |  1Gi
+rt1.small  |  1  |  2Gi
+rt1.xlarge  |  4  |  16Gi
+u1.2xlarge  |  8  |  32Gi
+u1.2xmedium  |  2  |  4Gi
+u1.4xlarge  |  16  |  64Gi
+u1.8xlarge  |  32  |  128Gi
+u1.large  |  2  |  8Gi
+u1.medium  |  1  |  4Gi
+u1.micro  |  1  |  1Gi
+u1.nano  |  1  |  512Mi
+u1.small  |  1  |  2Gi
+u1.xlarge  |  4  |  16Gi
+
+The following preference resources (cluster-wide and namespaced) are
+provided by this project:
+
+Name | Guest OS
+-----|---------
+alpine | Alpine
+centos.7 | CentOS 7
+centos.7.desktop | CentOS 7
+centos.stream10 | CentOS Stream 10
+centos.stream10.desktop | CentOS Stream 10
+centos.stream8 | CentOS Stream 8
+centos.stream8.desktop | CentOS Stream 8
+centos.stream8.dpdk | CentOS Stream 8
+centos.stream9 | CentOS Stream 9
+centos.stream9.desktop | CentOS Stream 9
+centos.stream9.dpdk | CentOS Stream 9
+cirros | Cirros
+fedora | Fedora (amd64)
+fedora.arm64 | Fedora (arm64)
+opensuse.leap | OpenSUSE Leap
+opensuse.tumbleweed | OpenSUSE Tumbleweed
+rhel.10 | Red Hat Enterprise Linux 10 Beta (amd64)
+rhel.10.arm64 | Red Hat Enterprise Linux 10 Beta (arm64)
+rhel.7 | Red Hat Enterprise Linux 7
+rhel.7.desktop | Red Hat Enterprise Linux 7
+rhel.8 | Red Hat Enterprise Linux 8
+rhel.8.desktop | Red Hat Enterprise Linux 8
+rhel.8.dpdk | Red Hat Enterprise Linux 8
+rhel.9 | Red Hat Enterprise Linux 9 (amd64)
+rhel.9.arm64 | Red Hat Enterprise Linux 9 (arm64)
+rhel.9.desktop | Red Hat Enterprise Linux 9 Desktop (amd64)
+rhel.9.dpdk | Red Hat Enterprise Linux 9 DPDK (amd64)
+rhel.9.realtime | Red Hat Enterprise Linux 9 Realtime (amd64)
+sles | SUSE Linux Enterprise Server
+ubuntu | Ubuntu
+windows.10 | Microsoft Windows 10
+windows.10.virtio | Microsoft Windows 10 (virtio)
+windows.11 | Microsoft Windows 11
+windows.11.virtio | Microsoft Windows 11 (virtio)
+windows.2k16 | Microsoft Windows Server 2016
+windows.2k16.virtio | Microsoft Windows Server 2016 (virtio)
+windows.2k19 | Microsoft Windows Server 2019
+windows.2k19.virtio | Microsoft Windows Server 2019 (virtio)
+windows.2k22 | Microsoft Windows Server 2022
+windows.2k22.virtio | Microsoft Windows Server 2022 (virtio)
+windows.2k25 | Microsoft Windows Server 2025
+windows.2k25.virtio | Microsoft Windows Server 2025 (virtio)
diff --git a/packages/apps/kubernetes/templates/cluster.yaml b/packages/apps/kubernetes/templates/cluster.yaml
@@ -15,6 +15,11 @@ spec:
         node-role.kubernetes.io/{{ . }}: ""
         {{- end }}
     spec:
+      {{- with .group.instanceType }}
+      instancetype:
+        kind: VirtualMachineClusterInstancetype
+        name: {{ . }}
+      {{- end }}
       runStrategy: Always
       template:
         metadata:
@@ -26,10 +31,12 @@ spec:
             {{- end }}
         spec:
           domain:
+            {{- if and .group.resources .group.resources.cpu }}
             cpu:
               threads: 1
               cores: {{ .group.resources.cpu }}
               sockets: 1
+            {{- end }}
             devices:
               disks:
               - name: system
@@ -43,8 +50,10 @@ spec:
               interfaces:
               - name: default
                 bridge: {}
+            {{- if and .group.resources .group.resources.memory }}
             memory:
               guest: {{ .group.resources.memory }}
+            {{- end }}
           evictionStrategy: External
           volumes:
           - name: system
@@ -176,6 +185,14 @@ spec:
   template:
     {{- $kubevirtmachinetemplate | nindent 4 }}
 ---
+{{- $instanceType := dict }}
+{{- if $group.instanceType }}
+{{-   $instanceType = (lookup "instancetype.kubevirt.io/v1beta1" "VirtualMachineClusterInstancetype" "" $group.instanceType) }}
+{{-   if not $instanceType }}
+{{-     fail (printf "Specified instancetype not exists in cluster: %s" $group.instanceType) }}
+{{-   end }}
+{{- end }}
+
 apiVersion: cluster.x-k8s.io/v1beta1
 kind: MachineDeployment
 metadata:
@@ -184,8 +201,16 @@ metadata:
   annotations:
     cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "{{ $group.minReplicas }}"
     cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "{{ $group.maxReplicas }}"
+    {{- if and $group.resources $group.resources.memory }}
     capacity.cluster-autoscaler.kubernetes.io/memory: "{{ $group.resources.memory }}"
+    {{- else }}
+    capacity.cluster-autoscaler.kubernetes.io/memory: "{{ $instanceType.spec.memory.guest }}"
+    {{- end }}
+    {{- if and $group.resources $group.resources.cpu }}
     capacity.cluster-autoscaler.kubernetes.io/cpu: "{{ $group.resources.cpu }}"
+    {{- else }}
+    capacity.cluster-autoscaler.kubernetes.io/cpu: "{{ $instanceType.spec.cpu.guest }}"
+    {{- end }}
 spec:
   clusterName: {{ $.Release.Name }}
   template:

diff --git a/packages/apps/kubernetes/values.yaml b/packages/apps/kubernetes/values.yaml
@@ -15,13 +15,15 @@ nodeGroups:
   md0:
     minReplicas: 0
     maxReplicas: 10
-    resources:
-      cpu: 2
-      memory: 1024Mi
+    instanceType: "u1.medium"
     ephemeralStorage: 20Gi
     roles:
     - ingress-nginx
 
+    resources:
+      cpu: ""
+      memory: ""
+
 ## @section Cluster Addons
 ##
 addons:

diff --git a/packages/apps/virtual-machine/Chart.yaml b/packages/apps/virtual-machine/Chart.yaml
@@ -17,7 +17,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.4.0
+version: 0.5.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to

diff --git a/packages/apps/virtual-machine/Makefile b/packages/apps/virtual-machine/Makefile
@@ -1,10 +1,9 @@
 include ../../../scripts/package.mk
 
 generate:
-	readme-generator -v values.yaml -s values.schema.json.tmp -r README.md
-	cat values.schema.json.tmp | \
-		jq '.properties.image.enum = ["ubuntu", "cirros", "alpine", "fedora", "talos"]' | \
-		jq '.properties.resources.properties.memory["x-display"] = "slider"' | \
-		jq '.properties.externalPorts.items.type = "integer"' \
-		> values.schema.json
-	rm -f values.schema.json.tmp
+	readme-generator -r README.md -s values.schema.json -v values.yaml
+	types_json=$$(yq e '.metadata.name' -o=json -r ../../system/kubevirt-common-instancetypes/templates/instancetypes.yaml | yq 'split(" ")' -o json) && \
+	  yq -i -o json ".properties.instanceType.optional=true | .properties.instanceType.enum = $${types_json}" values.schema.json
+	prefs_json=$$(yq e '.metadata.name' -o=json -r ../../system/kubevirt-common-instancetypes/templates/preferences.yaml | yq 'split(" ")' -o json) && \
+	  yq -i -o json ".properties.instanceProfile.optional=true | .properties.instanceProfile.enum = $${prefs_json}" values.schema.json
+	yq -o json -i '.properties.externalPorts.items.type = "integer"' values.schema.json