Merge pull request #54 from klueska/add-custom-config
Add support for opaque configs with examples
k8s-ci-robot authored Aug 30, 2024
2 parents 0c883fd + dfc6542 commit 4269e61
Showing 18 changed files with 650 additions and 486 deletions.
66 changes: 48 additions & 18 deletions README.md
@@ -225,10 +225,10 @@ metadata:
```

Next, deploy four example apps that demonstrate how `ResourceClaim`s,
`ResourceClaimTemplate`s, and custom `ClaimParameter` objects can be used to
request access to resources in various ways:
`ResourceClaimTemplate`s, and custom `GpuConfig` objects can be used to
select and configure resources in various ways:
```bash
kubectl apply --filename=demo/gpu-test{1,2,3,4}.yaml
kubectl apply --filename=demo/gpu-test{1,2,3,4,5}.yaml
```

And verify that they are coming up successfully:
@@ -242,10 +242,11 @@ gpu-test2 pod0 0/2 Pending 0 2s
gpu-test3 pod0 0/1 ContainerCreating 0 2s
gpu-test3 pod1 0/1 ContainerCreating 0 2s
gpu-test4 pod0 0/1 Pending 0 2s
gpu-test5 pod0 0/4 Pending 0 2s
...
```

Use your favorite editor to look through each of the `gpu-test{1,2,3,4}.yaml`
Use your favorite editor to look through each of the `gpu-test{1,2,3,4,5}.yaml`
files and see what they are doing. The semantics of each match the figure
below:

@@ -254,12 +255,16 @@ below:
Then dump the logs of each app to verify that GPUs were allocated to them
according to these semantics:
```bash
for example in $(seq 1 4); do \
for example in $(seq 1 5); do \
echo "gpu-test${example}:"
for pod in $(kubectl get pod -n gpu-test${example} --output=jsonpath='{.items[*].metadata.name}'); do \
for ctr in $(kubectl get pod -n gpu-test${example} ${pod} -o jsonpath='{.spec.containers[*].name}'); do \
echo "${pod} ${ctr}:"
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep GPU_DEVICE
if [ "${example}" -lt 3 ]; then
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+="
else
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+"
fi
done
done
echo ""
@@ -270,43 +275,67 @@ This should produce output similar to the following:
```bash
gpu-test1:
pod0 ctr0:
declare -x GPU_DEVICE_0="gpu-e7b42cb1-4fd8-91b2-bc77-352a0c1f5747"
declare -x GPU_DEVICE_0="gpu-ee3e4b55-fcda-44b8-0605-64b7a9967744"
pod1 ctr0:
declare -x GPU_DEVICE_0="gpu-f11773a1-5bfb-e48b-3d98-1beb5baaf08e"
declare -x GPU_DEVICE_0="gpu-9ede7e32-5825-a11b-fa3d-bab6d47e0243"

gpu-test2:
pod0 ctr0:
declare -x GPU_DEVICE_0="gpu-e7b42cb1-4fd8-91b2-bc77-352a0c1f5747"
declare -x GPU_DEVICE_1="gpu-f11773a1-5bfb-e48b-3d98-1beb5baaf08e"

gpu-test3:
pod0 ctr0:
declare -x GPU_DEVICE_0="gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
pod0 ctr1:
declare -x GPU_DEVICE_0="gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"

gpu-test3:
gpu-test4:
pod0 ctr0:
declare -x GPU_DEVICE_0="gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
pod1 ctr0:
declare -x GPU_DEVICE_0="gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"

gpu-test4:
pod0 ctr0:
gpu-test5:
pod0 ts-ctr0:
declare -x GPU_DEVICE_0="gpu-18db0e85-99e9-c746-8531-ffeb86328b39"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Long"
pod0 ts-ctr1:
declare -x GPU_DEVICE_0="gpu-18db0e85-99e9-c746-8531-ffeb86328b39"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Long"
pod0 sp-ctr0:
declare -x GPU_DEVICE_1="gpu-93d37703-997c-c46f-a531-755e3e0dc2ac"
declare -x GPU_DEVICE_1_PARTITION_COUNT="10"
declare -x GPU_DEVICE_1_SHARING_STRATEGY="SpacePartitioning"
pod0 sp-ctr1:
declare -x GPU_DEVICE_1="gpu-93d37703-997c-c46f-a531-755e3e0dc2ac"
declare -x GPU_DEVICE_2="gpu-ee3e4b55-fcda-44b8-0605-64b7a9967744"
declare -x GPU_DEVICE_3="gpu-9ede7e32-5825-a11b-fa3d-bab6d47e0243"
declare -x GPU_DEVICE_1_PARTITION_COUNT="10"
declare -x GPU_DEVICE_1_SHARING_STRATEGY="SpacePartitioning"
```
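The naming convention in this output — one `GPU_DEVICE_<N>` variable per allocated device, plus optional suffixed variables such as `GPU_DEVICE_<N>_SHARING_STRATEGY` for its configuration — is straightforward to consume from inside a container. A minimal Go sketch, purely for illustration (the helper `gpuDevices` is invented here and is not part of the driver):

```go
package main

import (
	"fmt"
	"os"
	"regexp"
)

// gpuDevices extracts GPU_DEVICE_<N>=<uuid> entries from an environment
// list, returning a map from device index to UUID. Variables with extra
// suffixes (e.g. GPU_DEVICE_0_SHARING_STRATEGY) deliberately do not match
// the pattern, since the digits must be followed directly by '='.
func gpuDevices(environ []string) map[string]string {
	re := regexp.MustCompile(`^GPU_DEVICE_([0-9]+)=(.*)$`)
	devices := map[string]string{}
	for _, kv := range environ {
		if m := re.FindStringSubmatch(kv); m != nil {
			devices[m[1]] = m[2]
		}
	}
	return devices
}

func main() {
	for idx, uuid := range gpuDevices(os.Environ()) {
		// Per-device config shares the same prefix, e.g.
		// GPU_DEVICE_0_SHARING_STRATEGY or GPU_DEVICE_0_TIMESLICE_INTERVAL.
		strategy := os.Getenv("GPU_DEVICE_" + idx + "_SHARING_STRATEGY")
		fmt.Printf("GPU %s: %s (sharing: %q)\n", idx, uuid, strategy)
	}
}
```

Run inside one of the demo containers, this would print one line per `GPU_DEVICE_<N>` variable shown above.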

In this example resource driver, no "actual" GPUs are made available to any
containers. Instead, a set of environment variables is set in each container
to indicate which GPUs *would* have been injected into them by a real resource
driver.
driver and how they *would* have been configured.

You can use the UUIDs of the GPUs set in these environment variables to verify
that they were handed out in a way consistent with the semantics shown in the
figure above.
You can use the UUIDs of the GPUs as well as the GPU sharing settings set in
these environment variables to verify that they were handed out in a way
consistent with the semantics shown in the figure above.

Once you have verified everything is running correctly, delete all of the
example apps:
```bash
kubectl delete --wait=false --filename=demo/gpu-test{1,2,3,4}.yaml
kubectl delete --wait=false --filename=demo/gpu-test{1,2,3,4,5}.yaml
```

And wait for them to terminate:
@@ -320,6 +349,7 @@ gpu-test2 pod0 2/2 Terminating 0 31m
gpu-test3 pod0 1/1 Terminating 0 31m
gpu-test3 pod1 1/1 Terminating 0 31m
gpu-test4 pod0 1/1 Terminating 0 31m
gpu-test5 pod0 4/4 Terminating 0 31m
...
```

90 changes: 76 additions & 14 deletions api/example.com/resource/gpu/v1alpha1/api.go
Original file line number Diff line number Diff line change
@@ -17,32 +17,94 @@
package v1alpha1

import (
"k8s.io/utils/ptr"
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer/json"
)

const (
GroupName = "gpu.resource.example.com"
Version = "v1alpha1"

GpuDeviceType = "gpu"
UnknownDeviceType = "unknown"

GpuClaimParametersKind = "GpuClaimParameters"
GpuConfigKind = "GpuConfig"
)

func DefaultDeviceClassParametersSpec() *DeviceClassParametersSpec {
return &DeviceClassParametersSpec{
DeviceSelector: []DeviceSelector{
{
Type: GpuDeviceType,
Name: "*",
// Decoder implements a decoder for objects in this API group.
var Decoder runtime.Decoder

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// GpuConfig holds the set of parameters for configuring a GPU.
type GpuConfig struct {
metav1.TypeMeta `json:",inline"`
Sharing *GpuSharing `json:"sharing,omitempty"`
}

// DefaultGpuConfig provides the default GPU configuration.
func DefaultGpuConfig() *GpuConfig {
return &GpuConfig{
TypeMeta: metav1.TypeMeta{
APIVersion: GroupName + "/" + Version,
Kind: GpuConfigKind,
},
Sharing: &GpuSharing{
Strategy: TimeSlicingStrategy,
TimeSlicingConfig: &TimeSlicingConfig{
Interval: "Default",
},
},
}
}

func DefaultGpuClaimParametersSpec() *GpuClaimParametersSpec {
return &GpuClaimParametersSpec{
Count: ptr.To(1),
// Normalize updates a GpuConfig with implied default values based on other settings.
func (c *GpuConfig) Normalize() error {
if c == nil {
return fmt.Errorf("config is 'nil'")
}
if c.Sharing == nil {
c.Sharing = &GpuSharing{
Strategy: TimeSlicingStrategy,
}
}
if c.Sharing.Strategy == TimeSlicingStrategy && c.Sharing.TimeSlicingConfig == nil {
c.Sharing.TimeSlicingConfig = &TimeSlicingConfig{
Interval: "Default",
}
}
if c.Sharing.Strategy == SpacePartitioningStrategy && c.Sharing.SpacePartitioningConfig == nil {
c.Sharing.SpacePartitioningConfig = &SpacePartitioningConfig{
PartitionCount: 1,
}
}
return nil
}

func init() {
// Create a new scheme and add our types to it. If at some point in the
// future a new version of the configuration API becomes necessary, then
// conversion functions can be generated and registered to continue
// supporting older versions.
scheme := runtime.NewScheme()
schemeGroupVersion := schema.GroupVersion{
Group: GroupName,
Version: Version,
}
scheme.AddKnownTypes(schemeGroupVersion,
&GpuConfig{},
)
metav1.AddToGroupVersion(scheme, schemeGroupVersion)

// Set up a json serializer to decode our types.
Decoder = json.NewSerializerWithOptions(
json.DefaultMetaFactory,
scheme,
scheme,
json.SerializerOptions{
Pretty: true, Strict: true,
},
)
}
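The decode-then-normalize flow that `api.go` sets up can be sketched without the apimachinery dependencies. The following is a reduced stand-in, not the driver's actual code path: it approximates the `Strict: true` serializer with plain `encoding/json` and `DisallowUnknownFields`, and trims the types down to the fields exercised here (the real types carry `metav1.TypeMeta` and more):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// Reduced stand-ins for the v1alpha1 API types.
type TimeSlicingConfig struct {
	Interval string `json:"interval,omitempty"`
}

type GpuSharing struct {
	Strategy          string             `json:"strategy,omitempty"`
	TimeSlicingConfig *TimeSlicingConfig `json:"timeSlicingConfig,omitempty"`
}

type GpuConfig struct {
	Sharing *GpuSharing `json:"sharing,omitempty"`
}

// Normalize mirrors the defaulting above: a missing sharing stanza becomes
// time-slicing, and time-slicing without a config gets the "Default" interval.
func (c *GpuConfig) Normalize() error {
	if c == nil {
		return fmt.Errorf("config is 'nil'")
	}
	if c.Sharing == nil {
		c.Sharing = &GpuSharing{Strategy: "TimeSlicing"}
	}
	if c.Sharing.Strategy == "TimeSlicing" && c.Sharing.TimeSlicingConfig == nil {
		c.Sharing.TimeSlicingConfig = &TimeSlicingConfig{Interval: "Default"}
	}
	return nil
}

// decodeStrict rejects unknown fields, similar in spirit to the
// strict scheme-backed Decoder that the real package constructs in init().
func decodeStrict(data []byte) (*GpuConfig, error) {
	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	var c GpuConfig
	if err := dec.Decode(&c); err != nil {
		return nil, err
	}
	return &c, nil
}

func main() {
	c, err := decodeStrict([]byte(`{"sharing":{"strategy":"TimeSlicing"}}`))
	if err != nil {
		panic(err)
	}
	if err := c.Normalize(); err != nil {
		panic(err)
	}
	fmt.Println(c.Sharing.TimeSlicingConfig.Interval) // prints "Default"
}
```

This shows why `Normalize` exists: an opaque config may legally omit the sharing stanza or its strategy-specific sub-config, and the driver fills in the implied defaults after decoding.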
56 changes: 0 additions & 56 deletions api/example.com/resource/gpu/v1alpha1/deviceclass.go

This file was deleted.

49 changes: 0 additions & 49 deletions api/example.com/resource/gpu/v1alpha1/gpuclaim.go

This file was deleted.
