Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional garbage collection of finished workloads #2686

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
16 changes: 16 additions & 0 deletions apis/config/v1beta1/configuration_types.go
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm leaving the API discussion to #2742

Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ type Configuration struct {

// Resources provides additional configuration options for handling the resources.
Resources *Resources `json:"resources,omitempty"`

// ObjectRetentionPolicies provides configuration options for retention of Kueue owned
// objects. A nil value will disable automatic deletion for all objects.
ObjectRetentionPolicies *ObjectRetentionPolicies `json:"objectRetentionPolicies,omitempty"`
}

type ControllerManager struct {
Expand Down Expand Up @@ -400,3 +404,15 @@ type FairSharing struct {
// The default strategy is ["LessThanOrEqualToFinalShare", "LessThanInitialShare"].
PreemptionStrategies []PreemptionStrategy `json:"preemptionStrategies,omitempty"`
}

type ObjectRetentionPolicies struct {
// FinishedWorkloadRetention is the duration to retain finished Workloads.
// A duration of 0 will delete finished Workloads immediately.
// A nil value will disable automatic deletion.
// The value is represented using the metav1.Duration format, allowing for flexible
// specification of time units (e.g., "24h", "1h30m", "30s").
//
// Defaults to null.
// +optional
FinishedWorkloadRetention *metav1.Duration `json:"finishedWorkloadRetention"`
mwysokin marked this conversation as resolved.
Show resolved Hide resolved
}
3 changes: 3 additions & 0 deletions apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,7 @@ func SetDefaults_Configuration(cfg *Configuration) {
if fs := cfg.FairSharing; fs != nil && fs.Enable && len(fs.PreemptionStrategies) == 0 {
fs.PreemptionStrategies = []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare}
}
if cfg.ObjectRetentionPolicies == nil {
cfg.ObjectRetentionPolicies = &ObjectRetentionPolicies{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are not adding anything relevant, let's not add any code to "defaults"

}
}
136 changes: 90 additions & 46 deletions apis/config/v1beta1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ func TestSetDefaults_Configuration(t *testing.T) {
WorkerLostTimeout: &metav1.Duration{Duration: DefaultMultiKueueWorkerLostTimeout},
}

defaultObjectRetentionPolicies := &ObjectRetentionPolicies{
FinishedWorkloadRetention: nil,
}

podsReadyTimeoutTimeout := metav1.Duration{Duration: defaultPodsReadyTimeout}
podsReadyTimeoutOverwrite := metav1.Duration{Duration: time.Minute}

Expand All @@ -121,10 +125,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"defaulting ControllerManager": {
Expand Down Expand Up @@ -162,10 +167,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"should not default ControllerManager": {
Expand Down Expand Up @@ -219,10 +225,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"should not set LeaderElectionID": {
Expand Down Expand Up @@ -260,10 +267,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"defaulting InternalCertManagement": {
Expand All @@ -278,10 +286,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
WebhookServiceName: ptr.To(DefaultWebhookServiceName),
WebhookSecretName: ptr.To(DefaultWebhookSecretName),
},
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"should not default InternalCertManagement": {
Expand All @@ -297,10 +306,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"should not default values in custom ClientConnection": {
Expand All @@ -324,9 +334,10 @@ func TestSetDefaults_Configuration(t *testing.T) {
QPS: ptr.To[float32](123.0),
Burst: ptr.To[int32](456),
},
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"should default empty custom ClientConnection": {
Expand All @@ -343,10 +354,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: overwriteNamespaceIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"defaulting waitForPodsReady values": {
Expand Down Expand Up @@ -374,10 +386,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"set waitForPodsReady.blockAdmission to false when enable is false": {
Expand Down Expand Up @@ -405,10 +418,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"respecting provided waitForPodsReady values": {
Expand Down Expand Up @@ -442,10 +456,11 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"integrations": {
Expand All @@ -468,8 +483,9 @@ func TestSetDefaults_Configuration(t *testing.T) {
Frameworks: []string{"a", "b"},
PodOptions: defaultIntegrations.PodOptions,
},
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"queue visibility": {
Expand Down Expand Up @@ -498,7 +514,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
MaxCount: 0,
},
},
MultiKueue: defaultMultiKueue,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"multiKueue": {
Expand Down Expand Up @@ -526,6 +543,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
Origin: ptr.To("multikueue-manager1"),
WorkerLostTimeout: &metav1.Duration{Duration: time.Minute},
},
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"multiKueue GCInterval 0": {
Expand All @@ -552,6 +570,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
Origin: ptr.To("multikueue-manager1"),
WorkerLostTimeout: &metav1.Duration{Duration: 15 * time.Minute},
},
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"add default fair sharing configuration when enabled": {
Expand All @@ -577,6 +596,31 @@ func TestSetDefaults_Configuration(t *testing.T) {
Enable: true,
PreemptionStrategies: []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare},
},
ObjectRetentionPolicies: defaultObjectRetentionPolicies,
},
},
"set object retention policy for finished workloads": {
original: &Configuration{
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ObjectRetentionPolicies: &ObjectRetentionPolicies{
FinishedWorkloadRetention: &metav1.Duration{Duration: 30 * time.Minute},
},
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
ControllerManager: defaultCtrlManagerConfigurationSpec,
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
ObjectRetentionPolicies: &ObjectRetentionPolicies{
FinishedWorkloadRetention: &metav1.Duration{Duration: 30 * time.Minute},
},
},
},
}
Expand Down
2 changes: 2 additions & 0 deletions charts/kueue/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ managerConfig:
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
#resources:
# excludeResourcePrefixes: []
#objectRetentionPolicies:
# finishedWorkloadRetention: null # null indicates infinite retention, 0s means no retention at all
# ports definition for metricsService and webhookService.
metricsService:
ports:
Expand Down
2 changes: 2 additions & 0 deletions config/components/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,5 @@ integrations:
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
#resources:
# excludeResourcePrefixes: []
#objectRetentionPolicies:
# finishedWorkloadRetention: null # null indicates infinite retention, 0s means no retention at all
Loading