From 2adb28242fd8beb9760280496736816613323b32 Mon Sep 17 00:00:00 2001 From: den-rgb Date: Mon, 2 Dec 2024 16:29:28 +0000 Subject: [PATCH] OCM-12057 | feat: Added adjustable custom wait times --- docs/resources/cluster_rosa_classic.md | 1 + docs/resources/cluster_rosa_hcp.md | 2 ++ .../classic/cluster_rosa_classic_resource.go | 16 ++++++++-- .../classic/cluster_rosa_classic_state.go | 1 + provider/clusterrosa/common/consts.go | 10 +++--- provider/clusterrosa/hcp/resource.go | 32 ++++++++++++++++--- provider/clusterrosa/hcp/state.go | 10 +++--- provider/common/cluster_waiter.go | 10 ++++++ 8 files changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/resources/cluster_rosa_classic.md b/docs/resources/cluster_rosa_classic.md index 7558a1aa..78a4d2f5 100644 --- a/docs/resources/cluster_rosa_classic.md +++ b/docs/resources/cluster_rosa_classic.md @@ -78,6 +78,7 @@ resource "rhcs_cluster_rosa_classic" "rosa_sts_cluster" { - `kms_key_arn` (String) Used to encrypt root volume of compute node pools. The key ARN is the Amazon Resource Name (ARN) of a AWS Key Management Service (KMS) Key. It is a unique, fully qualified identifier for the AWS KMS Key. A key ARN includes the AWS account, Region, and the key ID(optional). After the creation of the resource, it is not possible to update the attribute value. - `machine_cidr` (String) Block of IP addresses for nodes. After the creation of the resource, it is not possible to update the attribute value. - `max_replicas` (Number) Maximum replicas of worker nodes in a machine pool. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. 
For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md) +- `max_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for the cluster to be in a ready state. - `min_replicas` (Number) Minimum replicas of worker nodes in a machine pool. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md) - `multi_az` (Boolean) Indicates if the cluster should be deployed to multiple availability zones. Default value is 'false'. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md) - `pod_cidr` (String) Block of IP addresses for pods. After the creation of the resource, it is not possible to update the attribute value. diff --git a/docs/resources/cluster_rosa_hcp.md b/docs/resources/cluster_rosa_hcp.md index 00e51318..d8e5d489 100644 --- a/docs/resources/cluster_rosa_hcp.md +++ b/docs/resources/cluster_rosa_hcp.md @@ -73,6 +73,8 @@ resource "rhcs_cluster_rosa_hcp" "rosa_sts_cluster" { - `host_prefix` (Number) Length of the prefix of the subnet assigned to each node. After the creation of the resource, it is not possible to update the attribute value. - `kms_key_arn` (String) Used to encrypt root volume of compute node pools. The key ARN is the Amazon Resource Name (ARN) of a AWS Key Management Service (KMS) Key. It is a unique, fully qualified identifier for the AWS KMS Key. A key ARN includes the AWS account, Region, and the key ID(optional). 
After the creation of the resource, it is not possible to update the attribute value. - `machine_cidr` (String) Block of IP addresses for nodes. After the creation of the resource, it is not possible to update the attribute value. +- `max_hcp_cluster_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for a HCP cluster to be in a ready state. +- `max_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for machine pools to be in a ready state. - `pod_cidr` (String) Block of IP addresses for pods. After the creation of the resource, it is not possible to update the attribute value. - `private` (Boolean) Provides private connectivity from your cluster's VPC to Red Hat SRE, without exposing traffic to the public internet. After the creation of the resource, it is not possible to update the attribute value. - `properties` (Map of String) User defined properties. It is essential to include property 'role_creator_arn' with the value of the user creating the cluster. Example: properties = {rosa_creator_arn = data.aws_caller_identity.current.arn} diff --git a/provider/clusterrosa/classic/cluster_rosa_classic_resource.go b/provider/clusterrosa/classic/cluster_rosa_classic_resource.go index a07cc3df..4681dedd 100644 --- a/provider/clusterrosa/classic/cluster_rosa_classic_resource.go +++ b/provider/clusterrosa/classic/cluster_rosa_classic_resource.go @@ -457,6 +457,10 @@ func (r *ClusterRosaClassicResource) Schema(ctx context.Context, req resource.Sc Description: "Wait until the cluster is either in a ready state or in an error state. 
The waiter has a timeout of 60 minutes, with the default value set to false", Optional: true, }, + "max_wait_timeout_in_minutes": schema.Int64Attribute{ + Description: "This value sets the maximum duration in minutes to wait for the cluster to be in a ready state.", + Optional: true, + }, }, } } @@ -882,7 +886,15 @@ func (r *ClusterRosaClassicResource) Create(ctx context.Context, request resourc } if common.HasValue(state.WaitForCreateComplete) && state.WaitForCreateComplete.ValueBool() { - object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutInMinutes) + timeOut := common.OptionalInt64(state.MaxWaitTimeoutInMinutes) + timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxWaitTimeoutInMinutes) + if err != nil { + response.Diagnostics.AddError( + "Waiting for cluster creation finished with error", + fmt.Sprintf("Waiting for cluster creation finished with the error %v", err), + ) + } + object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), *timeOut) if err != nil { response.Diagnostics.AddError( "Waiting for cluster creation finished with error", @@ -1370,7 +1382,7 @@ func (r *ClusterRosaClassicResource) Delete(ctx context.Context, request resourc if common.HasValue(state.DisableWaitingInDestroy) && state.DisableWaitingInDestroy.ValueBool() { tflog.Info(ctx, "Waiting for destroy to be completed, is disabled") } else { - timeout := rosa.DefaultWaitTimeoutInMinutes + timeout := rosa.MaxWaitTimeoutInMinutes if common.HasValue(state.DestroyTimeout) { if state.DestroyTimeout.ValueInt64() <= 0 { response.Diagnostics.AddWarning(rosa.NonPositiveTimeoutSummary, fmt.Sprintf(rosa.NonPositiveTimeoutFormat, state.ID.ValueString())) diff --git a/provider/clusterrosa/classic/cluster_rosa_classic_state.go b/provider/clusterrosa/classic/cluster_rosa_classic_state.go index e7b18fbc..3df0181e 100644 --- a/provider/clusterrosa/classic/cluster_rosa_classic_state.go +++ b/provider/clusterrosa/classic/cluster_rosa_classic_state.go @@ 
-79,4 +79,5 @@ type ClusterRosaClassicState struct { DisableWaitingInDestroy types.Bool `tfsdk:"disable_waiting_in_destroy"` DestroyTimeout types.Int64 `tfsdk:"destroy_timeout"` WaitForCreateComplete types.Bool `tfsdk:"wait_for_create_complete"` + MaxWaitTimeoutInMinutes types.Int64 `tfsdk:"max_wait_timeout_in_minutes"` } diff --git a/provider/clusterrosa/common/consts.go b/provider/clusterrosa/common/consts.go index 518e1de9..4310ec0f 100644 --- a/provider/clusterrosa/common/consts.go +++ b/provider/clusterrosa/common/consts.go @@ -15,11 +15,11 @@ const ( PropertyRosaTfCommit = tagsPrefix + "tf_commit" PropertyRosaCreatorArn = tagsPrefix + "creator_arn" - DefaultWaitTimeoutForHCPControlPlaneInMinutes = int64(45) - DefaultWaitTimeoutInMinutes = int64(60) - DefaultPollingIntervalInMinutes = 2 - NonPositiveTimeoutSummary = "Can't poll cluster state with a non-positive timeout" - NonPositiveTimeoutFormat = "Can't poll state of cluster with identifier '%s', the timeout that was set is not a positive number" + MaxHCPClusterWaitTimeoutInMinutes = int64(45) + MaxWaitTimeoutInMinutes = int64(60) + DefaultPollingIntervalInMinutes = 2 + NonPositiveTimeoutSummary = "Can't poll cluster state with a non-positive timeout" + NonPositiveTimeoutFormat = "Can't poll state of cluster with identifier '%s', the timeout that was set is not a positive number" MaxClusterNameLength = 54 MaxClusterDomainPrefixLength = 15 diff --git a/provider/clusterrosa/hcp/resource.go b/provider/clusterrosa/hcp/resource.go index 77f04f5f..420c8cbd 100644 --- a/provider/clusterrosa/hcp/resource.go +++ b/provider/clusterrosa/hcp/resource.go @@ -320,6 +320,14 @@ func (r *ClusterRosaHcpResource) Schema(ctx context.Context, req resource.Schema Description: "Wait until the cluster standard compute pools are created. The waiter has a timeout of 60 minutes, with the default value set to false. 
This can only be provided when also waiting for create completion.", Optional: true, }, + "max_hcp_cluster_wait_timeout_in_minutes": schema.Int64Attribute{ + Description: "This value sets the maximum duration in minutes to wait for a HCP cluster to be in a ready state.", + Optional: true, + }, + "max_wait_timeout_in_minutes": schema.Int64Attribute{ + Description: "This value sets the maximum duration in minutes to wait for machine pools to be in a ready state.", + Optional: true, + }, "create_admin_user": schema.BoolAttribute{ Description: "Indicates if create cluster admin user. Set it true to create cluster admin user with default username `cluster-admin` " + "and generated password. It will be ignored if `admin_credentials` is set." + common.ValueCannotBeChangedStringDescription, @@ -393,7 +401,7 @@ func (r *ClusterRosaHcpResource) Configure(ctx context.Context, req resource.Con if !ok { resp.Diagnostics.AddError( "Unexpected Resource Configure Type", - fmt.Sprintf("Expected *sdk.Connaction, got: %T. Please report this issue to the provider developers.", req.ProviderData), + fmt.Sprintf("Expected *sdk.Connection, got: %T. 
Please report this issue to the provider developers.", req.ProviderData), ) return } @@ -764,7 +772,15 @@ func (r *ClusterRosaHcpResource) Create(ctx context.Context, request resource.Cr if shouldWaitCreationComplete { tflog.Info(ctx, "Waiting for cluster to get ready") - object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutForHCPControlPlaneInMinutes) + timeOut := common.OptionalInt64(state.MaxHCPClusterWaitTimeoutInMinutes) + timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxHCPClusterWaitTimeoutInMinutes) + if err != nil { + response.Diagnostics.AddError( + "Waiting for cluster creation finished with error", + fmt.Sprintf("Waiting for cluster creation finished with the error %v", err), + ) + } + object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), *timeOut) if err != nil { response.Diagnostics.AddError( "Waiting for cluster creation finished with error", @@ -778,7 +794,15 @@ func (r *ClusterRosaHcpResource) Create(ctx context.Context, request resource.Cr } if shouldWaitComputeNodesComplete { tflog.Info(ctx, "Waiting for standard compute nodes to get ready") - object, err = r.ClusterWait.WaitForStdComputeNodesToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutInMinutes) + timeOut := common.OptionalInt64(state.MaxWaitTimeoutInMinutes) + timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxWaitTimeoutInMinutes) + if err != nil { + response.Diagnostics.AddError( + "Waiting for cluster creation finished with error", + fmt.Sprintf("Waiting for cluster creation finished with the error %v", err), + ) + } + object, err = r.ClusterWait.WaitForStdComputeNodesToBeReady(ctx, object.ID(), *timeOut) if err != nil { response.Diagnostics.AddError( "Waiting for std compute nodes completion finished with error", @@ -1260,7 +1284,7 @@ func (r *ClusterRosaHcpResource) Delete(ctx context.Context, request resource.De if common.HasValue(state.DisableWaitingInDestroy) && state.DisableWaitingInDestroy.ValueBool() { 
// ValidateTimeout resolves the wait timeout, in minutes, that a waiter should
// use.
//
// timeOut is the optional user-supplied value; defaultTimeout is used when the
// user did not supply one.
//
// The returned pointer is never nil:
//   - timeOut == nil: a pointer to defaultTimeout and a nil error.
//   - *timeOut <= 0:  a pointer to defaultTimeout and a non-nil error.
//   - otherwise:      timeOut itself and a nil error.
//
// A non-nil error means the supplied value was rejected. Callers should report
// it and stop; the fallback value exists only so that the result stays safe to
// dereference.
func ValidateTimeout(timeOut *int64, defaultTimeout int64) (*int64, error) {
	if timeOut == nil {
		return &defaultTimeout, nil
	}
	if *timeOut <= 0 {
		// Return the default together with the error instead of nil, so a
		// caller that records the error and still dereferences the result
		// does not hit a nil-pointer panic.
		return &defaultTimeout, fmt.Errorf("timeout must be greater than 0 minutes")
	}
	return timeOut, nil
}