Skip to content

Commit

Permalink
OCM-12057 | feat: Added adjustable custom wait times
Browse files Browse the repository at this point in the history
  • Loading branch information
den-rgb committed Dec 3, 2024
1 parent 9205671 commit 2adb282
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 15 deletions.
1 change: 1 addition & 0 deletions docs/resources/cluster_rosa_classic.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ resource "rhcs_cluster_rosa_classic" "rosa_sts_cluster" {
- `kms_key_arn` (String) Used to encrypt root volume of compute node pools. The key ARN is the Amazon Resource Name (ARN) of an AWS Key Management Service (KMS) Key. It is a unique, fully qualified identifier for the AWS KMS Key. A key ARN includes the AWS account, Region, and the key ID (optional). After the creation of the resource, it is not possible to update the attribute value.
- `machine_cidr` (String) Block of IP addresses for nodes. After the creation of the resource, it is not possible to update the attribute value.
- `max_replicas` (Number) Maximum replicas of worker nodes in a machine pool. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md)
- `max_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for the cluster to be in a ready state.
- `min_replicas` (Number) Minimum replicas of worker nodes in a machine pool. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md)
- `multi_az` (Boolean) Indicates if the cluster should be deployed to multiple availability zones. Default value is 'false'. This attribute specifically applies to the Worker Machine Pool and becomes irrelevant once the resource is created. Any modifications to the initial Machine Pool should be made through the Terraform imported Machine Pool resource. For more details, refer to [Worker Machine Pool in ROSA Cluster](../guides/worker-machine-pool.md)
- `pod_cidr` (String) Block of IP addresses for pods. After the creation of the resource, it is not possible to update the attribute value.
Expand Down
2 changes: 2 additions & 0 deletions docs/resources/cluster_rosa_hcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ resource "rhcs_cluster_rosa_hcp" "rosa_sts_cluster" {
- `host_prefix` (Number) Length of the prefix of the subnet assigned to each node. After the creation of the resource, it is not possible to update the attribute value.
- `kms_key_arn` (String) Used to encrypt root volume of compute node pools. The key ARN is the Amazon Resource Name (ARN) of an AWS Key Management Service (KMS) Key. It is a unique, fully qualified identifier for the AWS KMS Key. A key ARN includes the AWS account, Region, and the key ID (optional). After the creation of the resource, it is not possible to update the attribute value.
- `machine_cidr` (String) Block of IP addresses for nodes. After the creation of the resource, it is not possible to update the attribute value.
- `max_hcp_cluster_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for an HCP cluster to be in a ready state.
- `max_wait_timeout_in_minutes` (Number) This value sets the maximum duration in minutes to wait for machine pools to be in a ready state.
- `pod_cidr` (String) Block of IP addresses for pods. After the creation of the resource, it is not possible to update the attribute value.
- `private` (Boolean) Provides private connectivity from your cluster's VPC to Red Hat SRE, without exposing traffic to the public internet. After the creation of the resource, it is not possible to update the attribute value.
- `properties` (Map of String) User defined properties. It is essential to include property 'rosa_creator_arn' with the value of the user creating the cluster. Example: properties = {rosa_creator_arn = data.aws_caller_identity.current.arn}
Expand Down
16 changes: 14 additions & 2 deletions provider/clusterrosa/classic/cluster_rosa_classic_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,10 @@ func (r *ClusterRosaClassicResource) Schema(ctx context.Context, req resource.Sc
Description: "Wait until the cluster is either in a ready state or in an error state. The waiter has a timeout of 60 minutes, with the default value set to false",
Optional: true,
},
"max_wait_timeout_in_minutes": schema.Int64Attribute{
Description: "This value sets the maximum duration in minutes to wait for the cluster to be in a ready state.",
Optional: true,
},
},
}
}
Expand Down Expand Up @@ -882,7 +886,15 @@ func (r *ClusterRosaClassicResource) Create(ctx context.Context, request resourc
}

if common.HasValue(state.WaitForCreateComplete) && state.WaitForCreateComplete.ValueBool() {
object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutInMinutes)
timeOut := common.OptionalInt64(state.MaxWaitTimeoutInMinutes)
timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxWaitTimeoutInMinutes)
if err != nil {
response.Diagnostics.AddError(
"Waiting for cluster creation finished with error",
fmt.Sprintf("Waiting for cluster creation finished with the error %v", err),
)
}
object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), *timeOut)
if err != nil {
response.Diagnostics.AddError(
"Waiting for cluster creation finished with error",
Expand Down Expand Up @@ -1370,7 +1382,7 @@ func (r *ClusterRosaClassicResource) Delete(ctx context.Context, request resourc
if common.HasValue(state.DisableWaitingInDestroy) && state.DisableWaitingInDestroy.ValueBool() {
tflog.Info(ctx, "Waiting for destroy to be completed, is disabled")
} else {
timeout := rosa.DefaultWaitTimeoutInMinutes
timeout := rosa.MaxWaitTimeoutInMinutes
if common.HasValue(state.DestroyTimeout) {
if state.DestroyTimeout.ValueInt64() <= 0 {
response.Diagnostics.AddWarning(rosa.NonPositiveTimeoutSummary, fmt.Sprintf(rosa.NonPositiveTimeoutFormat, state.ID.ValueString()))
Expand Down
1 change: 1 addition & 0 deletions provider/clusterrosa/classic/cluster_rosa_classic_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,5 @@ type ClusterRosaClassicState struct {
DisableWaitingInDestroy types.Bool `tfsdk:"disable_waiting_in_destroy"`
DestroyTimeout types.Int64 `tfsdk:"destroy_timeout"`
WaitForCreateComplete types.Bool `tfsdk:"wait_for_create_complete"`
MaxWaitTimeoutInMinutes types.Int64 `tfsdk:"max_wait_timeout_in_minutes"`
}
10 changes: 5 additions & 5 deletions provider/clusterrosa/common/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ const (
PropertyRosaTfCommit = tagsPrefix + "tf_commit"
PropertyRosaCreatorArn = tagsPrefix + "creator_arn"

DefaultWaitTimeoutForHCPControlPlaneInMinutes = int64(45)
DefaultWaitTimeoutInMinutes = int64(60)
DefaultPollingIntervalInMinutes = 2
NonPositiveTimeoutSummary = "Can't poll cluster state with a non-positive timeout"
NonPositiveTimeoutFormat = "Can't poll state of cluster with identifier '%s', the timeout that was set is not a positive number"
MaxHCPClusterWaitTimeoutInMinutes = int64(45)
MaxWaitTimeoutInMinutes = int64(60)
DefaultPollingIntervalInMinutes = 2
NonPositiveTimeoutSummary = "Can't poll cluster state with a non-positive timeout"
NonPositiveTimeoutFormat = "Can't poll state of cluster with identifier '%s', the timeout that was set is not a positive number"

MaxClusterNameLength = 54
MaxClusterDomainPrefixLength = 15
Expand Down
32 changes: 28 additions & 4 deletions provider/clusterrosa/hcp/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,14 @@ func (r *ClusterRosaHcpResource) Schema(ctx context.Context, req resource.Schema
Description: "Wait until the cluster standard compute pools are created. The waiter has a timeout of 60 minutes, with the default value set to false. This can only be provided when also waiting for create completion.",
Optional: true,
},
"max_hcp_cluster_wait_timeout_in_minutes": schema.Int64Attribute{
Description: "This value sets the maximum duration in minutes to wait for a HCP cluster to be in a ready state.",
Optional: true,
},
"max_wait_timeout_in_minutes": schema.Int64Attribute{
Description: "This value sets the maximum duration in minutes to wait for machine pools to be in a ready state.",
Optional: true,
},
"create_admin_user": schema.BoolAttribute{
Description: "Indicates if create cluster admin user. Set it true to create cluster admin user with default username `cluster-admin` " +
"and generated password. It will be ignored if `admin_credentials` is set." + common.ValueCannotBeChangedStringDescription,
Expand Down Expand Up @@ -393,7 +401,7 @@ func (r *ClusterRosaHcpResource) Configure(ctx context.Context, req resource.Con
if !ok {
resp.Diagnostics.AddError(
"Unexpected Resource Configure Type",
fmt.Sprintf("Expected *sdk.Connaction, got: %T. Please report this issue to the provider developers.", req.ProviderData),
fmt.Sprintf("Expected *sdk.Connection, got: %T. Please report this issue to the provider developers.", req.ProviderData),
)
return
}
Expand Down Expand Up @@ -764,7 +772,15 @@ func (r *ClusterRosaHcpResource) Create(ctx context.Context, request resource.Cr

if shouldWaitCreationComplete {
tflog.Info(ctx, "Waiting for cluster to get ready")
object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutForHCPControlPlaneInMinutes)
timeOut := common.OptionalInt64(state.MaxHCPClusterWaitTimeoutInMinutes)
timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxHCPClusterWaitTimeoutInMinutes)
if err != nil {
response.Diagnostics.AddError(
"Waiting for cluster creation finished with error",
fmt.Sprintf("Waiting for cluster creation finished with the error %v", err),
)
}
object, err = r.ClusterWait.WaitForClusterToBeReady(ctx, object.ID(), *timeOut)
if err != nil {
response.Diagnostics.AddError(
"Waiting for cluster creation finished with error",
Expand All @@ -778,7 +794,15 @@ func (r *ClusterRosaHcpResource) Create(ctx context.Context, request resource.Cr
}
if shouldWaitComputeNodesComplete {
tflog.Info(ctx, "Waiting for standard compute nodes to get ready")
object, err = r.ClusterWait.WaitForStdComputeNodesToBeReady(ctx, object.ID(), rosa.DefaultWaitTimeoutInMinutes)
timeOut := common.OptionalInt64(state.MaxWaitTimeoutInMinutes)
timeOut, err = common.ValidateTimeout(timeOut, rosa.MaxWaitTimeoutInMinutes)
if err != nil {
response.Diagnostics.AddError(
"Waiting for cluster creation finished with error",
fmt.Sprintf("Waiting for cluster creation finished with the error %v", err),
)
}
object, err = r.ClusterWait.WaitForStdComputeNodesToBeReady(ctx, object.ID(), *timeOut)
if err != nil {
response.Diagnostics.AddError(
"Waiting for std compute nodes completion finished with error",
Expand Down Expand Up @@ -1260,7 +1284,7 @@ func (r *ClusterRosaHcpResource) Delete(ctx context.Context, request resource.De
if common.HasValue(state.DisableWaitingInDestroy) && state.DisableWaitingInDestroy.ValueBool() {
tflog.Info(ctx, "Waiting for destroy to be completed, is disabled")
} else {
timeout := rosa.DefaultWaitTimeoutInMinutes
timeout := rosa.MaxWaitTimeoutInMinutes
if common.HasValue(state.DestroyTimeout) {
if state.DestroyTimeout.ValueInt64() <= 0 {
response.Diagnostics.AddWarning(rosa.NonPositiveTimeoutSummary, fmt.Sprintf(rosa.NonPositiveTimeoutFormat, state.ID.ValueString()))
Expand Down
10 changes: 6 additions & 4 deletions provider/clusterrosa/hcp/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ type ClusterRosaHcpState struct {
UpgradeAcksFor types.String `tfsdk:"upgrade_acknowledgements_for"`

// Meta fields - not related to cluster spec
DisableWaitingInDestroy types.Bool `tfsdk:"disable_waiting_in_destroy"`
DestroyTimeout types.Int64 `tfsdk:"destroy_timeout"`
WaitForCreateComplete types.Bool `tfsdk:"wait_for_create_complete"`
WaitForStdComputeNodesComplete types.Bool `tfsdk:"wait_for_std_compute_nodes_complete"`
DisableWaitingInDestroy types.Bool `tfsdk:"disable_waiting_in_destroy"`
DestroyTimeout types.Int64 `tfsdk:"destroy_timeout"`
WaitForCreateComplete types.Bool `tfsdk:"wait_for_create_complete"`
WaitForStdComputeNodesComplete types.Bool `tfsdk:"wait_for_std_compute_nodes_complete"`
MaxHCPClusterWaitTimeoutInMinutes types.Int64 `tfsdk:"max_hcp_cluster_wait_timeout_in_minutes"`
MaxWaitTimeoutInMinutes types.Int64 `tfsdk:"max_wait_timeout_in_minutes"`

// Admin user fields
CreateAdminUser types.Bool `tfsdk:"create_admin_user"`
Expand Down
10 changes: 10 additions & 0 deletions provider/common/cluster_waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,13 @@ func pollClusterState(clusterId string, ctx context.Context, timeout int64, clus

return object, nil
}

// ValidateTimeout resolves the wait timeout, in minutes, that a caller
// should use.
//
// When timeOut is nil, a pointer to a copy of defaultTimeout is returned.
// When timeOut points to a positive value, timeOut itself is returned
// unchanged. When timeOut points to zero or a negative value, the result is
// a nil pointer together with a non-nil error, so callers must check the
// error before dereferencing the returned pointer.
func ValidateTimeout(timeOut *int64, defaultTimeout int64) (*int64, error) {
	switch {
	case timeOut == nil:
		// Nothing was supplied: fall back to the provided default.
		return &defaultTimeout, nil
	case *timeOut > 0:
		return timeOut, nil
	default:
		// Zero and negative durations are rejected.
		return nil, fmt.Errorf("timeout must be greater than 0 minutes")
	}
}

0 comments on commit 2adb282

Please sign in to comment.