Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubernetes_cluster_node_pool: Fix race condition with virtual network status when creating node pool #25888

Merged
merged 2 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ import (
"github.com/hashicorp/go-azure-sdk/resource-manager/containerservice/2023-06-02-preview/agentpools"
"github.com/hashicorp/go-azure-sdk/resource-manager/containerservice/2023-06-02-preview/managedclusters"
"github.com/hashicorp/go-azure-sdk/resource-manager/containerservice/2023-06-02-preview/snapshots"
"github.com/hashicorp/go-azure-sdk/resource-manager/network/2023-09-01/subnets"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/hashicorp/terraform-provider-azurerm/helpers/tf"
"github.com/hashicorp/terraform-provider-azurerm/internal/clients"
"github.com/hashicorp/terraform-provider-azurerm/internal/features"
"github.com/hashicorp/terraform-provider-azurerm/internal/locks"
computeValidate "github.com/hashicorp/terraform-provider-azurerm/internal/services/compute/validate"
"github.com/hashicorp/terraform-provider-azurerm/internal/services/containers/migration"
"github.com/hashicorp/terraform-provider-azurerm/internal/services/containers/parse"
containerValidate "github.com/hashicorp/terraform-provider-azurerm/internal/services/containers/validate"
"github.com/hashicorp/terraform-provider-azurerm/internal/services/network"
"github.com/hashicorp/terraform-provider-azurerm/internal/tf/pluginsdk"
"github.com/hashicorp/terraform-provider-azurerm/internal/tf/validation"
"github.com/hashicorp/terraform-provider-azurerm/internal/timeouts"
Expand Down Expand Up @@ -423,10 +426,14 @@ func resourceKubernetesClusterNodePoolSchema() map[string]*pluginsdk.Schema {

return s
}

func resourceKubernetesClusterNodePoolCreate(d *pluginsdk.ResourceData, meta interface{}) error {
containersClient := meta.(*clients.Client).Containers
clustersClient := containersClient.KubernetesClustersClient
poolsClient := containersClient.AgentPoolsClient
subnetClient := meta.(*clients.Client).Network.Client.Subnets
vnetClient := meta.(*clients.Client).Network.VnetClient

ctx, cancel := timeouts.ForCreate(meta.(*clients.Client).StopContext, d)
defer cancel()

Expand All @@ -435,6 +442,20 @@ func resourceKubernetesClusterNodePoolCreate(d *pluginsdk.ResourceData, meta int
return err
}

var subnetID *commonids.SubnetId
if subnetIDValue, ok := d.GetOk("vnet_subnet_id"); ok {
subnetID, err = commonids.ParseSubnetID(subnetIDValue.(string))
if err != nil {
return err
}

locks.ByName(subnetID.VirtualNetworkName, network.VirtualNetworkResourceName)
defer locks.UnlockByName(subnetID.VirtualNetworkName, network.VirtualNetworkResourceName)

locks.ByName(subnetID.SubnetName, network.SubnetResourceName)
defer locks.UnlockByName(subnetID.SubnetName, network.SubnetResourceName)
}

id := agentpools.NewAgentPoolID(clusterId.SubscriptionId, clusterId.ResourceGroupName, clusterId.ManagedClusterName, d.Get("name").(string))

log.Printf("[DEBUG] Retrieving %s...", *clusterId)
Expand Down Expand Up @@ -602,8 +623,8 @@ func resourceKubernetesClusterNodePoolCreate(d *pluginsdk.ResourceData, meta int
profile.PodSubnetID = utils.String(podSubnetID)
}

if vnetSubnetID := d.Get("vnet_subnet_id").(string); vnetSubnetID != "" {
profile.VnetSubnetID = utils.String(vnetSubnetID)
if subnetID != nil {
profile.VnetSubnetID = utils.String(subnetID.ID())
}

if hostGroupID := d.Get("host_group_id").(string); hostGroupID != "" {
Expand Down Expand Up @@ -677,6 +698,38 @@ func resourceKubernetesClusterNodePoolCreate(d *pluginsdk.ResourceData, meta int
return fmt.Errorf("creating %s: %+v", id, err)
}

if subnetID != nil {
// Wait for vnet to come back to Succeeded before releasing any locks
timeout, ok := ctx.Deadline()
if !ok {
return fmt.Errorf("internal-error: context had no deadline")
}

// TODO: refactor this into a `custompoller` within the `network` package
stateConf := &pluginsdk.StateChangeConf{
c4milo marked this conversation as resolved.
Show resolved Hide resolved
Pending: []string{string(subnets.ProvisioningStateUpdating)},
Target: []string{string(subnets.ProvisioningStateSucceeded)},
Refresh: network.SubnetProvisioningStateRefreshFunc(ctx, subnetClient, *subnetID),
MinTimeout: 1 * time.Minute,
Timeout: time.Until(timeout),
}
if _, err = stateConf.WaitForStateContext(ctx); err != nil {
return fmt.Errorf("waiting for provisioning state of subnet for AKS Node Pool creation %s: %+v", *subnetID, err)
}

vnetId := commonids.NewVirtualNetworkID(subnetID.SubscriptionId, subnetID.ResourceGroupName, subnetID.VirtualNetworkName)
vnetStateConf := &pluginsdk.StateChangeConf{
Pending: []string{string(subnets.ProvisioningStateUpdating)},
Target: []string{string(subnets.ProvisioningStateSucceeded)},
Refresh: network.VirtualNetworkProvisioningStateRefreshFunc(ctx, vnetClient, vnetId),
MinTimeout: 1 * time.Minute,
Timeout: time.Until(timeout),
}
if _, err = vnetStateConf.WaitForStateContext(ctx); err != nil {
return fmt.Errorf("waiting for provisioning state of virtual network for AKS Node Pool creation %s: %+v", vnetId, err)
}
}

d.SetId(id.ID())
return resourceKubernetesClusterNodePoolRead(d, meta)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,6 @@ func (KubernetesClusterNodePoolResource) scaleNodePool(nodeCount int) acceptance
nodePoolName := state.Attributes["name"]
kubernetesClusterId := state.Attributes["kubernetes_cluster_id"]
parsedK8sId, err := commonids.ParseKubernetesClusterID(kubernetesClusterId)

if err != nil {
return fmt.Errorf("parsing kubernetes cluster id: %+v", err)
}
Expand Down Expand Up @@ -1167,6 +1166,21 @@ func (KubernetesClusterNodePoolResource) scaleNodePool(nodeCount int) acceptance
}
}

func TestAccKubernetesClusterNodePool_virtualNetworkOwnershipRaceCondition(t *testing.T) {
data := acceptance.BuildTestData(t, "azurerm_kubernetes_cluster_node_pool", "test1")
r := KubernetesClusterNodePoolResource{}

data.ResourceTest(t, r, []acceptance.TestStep{
{
Config: r.virtualNetworkOwnershipRaceCondition(data),
Check: acceptance.ComposeTestCheckFunc(
check.That("azurerm_kubernetes_cluster_node_pool.test2").ExistsInAzure(r),
check.That("azurerm_kubernetes_cluster_node_pool.test3").ExistsInAzure(r),
),
},
})
}

func (r KubernetesClusterNodePoolResource) autoScaleConfig(data acceptance.TestData) string {
return fmt.Sprintf(`
provider "azurerm" {
Expand Down Expand Up @@ -2945,3 +2959,100 @@ resource "azurerm_kubernetes_cluster_node_pool" "test" {
}
`, data.Locations.Primary, data.RandomInteger)
}

func (KubernetesClusterNodePoolResource) virtualNetworkOwnershipRaceCondition(data acceptance.TestData) string {
return fmt.Sprintf(`
provider "azurerm" {
features {}
}

resource "azurerm_resource_group" "test" {
name = "acctestRG-aks-%d"
location = "%s"
}

resource "azurerm_virtual_network" "test" {
name = "acctestvirtnet%d"
address_space = ["10.1.0.0/16"]
location = azurerm_resource_group.test.location
resource_group_name = azurerm_resource_group.test.name
}

resource "azurerm_subnet" "test" {
count = 8
name = "acctestsubnet%d${count.index}"
resource_group_name = azurerm_resource_group.test.name
virtual_network_name = azurerm_virtual_network.test.name
address_prefixes = ["10.1.${count.index}.0/24"]
service_endpoints = [
"Microsoft.Storage.Global",
"Microsoft.AzureActiveDirectory",
"Microsoft.KeyVault"
]

lifecycle {
# AKS automatically configures subnet delegations when the subnets are assigned
# to node pools. We ignore changes so the terraform refresh run by Terraform's plugin-sdk,
# at the end of the test, returns empty and leaves the test succeed.
ignore_changes = [delegation]
}
}

resource "azurerm_kubernetes_cluster" "test" {
name = "acctestaks%d"
location = azurerm_resource_group.test.location
resource_group_name = azurerm_resource_group.test.name
dns_prefix = "acctestaks%d"

default_node_pool {
name = "default"
node_count = 1
vm_size = "Standard_DS2_v2"
vnet_subnet_id = azurerm_subnet.test["6"].id
pod_subnet_id = azurerm_subnet.test["7"].id
upgrade_settings {
max_surge = "10%%"
}
}

identity {
type = "SystemAssigned"
}

network_profile {
network_plugin = "azure"
load_balancer_sku = "standard"
}
}

resource "azurerm_kubernetes_cluster_node_pool" "test1" {
name = "internal1"
kubernetes_cluster_id = azurerm_kubernetes_cluster.test.id
vm_size = "Standard_L8s_v3"
node_count = 1
vnet_subnet_id = azurerm_subnet.test[0].id
pod_subnet_id = azurerm_subnet.test[1].id
zones = ["1"]
}

resource "azurerm_kubernetes_cluster_node_pool" "test2" {
name = "internal2"
kubernetes_cluster_id = azurerm_kubernetes_cluster.test.id
vm_size = "Standard_L8s_v3"
node_count = 1
vnet_subnet_id = azurerm_subnet.test[2].id
pod_subnet_id = azurerm_subnet.test[3].id
zones = ["1"]
}

resource "azurerm_kubernetes_cluster_node_pool" "test3" {
name = "internal3"
kubernetes_cluster_id = azurerm_kubernetes_cluster.test.id
vm_size = "Standard_L8s_v3"
node_count = 1
vnet_subnet_id = azurerm_subnet.test[4].id
pod_subnet_id = azurerm_subnet.test[5].id
zones = ["1"]
}
`, data.RandomInteger, data.Locations.Primary, data.RandomInteger, data.RandomInteger, data.RandomInteger, data.RandomInteger)
}
Loading