Skip to content

Commit

Permalink
Allow GKE to resume create after interruption
Browse files Browse the repository at this point in the history
Signed-off-by: Modular Magician <[email protected]>
  • Loading branch information
chrisst committed Dec 19, 2019
1 parent b467350 commit 1ceb0d5
Show file tree
Hide file tree
Showing 11 changed files with 105 additions and 57 deletions.
5 changes: 3 additions & 2 deletions google/bootstrap_utils_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package google

import (
"context"
"fmt"
"log"
"os"
Expand Down Expand Up @@ -73,7 +74,7 @@ func BootstrapKMSKeyWithPurposeInLocation(t *testing.T, purpose, locationID stri

ConfigureBasePaths(config)

if err := config.LoadAndValidate(); err != nil {
if err := config.LoadAndValidate(context.Background()); err != nil {
t.Errorf("Unable to bootstrap KMS key: %s", err)
}

Expand Down Expand Up @@ -213,7 +214,7 @@ func BootstrapServiceAccount(t *testing.T, project, testRunner string) string {

ConfigureBasePaths(config)

if err := config.LoadAndValidate(); err != nil {
if err := config.LoadAndValidate(context.Background()); err != nil {
t.Fatalf("Bootstrapping failed. Unable to load test config: %s", err)
}

Expand Down
78 changes: 39 additions & 39 deletions google/config.go

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions google/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestConfigLoadAndValidate_accountFilePath(t *testing.T) {

ConfigureBasePaths(config)

err := config.LoadAndValidate()
err := config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("error: %v", err)
}
Expand All @@ -43,7 +43,7 @@ func TestConfigLoadAndValidate_accountFileJSON(t *testing.T) {

ConfigureBasePaths(config)

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("error: %v", err)
}
Expand All @@ -58,7 +58,7 @@ func TestConfigLoadAndValidate_accountFileJSONInvalid(t *testing.T) {

ConfigureBasePaths(config)

if config.LoadAndValidate() == nil {
if config.LoadAndValidate(context.Background()) == nil {
t.Fatalf("expected error, but got nil")
}
}
Expand All @@ -80,7 +80,7 @@ func TestAccConfigLoadValidate_credentials(t *testing.T) {

ConfigureBasePaths(config)

err := config.LoadAndValidate()
err := config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("error: %v", err)
}
Expand Down Expand Up @@ -118,7 +118,7 @@ func TestAccConfigLoadValidate_accessToken(t *testing.T) {

ConfigureBasePaths(config)

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("error: %v", err)
}
Expand All @@ -139,7 +139,7 @@ func TestConfigLoadAndValidate_customScopes(t *testing.T) {

ConfigureBasePaths(config)

err := config.LoadAndValidate()
err := config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
Expand All @@ -165,7 +165,7 @@ func TestConfigLoadAndValidate_defaultBatchingConfig(t *testing.T) {
BatchingConfig: batchCfg,
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
Expand Down Expand Up @@ -202,7 +202,7 @@ func TestConfigLoadAndValidate_customBatchingConfig(t *testing.T) {
BatchingConfig: batchCfg,
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
Expand Down
12 changes: 12 additions & 0 deletions google/container_operation.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package google

import (
"context"
"errors"
"fmt"
"log"

container "google.golang.org/api/container/v1beta1"
)

type ContainerOperationWaiter struct {
Service *container.Service
Context context.Context
Op *container.Operation
Project string
Location string
Expand Down Expand Up @@ -62,6 +66,13 @@ func (w *ContainerOperationWaiter) QueryOp() (interface{}, error) {
w.Project, w.Location, w.Op.Name)

var op *container.Operation
select {
case <-w.Context.Done():
log.Println("[WARN] request has been cancelled early")
return op, errors.New("unable to finish polling, context has been cancelled")
default:
// default must be here to keep the previous case from blocking
}
err := retryTimeDuration(func() (opErr error) {
op, opErr = w.Service.Projects.Locations.Operations.Get(name).Do()
return opErr
Expand All @@ -88,6 +99,7 @@ func (w *ContainerOperationWaiter) TargetStates() []string {
func containerOperationWait(config *Config, op *container.Operation, project, location, activity string, timeoutMinutes int) error {
w := &ContainerOperationWaiter{
Service: config.clientContainerBeta,
Context: config.context,
Op: op,
Project: project,
Location: location,
Expand Down
6 changes: 3 additions & 3 deletions google/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func Provider() terraform.ResourceProvider {
// We can therefore assume that if it's missing it's 0.10 or 0.11
terraformVersion = "0.11+compatible"
}
return providerConfigure(d, terraformVersion)
return providerConfigure(d, provider, terraformVersion)
}

return provider
Expand Down Expand Up @@ -721,7 +721,7 @@ func ResourceMapWithErrors() (map[string]*schema.Resource, error) {
)
}

func providerConfigure(d *schema.ResourceData, terraformVersion string) (interface{}, error) {
func providerConfigure(d *schema.ResourceData, p *schema.Provider, terraformVersion string) (interface{}, error) {
config := Config{
Project: d.Get("project").(string),
Region: d.Get("region").(string),
Expand Down Expand Up @@ -814,7 +814,7 @@ func providerConfigure(d *schema.ResourceData, terraformVersion string) (interfa
config.StorageTransferBasePath = d.Get(StorageTransferCustomEndpointEntryKey).(string)
config.BigtableAdminBasePath = d.Get(BigtableAdminCustomEndpointEntryKey).(string)

if err := config.LoadAndValidate(); err != nil {
if err := config.LoadAndValidate(p.StopContext()); err != nil {
return nil, err
}

Expand Down
3 changes: 2 additions & 1 deletion google/resource_composer_environment_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package google

import (
"context"
"fmt"
"testing"

Expand Down Expand Up @@ -551,7 +552,7 @@ func testSweepComposerResources(region string) error {
return fmt.Errorf("error getting shared config for region: %s", err)
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
log.Fatalf("error loading: %s", err)
}
Expand Down
3 changes: 2 additions & 1 deletion google/resource_compute_instance_migrate_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package google

import (
"context"
"fmt"
"log"
"os"
Expand Down Expand Up @@ -941,7 +942,7 @@ func getInitializedConfig(t *testing.T) *Config {

ConfigureBasePaths(config)

err := config.LoadAndValidate()
err := config.LoadAndValidate(context.Background())
if err != nil {
t.Fatal(err)
}
Expand Down
30 changes: 30 additions & 0 deletions google/resource_container_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ func resourceContainerCluster() *schema.Resource {
},
},

"operation": {
Type: schema.TypeString,
Computed: true,
},

"location": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -916,6 +921,18 @@ func resourceContainerClusterCreate(d *schema.ResourceData, meta interface{}) er
timeoutInMinutes := int(d.Timeout(schema.TimeoutCreate).Minutes())
waitErr := containerOperationWait(config, op, project, location, "creating GKE cluster", timeoutInMinutes)
if waitErr != nil {
// Check if the create operation failed because Terraform was prematurely terminated. If it was we can persist the
// operation id to state so that a subsequent refresh of this resource will wait until the operation has terminated
// before attempting to Read the state of the cluster. This allows a graceful resumption of a Create that was killed
// by the upstream Terraform process exiting early such as a sigterm.
select {
case <-config.context.Done():
log.Printf("[DEBUG] Persisting %s so this operation can be resumed \n", op.Name)
d.Set("operation", op.Name)
return nil
default:
// leaving default case to ensure this is non blocking
}
// Try a GET on the cluster so we can see the state in debug logs. This will help classify error states.
_, getErr := config.clientContainerBeta.Projects.Locations.Clusters.Get(containerClusterFullName(project, location, clusterName)).Do()
if getErr != nil {
Expand Down Expand Up @@ -974,6 +991,19 @@ func resourceContainerClusterRead(d *schema.ResourceData, meta interface{}) erro
return err
}

operation := d.Get("operation").(string)
if operation != "" {
log.Printf("[DEBUG] in progress operation detected at %v, attempting to resume", operation)
op := &containerBeta.Operation{
Name: operation,
}
d.Set("operation", "")
waitErr := containerOperationWait(config, op, project, location, "resuming GKE cluster", int(d.Timeout(schema.TimeoutCreate).Minutes()))
if waitErr != nil {
return waitErr
}
}

clusterName := d.Get("name").(string)
name := containerClusterFullName(project, location, clusterName)
cluster, err := config.clientContainerBeta.Projects.Locations.Clusters.Get(name).Do()
Expand Down
3 changes: 2 additions & 1 deletion google/resource_container_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package google

import (
"bytes"
"context"
"fmt"
"log"
"regexp"
Expand All @@ -27,7 +28,7 @@ func testSweepContainerClusters(region string) error {
log.Fatalf("error getting shared config for region: %s", err)
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
log.Fatalf("error loading: %s", err)
}
Expand Down
3 changes: 2 additions & 1 deletion google/resource_monitoring_group_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package google

import (
"context"
"fmt"
"log"
"strings"
Expand All @@ -23,7 +24,7 @@ func testSweepMonitoringGroups(region string) error {
log.Fatalf("error getting shared config for region: %s", err)
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
log.Fatalf("error loading: %s", err)
}
Expand Down
3 changes: 2 additions & 1 deletion google/resource_sql_database_instance_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package google

import (
"context"
"fmt"
"log"
"strings"
Expand Down Expand Up @@ -40,7 +41,7 @@ func testSweepDatabases(region string) error {
return fmt.Errorf("error getting shared config for region: %s", err)
}

err = config.LoadAndValidate()
err = config.LoadAndValidate(context.Background())
if err != nil {
log.Fatalf("error loading: %s", err)
}
Expand Down

0 comments on commit 1ceb0d5

Please sign in to comment.