Add kops delete instance command
Ole Markus With committed Aug 19, 2020
1 parent 94833fa commit 611a052
Showing 7 changed files with 350 additions and 9 deletions.
1 change: 1 addition & 0 deletions cmd/kops/BUILD.bazel
@@ -17,6 +17,7 @@ go_library(
"create_secret_weave_encryptionconfig.go",
"delete.go",
"delete_cluster.go",
"delete_instance.go",
"delete_instancegroup.go",
"delete_secret.go",
"describe.go",
4 changes: 4 additions & 0 deletions cmd/kops/delete.go
@@ -46,6 +46,9 @@ var (
`))

deleteExample = templates.Examples(i18n.T(`
# Delete an instance
kops delete instance i-0a5ed581b862d3425
# Delete a cluster using a manifest file
kops delete -f my-cluster.yaml
@@ -90,6 +93,7 @@ func NewCmdDelete(f *util.Factory, out io.Writer) *cobra.Command {
cmd.AddCommand(NewCmdDeleteCluster(f, out))
cmd.AddCommand(NewCmdDeleteInstanceGroup(f, out))
cmd.AddCommand(NewCmdDeleteSecret(f, out))
cmd.AddCommand(NewCmdDeleteInstance(f, out))

return cmd
}
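For context on the wiring above: kops builds its CLI with cobra, and the new command only becomes reachable once it is attached to the parent `delete` command via `AddCommand`. Below is a minimal, self-contained sketch of that pattern; `newDeleteCommand` and its strings are illustrative, not the kops code.

```go
package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

// newDeleteCommand builds a parent "delete" command and attaches an
// "instance" subcommand, mirroring the AddCommand call added above.
func newDeleteCommand() *cobra.Command {
	deleteCmd := &cobra.Command{
		Use:   "delete",
		Short: "Delete clusters, instancegroups, or secrets",
	}

	instanceCmd := &cobra.Command{
		Use:   "instance ID",
		Short: "Delete an instance",
		Args:  cobra.ExactArgs(1), // exactly one instance ID
		RunE: func(cmd *cobra.Command, args []string) error {
			fmt.Printf("would delete instance %s\n", args[0])
			return nil
		},
	}

	deleteCmd.AddCommand(instanceCmd)
	return deleteCmd
}

func main() {
	if err := newDeleteCommand().Execute(); err != nil {
		os.Exit(1)
	}
}
```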
277 changes: 277 additions & 0 deletions cmd/kops/delete_instance.go
@@ -0,0 +1,277 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"fmt"
"io"
"os"
"time"

"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/client-go/kubernetes"
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/kops/cmd/kops/util"
kopsapi "k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/cloudinstances"
"k8s.io/kops/pkg/instancegroups"
"k8s.io/kops/pkg/validation"
"k8s.io/kops/upup/pkg/fi/cloudup"
"k8s.io/kubectl/pkg/util/i18n"
"k8s.io/kubectl/pkg/util/templates"
)

// DeleteInstanceOptions is the command Object for an instance deletion.
type DeleteInstanceOptions struct {
CloudOnly bool

// The following two variables are used when kops validates the cluster
// between detach and deletion.

// FailOnDrainError fails the deletion if draining a node fails.
FailOnDrainError bool

// FailOnValidate fails the deletion when the cluster
// does not validate after the validation period.
FailOnValidate bool

// PostDrainDelay is the duration of a pause after a drain operation
PostDrainDelay time.Duration

// ValidationTimeout is the timeout for validation to succeed after the drain and pause
ValidationTimeout time.Duration

// ValidateCount is the number of times that the cluster needs to validate between drain and deletion
ValidateCount int32

ClusterName string

InstanceID string

Detatch bool
}

func (o *DeleteInstanceOptions) InitDefaults() {
o.CloudOnly = false
o.FailOnDrainError = false
o.FailOnValidate = true

o.PostDrainDelay = 5 * time.Second
o.ValidationTimeout = 15 * time.Minute
o.ValidateCount = 2

o.Detatch = true
}

func NewCmdDeleteInstance(f *util.Factory, out io.Writer) *cobra.Command {
deleteIgLong = templates.LongDesc(i18n.T(`
Delete an instance. By default, it will detach the instance from
the instance group, drain it, then terminate it.`))

deleteIgExample = templates.Examples(i18n.T(`
# Delete an instance from the currently active cluster.
kops delete instance i-0a5ed581b862d3425
# Delete an instance from the currently active cluster without
# validation or draining.
kops delete instance --cloudonly i-0a5ed581b862d3425
`))

deleteIgShort = i18n.T(`Delete an instance`)

var options DeleteInstanceOptions
options.InitDefaults()

cmd := &cobra.Command{
Use: "instance",
Short: "",
Long: "",
Example: "",
}

cmd.Flags().BoolVar(&options.CloudOnly, "cloudonly", options.CloudOnly, "Perform the deletion without confirming progress with k8s")
cmd.Flags().BoolVar(&options.Detatch, "detatch", options.Detatch, "Detach the node from the ASG before deletion")

cmd.Flags().DurationVar(&options.ValidationTimeout, "validation-timeout", options.ValidationTimeout, "Maximum time to wait for a cluster to validate")
cmd.Flags().Int32Var(&options.ValidateCount, "validate-count", options.ValidateCount, "Number of times that a cluster needs to be validated after a single node update")
cmd.Flags().DurationVar(&options.PostDrainDelay, "post-drain-delay", options.PostDrainDelay, "Time to wait after draining each node")

cmd.Flags().BoolVar(&options.FailOnDrainError, "fail-on-drain-error", true, "The deletion will fail if draining a node fails.")
cmd.Flags().BoolVar(&options.FailOnValidate, "fail-on-validate-error", true, "The deletion will fail if the cluster fails to validate.")

cmd.Run = func(cmd *cobra.Command, args []string) {
ctx := context.TODO()

clusterName := rootCommand.ClusterName()

if clusterName == "" {
exitWithError(fmt.Errorf("--name is required"))
return
}

options.ClusterName = clusterName
if len(args) == 0 {
exitWithError(fmt.Errorf("specify ID of instance to delete"))
}
if len(args) != 1 {
exitWithError(fmt.Errorf("can only edit one instance at a time!"))
}

options.InstanceID = args[0]

err := RunDeleteInstance(ctx, f, os.Stdout, &options)
if err != nil {
exitWithError(err)
return
}

}

return cmd
}

func RunDeleteInstance(ctx context.Context, f *util.Factory, out io.Writer, options *DeleteInstanceOptions) error {

clientset, err := f.Clientset()
if err != nil {
return err
}

cluster, err := GetCluster(ctx, f, options.ClusterName)
if err != nil {
return err
}

contextName := cluster.ObjectMeta.Name
clientGetter := genericclioptions.NewConfigFlags(true)
clientGetter.Context = &contextName

config, err := clientGetter.ToRESTConfig()
if err != nil {
return fmt.Errorf("cannot load kubecfg settings for %q: %v", contextName, err)
}

var nodes []v1.Node
var k8sClient kubernetes.Interface
if !options.CloudOnly {
k8sClient, err = kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("cannot build kube client for %q: %v", contextName, err)
}

nodeList, err := k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
fmt.Fprintf(os.Stderr, "Unable to reach the kubernetes API.\n")
fmt.Fprintf(os.Stderr, "Use --cloudonly to do a deletion without confirming progress with the k8s API\n\n")
return fmt.Errorf("error listing nodes in cluster: %v", err)
}

if nodeList != nil {
nodes = nodeList.Items
}
}

list, err := clientset.InstanceGroupsFor(cluster).List(ctx, metav1.ListOptions{})
if err != nil {
return err
}

var instanceGroups []*kopsapi.InstanceGroup
for i := range list.Items {
instanceGroups = append(instanceGroups, &list.Items[i])
}

cloud, err := cloudup.BuildCloud(cluster)
if err != nil {
return err
}

groups, err := cloud.GetCloudGroups(cluster, instanceGroups, false, nodes)
if err != nil {
return err
}

var cloudMember *cloudinstances.CloudInstanceGroupMember
groupsloop:
for _, group := range groups {
for _, r := range group.Ready {
if r.ID == options.InstanceID {
cloudMember = r
break groupsloop
}
}
for _, r := range group.NeedUpdate {
if r.ID == options.InstanceID {
cloudMember = r
break groupsloop
}
}
}

if cloudMember == nil {
return fmt.Errorf("could not find instance %v", options.InstanceID)
}

d := &instancegroups.RollingUpdateCluster{
MasterInterval: 0,
NodeInterval: 0,
BastionInterval: 0,
Interactive: false,
Force: true,
Cloud: cloud,
K8sClient: k8sClient,
FailOnDrainError: options.FailOnDrainError,
FailOnValidate: options.FailOnValidate,
CloudOnly: options.CloudOnly,
ClusterName: options.ClusterName,
PostDrainDelay: options.PostDrainDelay,
ValidationTimeout: options.ValidationTimeout,
ValidateCount: int(options.ValidateCount),
// TODO should we expose this to the UI?
ValidateTickDuration: 30 * time.Second,
ValidateSuccessDuration: 10 * time.Second,
}

var clusterValidator validation.ClusterValidator
if !options.CloudOnly {
clusterValidator, err = validation.NewClusterValidator(cluster, cloud, list, k8sClient)
if err != nil {
return fmt.Errorf("cannot create cluster validator: %v", err)
}
}
d.ClusterValidator = clusterValidator

if options.Detatch {
if cloudMember.CloudInstanceGroup.InstanceGroup.IsMaster() {
return fmt.Errorf("cannot detatch master instances. Run with --detatch=false")

}
err = d.DetachInstance(cloudMember)
if err != nil {
return fmt.Errorf("failed to detach instance: %v", err)
}
if err := d.MaybeValidate(" after detaching instance", d.ValidateCount); err != nil {
return err
}
}

return d.DrainTerminateAndWait(ctx, cloudMember, false, 0)
}
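The lookup over `groups` above uses a labeled break (`groupsloop`) to leave both the inner and outer loop as soon as the instance ID matches. Below is a stripped-down, hypothetical sketch of the same pattern with simplified stand-in types.

```go
package main

import "fmt"

type member struct{ ID string }

type group struct {
	Ready      []*member
	NeedUpdate []*member
}

// findMember scans both the Ready and NeedUpdate lists of every group and
// stops all looping as soon as the ID matches, like groupsloop above.
func findMember(groups map[string]*group, id string) *member {
	var found *member
search:
	for _, g := range groups {
		for _, m := range append(g.Ready, g.NeedUpdate...) {
			if m.ID == id {
				found = m
				break search
			}
		}
	}
	return found
}

func main() {
	groups := map[string]*group{
		"nodes": {Ready: []*member{{ID: "i-0a5ed581b862d3425"}}},
	}
	fmt.Println(findMember(groups, "i-0a5ed581b862d3425") != nil) // true
}
```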
4 changes: 4 additions & 0 deletions docs/cli/kops_delete.md
@@ -16,6 +16,9 @@ kops delete -f FILENAME [--yes] [flags]
### Examples

```
# Delete an instance
kops delete instance i-0a5ed581b862d3425
# Delete a cluster using a manifest file
kops delete -f my-cluster.yaml
@@ -62,6 +65,7 @@ kops delete -f FILENAME [--yes] [flags]

* [kops](kops.md) - kops is Kubernetes ops.
* [kops delete cluster](kops_delete_cluster.md) - Delete a cluster.
* [kops delete instance](kops_delete_instance.md) -
* [kops delete instancegroup](kops_delete_instancegroup.md) - Delete instancegroup
* [kops delete secret](kops_delete_secret.md) - Delete a secret

52 changes: 52 additions & 0 deletions docs/cli/kops_delete_instance.md
@@ -0,0 +1,52 @@

<!--- This file is automatically generated by make gen-cli-docs; changes should be made in the go CLI command code (under cmd/kops) -->

## kops delete instance



### Synopsis



```
kops delete instance [flags]
```

### Options

```
--cloudonly Perform the deletion without confirming progress with k8s
--detatch Detach the node from the ASG before deletion
--fail-on-drain-error The deletion will fail if draining a node fails. (default true)
--fail-on-validate-error The deletion will fail if the cluster fails to validate. (default true)
-h, --help help for instance
--post-drain-delay duration Time to wait after draining each node (default 5s)
--validate-count int32 Number of times that a cluster needs to be validated after a single node update
--validation-timeout duration Maximum time to wait for a cluster to validate
```

### Options inherited from parent commands

```
--add_dir_header If true, adds the file directory to the header of the log messages
--alsologtostderr log to standard error as well as files
--config string yaml config file (default is $HOME/.kops.yaml)
--log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0)
--log_dir string If non-empty, write log files in this directory
--log_file string If non-empty, use this log file
--log_file_max_size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
--logtostderr log to standard error instead of files (default true)
--name string Name of cluster. Overrides KOPS_CLUSTER_NAME environment variable
--skip_headers If true, avoid header prefixes in the log messages
--skip_log_headers If true, avoid headers when opening log files
--state string Location of state storage (kops 'config' file). Overrides KOPS_STATE_STORE environment variable
--stderrthreshold severity logs at or above this threshold go to stderr (default 2)
-v, --v Level number for the log level verbosity
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
```

### SEE ALSO

* [kops delete](kops_delete.md) - Delete clusters, instancegroups, or secrets.

18 changes: 9 additions & 9 deletions pkg/instancegroups/instancegroups.go
@@ -88,7 +88,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c

if isBastion {
klog.V(3).Info("Not validating the cluster as instance is a bastion.")
} else if err = c.maybeValidate("", 1); err != nil {
} else if err = c.MaybeValidate("", 1); err != nil {
return err
}

@@ -132,7 +132,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c
for numSurge := 1; numSurge <= maxSurge; numSurge++ {
u := update[len(update)-numSurge]
if !u.Detached {
if err := c.detachInstance(u); err != nil {
if err := c.DetachInstance(u); err != nil {
return err
}

@@ -143,7 +143,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c
klog.Infof("waiting for %v after detaching instance", sleepAfterTerminate)
time.Sleep(sleepAfterTerminate)

if err := c.maybeValidate(" after detaching instance", c.ValidateCount); err != nil {
if err := c.MaybeValidate(" after detaching instance", c.ValidateCount); err != nil {
return err
}
noneReady = false
@@ -161,7 +161,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c

for uIdx, u := range update {
go func(m *cloudinstances.CloudInstanceGroupMember) {
terminateChan <- c.drainTerminateAndWait(ctx, m, isBastion, sleepAfterTerminate)
terminateChan <- c.DrainTerminateAndWait(ctx, m, isBastion, sleepAfterTerminate)
}(u)
runningDrains++

@@ -177,7 +177,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c
return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
}

err = c.maybeValidate(" after terminating instance", c.ValidateCount)
err = c.MaybeValidate(" after terminating instance", c.ValidateCount)
if err != nil {
return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
}
@@ -223,7 +223,7 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c
}
}

err = c.maybeValidate(" after terminating instance", c.ValidateCount)
err = c.MaybeValidate(" after terminating instance", c.ValidateCount)
if err != nil {
return err
}
@@ -320,7 +320,7 @@ func (c *RollingUpdateCluster) patchTaint(ctx context.Context, node *corev1.Node
return err
}

func (c *RollingUpdateCluster) drainTerminateAndWait(ctx context.Context, u *cloudinstances.CloudInstanceGroupMember, isBastion bool, sleepAfterTerminate time.Duration) error {
func (c *RollingUpdateCluster) DrainTerminateAndWait(ctx context.Context, u *cloudinstances.CloudInstanceGroupMember, isBastion bool, sleepAfterTerminate time.Duration) error {
instanceID := u.ID

nodeName := ""
@@ -375,7 +375,7 @@ func (c *RollingUpdateCluster) drainTerminateAndWait(ctx context.Context, u *clo
return nil
}

func (c *RollingUpdateCluster) maybeValidate(operation string, validateCount int) error {
func (c *RollingUpdateCluster) MaybeValidate(operation string, validateCount int) error {
if c.CloudOnly {
klog.Warningf("Not validating cluster as cloudonly flag is set.")

@@ -451,7 +451,7 @@ func (c *RollingUpdateCluster) validateClusterWithTimeout(validateCount int) err
}

// DetachInstance detaches a Cloud Instance
func (c *RollingUpdateCluster) detachInstance(u *cloudinstances.CloudInstanceGroupMember) error {
func (c *RollingUpdateCluster) DetachInstance(u *cloudinstances.CloudInstanceGroupMember) error {
id := u.ID
nodeName := ""
if u.Node != nil {
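These three methods are renamed to their exported forms (MaybeValidate, DetachInstance, DrainTerminateAndWait) so that cmd/kops can call them from outside the instancegroups package. The following condensed sketch shows how an out-of-package caller can sequence them; `deleteOne` is a hypothetical helper, not a kops function, and it uses only signatures visible in this diff.

```go
package example

import (
	"context"

	"k8s.io/kops/pkg/cloudinstances"
	"k8s.io/kops/pkg/instancegroups"
)

// deleteOne detaches a single instance, waits for the cluster to validate,
// then drains and terminates it, the same ordering RunDeleteInstance uses.
func deleteOne(ctx context.Context, c *instancegroups.RollingUpdateCluster, m *cloudinstances.CloudInstanceGroupMember) error {
	if err := c.DetachInstance(m); err != nil {
		return err
	}
	if err := c.MaybeValidate(" after detaching instance", c.ValidateCount); err != nil {
		return err
	}
	// Not a bastion; no extra sleep after terminating.
	return c.DrainTerminateAndWait(ctx, m, false, 0)
}
```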
3 changes: 3 additions & 0 deletions upup/pkg/fi/cloudup/awsup/aws_cloud.go
@@ -477,6 +477,9 @@ func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstanceGroupMember) erro

// DetachInstance causes an aws instance to no longer be counted against the ASG's size limits.
func (c *awsCloudImplementation) DetachInstance(i *cloudinstances.CloudInstanceGroupMember) error {
if i.Detached {
return nil
}
if c.spotinst != nil {
return spotinst.DetachInstance(c.spotinst, i)
}

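The new early return makes DetachInstance a no-op for an instance that is already detached, so repeated calls are safe. Here is a small, self-contained sketch of that guard with hypothetical stand-in types.

```go
package main

import "fmt"

type instance struct {
	ID       string
	Detached bool
}

// detach returns immediately when the instance is already detached, keeping
// the operation idempotent just like the guard added above.
func detach(i *instance) error {
	if i.Detached {
		return nil // nothing to do
	}
	// A real implementation would call the cloud provider API here.
	i.Detached = true
	return nil
}

func main() {
	i := &instance{ID: "i-0a5ed581b862d3425"}
	fmt.Println(detach(i), detach(i)) // <nil> <nil>
}
```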