Skip to content

Commit

Permalink
Merge pull request #15 from Spellchaser/delete%
Browse files Browse the repository at this point in the history
Delete%
  • Loading branch information
Aergonus authored Jan 3, 2018
2 parents e48eecd + 321d0eb commit 39f4e79
Show file tree
Hide file tree
Showing 11 changed files with 225 additions and 89 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
all: build

ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
TAG = v0.2.0
TAG = v0.2.1

.PHONY: all build container clean

build: clean
build: clean gofmt
$(ENVVAR) go build -o kube-monkey

# Suppressing docker build avoids printing the env variables
Expand Down
29 changes: 19 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@ Opt-in is done by setting the following labels on a Kubernetes k8 app:
killed approximately every third weekday.
**`kube-monkey/identifier`**: A unique identifier for the k8 app (eg. the k8 app's name). This is used to identify the pods
that belong to a k8 app as Pods inherit labels from their k8 app.
**`kube-monkey/kill-all`**: Set this label's value to `"kill-all"` if you want kube-monkey to kill ALL of your pods. Default behavior in the absence of this label is to kill only ONE pod. **Use this label carefully.**
**`kube-monkey/kill-mode`**: Set this label's value to
* `"kill-all"` if you want kube-monkey to kill ALL of your pods regardless of status. Does not require kill-value. Default behavior in the absence of this label is to kill only ONE pod. **Use this label carefully.**
* `fixed` if you want to kill a specific number of running pods with kill-value. If you overspecify, it will kill all running pods and issue a warning.
* `random-max-percent` to specify, with kill-value, the maximum % of running pods that may be killed. At the scheduled time, a uniformly random percentage (up to that maximum) of the running pods will be terminated.
**`kube-monkey/kill-value`**: Specify value for kill-mode
* if `fixed`, provide an integer of pods to kill
* if `random-max-percent`, provide a number from 0-100 to specify the max % of pods kube-monkey can kill


#### Example of opted-in Deployment
#### Example of opted-in Deployment killing one pod per purge

```yaml
---
Expand All @@ -41,6 +46,8 @@ spec:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim-pods
kube-monkey/mtbf: '2'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: 1
[... omitted ...]
```

Expand All @@ -57,6 +64,8 @@ metadata:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim
kube-monkey/mtbf: '2'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: 1
spec:
template:
metadata:
Expand All @@ -82,16 +91,16 @@ host="https://your-apiserver-url.com"
#### Scheduling time
Scheduling happens once a day on Weekdays - this is when a schedule for terminations for the current day is generated.
During scheduling, kube-monkey will:
1. Generate a list of eligible k8 apps (k8 apps that have opted-in and are not blacklisted)
1. Generate a list of eligible k8 apps (k8 apps that have opted-in and are not blacklisted, if specified, and are whitelisted, if specified)
2. For each eligible k8 app, flip a biased coin (bias determined by `kube-monkey/mtbf`) to determine if a pod for that k8 app should be killed today
3. For each victim, calculate a random time when a pod will be killed

#### Termination time
This is the randomly generated time during the day when a victim k8 app will have a pod killed.
At termination time, kube-monkey will:
1. Check if the k8 app is still eligible (has not opted-out or been blacklisted since scheduling)
2. Get a list of running pods for the k8 app
3. Select one random pod and delete it
This is the randomly generated time during the day when a victim k8 app will have a pod killed.
At termination time, kube-monkey will:
1. Check if the k8 app is still eligible (has not opted-out or been blacklisted or removed from the whitelist since scheduling)
2. Check if the k8 app has updated kill-mode and kill-value
3. Depending on kill-mode and kill-value, execute pods

## Building

Expand Down Expand Up @@ -123,7 +132,7 @@ time_zone = "America/New_York" # Set tzdata timezone example. Note the

1. First deploy the expected `kube-monkey-config-map` configmap in the namespace you intend to run kube-monkey in (for example, the `kube-system` namespace). Make sure to define the keyname as `config.toml`

> For example `kubectl create configmap km-config --from-file=config.toml=km-config.toml`
> For example `kubectl create configmap km-config --from-file=config.toml=km-config.toml` or `kubectl apply -f km-config.yaml`
2. Run kube-monkey as a k8 app within the Kubernetes cluster, in a namespace that has permissions to kill Pods in other namespaces (eg. `kube-system`).

Expand Down
58 changes: 26 additions & 32 deletions chaos/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package chaos

import (
"fmt"
"math/rand"
"time"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
"github.com/asobti/kube-monkey/kubernetes"
"github.com/asobti/kube-monkey/victims"

Expand Down Expand Up @@ -64,6 +66,7 @@ func (c *Chaos) Execute(resultchan chan<- *ChaosResult) {
err = c.terminate(clientset)
if err != nil {
resultchan <- c.NewResult(err)
return
}

// Send a success msg
Expand Down Expand Up @@ -96,53 +99,44 @@ func (c *Chaos) verifyExecution(clientset *kube.Clientset) error {
return nil
}

// The termination type and termination of pods happens here
// The termination type and value is processed here
func (c *Chaos) terminate(clientset *kube.Clientset) error {
// Do the termination
killAll, err := c.Victim().HasKillAll(clientset)
killType, err := c.Victim().KillType(clientset)
if err != nil {
glog.Errorf("Failed to check KillType label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
return c.terminatePod(clientset)
}
if killType == config.KillAllLabelValue {
return c.Victim().TerminateAllPods(clientset)
}

killValue, err := c.Victim().KillValue(clientset)
if err != nil {
glog.Errorf("Failed to check KillAll label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
glog.Errorf("Failed to check KillValue label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
return c.terminatePod(clientset)
}

if killAll {
err = c.terminateAll(clientset)
} else {
err = c.terminatePod(clientset)
// Validate killtype
switch killType {
case config.KillFixedLabelValue:
return c.Victim().DeleteRandomPods(clientset, killValue)
case config.KillRandomLabelValue:
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return c.Victim().DeleteRandomPods(clientset, killValue*100/r.Intn(101))
default:
return fmt.Errorf("Failed to recognize KillValue label for %s %s. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
}

// Send back termination success
return nil
}

// Redundant for DeleteRandomPods(clientset,1) but DeleteRandomPod is faster
// terminatePod deletes one randomly-selected pod belonging to the victim.
// It is the fallback used when the kill-mode/kill-value labels are missing
// or unreadable.
func (c *Chaos) terminatePod(clientset *kube.Clientset) error {
	return c.Victim().DeleteRandomPod(clientset)
}

// terminateAll deletes every pod belonging to the victim. This aggressive
// behavior is opt-in only and is not the recommended default.
func (c *Chaos) terminateAll(clientset *kube.Clientset) error {
	glog.V(1).Infof("Terminating ALL pods for %s %s\n", c.Victim().Kind(), c.Victim().Name())

	podList, err := c.Victim().Pods(clientset)
	if err != nil {
		return err
	}

	if len(podList) == 0 {
		return fmt.Errorf("%s %s has no pods at the moment", c.Victim().Kind(), c.Victim().Name())
	}

	for _, victimPod := range podList {
		// A failed deletion is logged and skipped so the remaining pods are
		// still attempted.
		if deleteErr := c.Victim().DeletePod(clientset, victimPod.Name); deleteErr != nil {
			glog.Errorf("Failed to delete pod %s for %s %s", victimPod.Name, c.Victim().Kind(), c.Victim().Name())
		}
	}

	return nil
}

// Create a ChaosResult instance
func (c *Chaos) NewResult(e error) *ChaosResult {
return &ChaosResult{
Expand Down
15 changes: 9 additions & 6 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ const (
// Currently, there does not appear to be
// any value in making these configurable
// so defining them as consts
IdentLabelKey = "kube-monkey/identifier"
EnabledLabelKey = "kube-monkey/enabled"
EnabledLabelValue = "enabled"
MtbfLabelKey = "kube-monkey/mtbf"
KillAllLabelKey = "kube-monkey/kill-all"
KillAllLabelValue = "kill-all"
IdentLabelKey = "kube-monkey/identifier"
EnabledLabelKey = "kube-monkey/enabled"
EnabledLabelValue = "enabled"
MtbfLabelKey = "kube-monkey/mtbf"
KillTypeLabelKey = "kube-monkey/kill-mode"
KillValueLabelKey = "kube-monkey/kill-value"
KillRandomLabelValue = "random-max-percent"
KillFixedLabelValue = "fixed"
KillAllLabelValue = "kill-all"
)

func SetDefaults() {
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ func NewInClusterClient() (*kube.Clientset, error) {
func VerifyClient(client *kube.Clientset) bool {
_, err := client.ServerVersion()
return err == nil
}
}
2 changes: 1 addition & 1 deletion victims/factory/deployments/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func New(dep *v1beta1.Deployment) (*Deployment, error) {
if err != nil {
return nil, err
}
kind := fmt.Sprintf("%T", dep)
kind := fmt.Sprintf("%T", *dep)

return &Deployment{victims.New(kind, dep.Name, dep.Namespace, ident, mtbf)}, nil
}
Expand Down
37 changes: 32 additions & 5 deletions victims/factory/deployments/eligible_deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package deployments
//All these functions require api access specific to the version of the app

import (
"fmt"
"strconv"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
Expand Down Expand Up @@ -51,13 +54,37 @@ func (d *Deployment) IsEnrolled(clientset *kube.Clientset) (bool, error) {
return deployment.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil
}

// Checks if the deployment is flagged for killall at this time
func (d *Deployment) HasKillAll(clientset *kube.Clientset) (bool, error) {
// Returns current killtype config label for update
func (d *Deployment) KillType(clientset *kube.Clientset) (string, error) {
deployment, err := clientset.ExtensionsV1beta1().Deployments(d.Namespace()).Get(d.Name(), metav1.GetOptions{})
if err != nil {
// Ran into some error: return 'false' for killAll to be safe
return false, nil
return "", err
}

killType, ok := deployment.Labels[config.KillTypeLabelKey]
if !ok {
return "", fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillTypeLabelKey)
}

return killType, nil
}

// KillValue fetches the deployment's current kill-value label and parses it
// as a positive integer. It returns -1 and an error when the label is
// absent, non-numeric, or less than 1.
func (d *Deployment) KillValue(clientset *kube.Clientset) (int, error) {
	deployment, err := clientset.ExtensionsV1beta1().Deployments(d.Namespace()).Get(d.Name(), metav1.GetOptions{})
	if err != nil {
		return -1, err
	}

	value, ok := deployment.Labels[config.KillValueLabelKey]
	if !ok {
		return -1, fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillValueLabelKey)
	}

	// Check the Atoi error explicitly: previously it was ignored, so a
	// non-numeric label silently parsed as 0 and the message reported "0"
	// instead of the actual parse failure.
	killValue, err := strconv.Atoi(value)
	if err != nil {
		return -1, fmt.Errorf("Invalid value for label %s: %v", config.KillValueLabelKey, err)
	}
	if killValue < 1 {
		return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killValue)
	}

	return killValue, nil
}
36 changes: 18 additions & 18 deletions victims/factory/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,25 @@ func EligibleVictims() (eligibleVictims []victims.Victim, err error) {
return nil, err
}

for _, namespace := range config.WhitelistedNamespaces().UnsortedList() {
// Fetch deployments
deployments, err := deployments.EligibleDeployments(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible deployments for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, deployments...)
for _, namespace := range config.WhitelistedNamespaces().UnsortedList() {
// Fetch deployments
deployments, err := deployments.EligibleDeployments(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible deployments for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, deployments...)

// Fetch statefulsets
statefulsets, err := statefulsets.EligibleStatefulSets(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible statefulsets for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, statefulsets...)
}
// Fetch statefulsets
statefulsets, err := statefulsets.EligibleStatefulSets(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible statefulsets for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, statefulsets...)
}

return
}
Expand Down
45 changes: 36 additions & 9 deletions victims/factory/statefulsets/eligible_statefulsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package statefulsets
//All these functions require api access specific to the version of the app

import (
"fmt"
"strconv"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
Expand All @@ -15,7 +18,7 @@ import (

// Get all eligible statefulsets that opted in (filtered by config.EnabledLabel)
func EligibleStatefulSets(clientset *kube.Clientset, namespace string, filter *metav1.ListOptions) (eligVictims []victims.Victim, err error) {
enabledVictims, err := clientset.AppsV1beta1().StatefulSets(namespace).List(*filter)
enabledVictims, err := clientset.AppsV1beta1().StatefulSets(namespace).List(*filter)
if err != nil {
return nil, err
}
Expand All @@ -27,9 +30,9 @@ func EligibleStatefulSets(clientset *kube.Clientset, namespace string, filter *m
continue
}

// TODO: After generating whitelisting ns list, this will move to factory.
// IsBlacklisted will change to something like IsAllowedNamespace
// and will only be used to verify at time of scheduled execution
// TODO: After generating whitelisting ns list, this will move to factory.
// IsBlacklisted will change to something like IsAllowedNamespace
// and will only be used to verify at time of scheduled execution
if victim.IsBlacklisted() {
continue
}
Expand All @@ -51,13 +54,37 @@ func (ss *StatefulSet) IsEnrolled(clientset *kube.Clientset) (bool, error) {
return statefulset.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil
}

// Checks if the statefulset is flagged for killall at this time
func (ss *StatefulSet) HasKillAll(clientset *kube.Clientset) (bool, error) {
// Returns current killtype config label for update
func (ss *StatefulSet) KillType(clientset *kube.Clientset) (string, error) {
statefulset, err := clientset.AppsV1beta1().StatefulSets(ss.Namespace()).Get(ss.Name(), metav1.GetOptions{})
if err != nil {
// Ran into some error: return 'false' for killAll to be safe
return false, nil
return "", err
}

killType, ok := statefulset.Labels[config.KillTypeLabelKey]
if !ok {
return "", fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillTypeLabelKey)
}

return killType, nil
}

// KillValue fetches the statefulset's current kill-value label and parses it
// as a positive integer. It returns -1 and an error when the label is
// absent, non-numeric, or less than 1.
func (ss *StatefulSet) KillValue(clientset *kube.Clientset) (int, error) {
	statefulset, err := clientset.AppsV1beta1().StatefulSets(ss.Namespace()).Get(ss.Name(), metav1.GetOptions{})
	if err != nil {
		return -1, err
	}

	value, ok := statefulset.Labels[config.KillValueLabelKey]
	if !ok {
		return -1, fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillValueLabelKey)
	}

	// Check the Atoi error explicitly: previously it was ignored, so a
	// non-numeric label silently parsed as 0 and the message reported "0"
	// instead of the actual parse failure.
	killValue, err := strconv.Atoi(value)
	if err != nil {
		return -1, fmt.Errorf("Invalid value for label %s: %v", config.KillValueLabelKey, err)
	}
	if killValue < 1 {
		return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killValue)
	}

	return killValue, nil
}
2 changes: 1 addition & 1 deletion victims/factory/statefulsets/statefulsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func New(ss *v1beta1.StatefulSet) (*StatefulSet, error) {
if err != nil {
return nil, err
}
kind := fmt.Sprintf("%T", ss)
kind := fmt.Sprintf("%T", *ss)

return &StatefulSet{victims.New(kind, ss.Name, ss.Namespace, ident, mtbf)}, nil
}
Expand Down
Loading

0 comments on commit 39f4e79

Please sign in to comment.