Skip to content

Commit

Permalink
Merge pull request #15 from Spellchaser/delete%
Browse files Browse the repository at this point in the history
Delete%
  • Loading branch information
Aergonus authored Jan 3, 2018
2 parents e48eecd + 321d0eb commit 39f4e79
Show file tree
Hide file tree
Showing 11 changed files with 225 additions and 89 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
all: build

ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
TAG = v0.2.0
TAG = v0.2.1

.PHONY: all build container clean

build: clean
build: clean gofmt
$(ENVVAR) go build -o kube-monkey

# Suppressing docker build avoids printing the env variables
Expand Down
29 changes: 19 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@ Opt-in is done by setting the following labels on a Kubernetes k8 app:
killed approximately every third weekday.
**`kube-monkey/identifier`**: A unique identifier for the k8 app (eg. the k8 app's name). This is used to identify the pods
that belong to a k8 app as Pods inherit labels from their k8 app.
**`kube-monkey/kill-all`**: Set this label's value to `"kill-all"` if you want kube-monkey to kill ALL of your pods. Default behavior in the absence of this label is to kill only ONE pod. **Use this label carefully.**
**`kube-monkey/kill-mode`**: Set this label's value to
* `"kill-all"` if you want kube-monkey to kill ALL of your pods regardless of status. Does not require kill-value. Default behavior in the absence of this label is to kill only ONE pod. **Use this label carefully.**
* `fixed` if you want to kill a specific number of running pods with kill-value. If you overspecify, it will kill all running pods and issue a warning.
* `random-max-percent` to specify, with kill-value, the maximum % of running pods that may be killed. At the scheduled time, a uniformly random percentage (up to that maximum) of the running pods will be terminated.
**`kube-monkey/kill-value`**: Specify value for kill-mode
* if `fixed`, provide an integer of pods to kill
* if `random-max-percent`, provide a number from 0-100 to specify the max % of pods kube-monkey can kill


#### Example of opted-in Deployment
#### Example of opted-in Deployment killing one pod per purge

```yaml
---
Expand All @@ -41,6 +46,8 @@ spec:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim-pods
kube-monkey/mtbf: '2'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: 1
[... omitted ...]
```

Expand All @@ -57,6 +64,8 @@ metadata:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim
kube-monkey/mtbf: '2'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: 1
spec:
template:
metadata:
Expand All @@ -82,16 +91,16 @@ host="https://your-apiserver-url.com"
#### Scheduling time
Scheduling happens once a day on Weekdays - this is when a schedule for terminations for the current day is generated.
During scheduling, kube-monkey will:
1. Generate a list of eligible k8 apps (k8 apps that have opted-in and are not blacklisted)
1. Generate a list of eligible k8 apps (k8 apps that have opted-in and are not blacklisted, if specified, and are whitelisted, if specified)
2. For each eligible k8 app, flip a biased coin (bias determined by `kube-monkey/mtbf`) to determine if a pod for that k8 app should be killed today
3. For each victim, calculate a random time when a pod will be killed

#### Termination time
This is the randomly generated time during the day when a victim k8 app will have a pod killed.
At termination time, kube-monkey will:
1. Check if the k8 app is still eligible (has not opted-out or been blacklisted since scheduling)
2. Get a list of running pods for the k8 app
3. Select one random pod and delete it
This is the randomly generated time during the day when a victim k8 app will have a pod killed.
At termination time, kube-monkey will:
1. Check if the k8 app is still eligible (has not opted-out or been blacklisted or removed from the whitelist since scheduling)
2. Check if the k8 app has updated kill-mode and kill-value
3. Depending on kill-mode and kill-value, execute pods

## Building

Expand Down Expand Up @@ -123,7 +132,7 @@ time_zone = "America/New_York" # Set tzdata timezone example. Note the

1. First deploy the expected `kube-monkey-config-map` configmap in the namespace you intend to run kube-monkey in (for example, the `kube-system` namespace). Make sure to define the keyname as `config.toml`

> For example `kubectl create configmap km-config --from-file=config.toml=km-config.toml`
> For example `kubectl create configmap km-config --from-file=config.toml=km-config.toml` or `kubectl apply -f km-config.yaml`
2. Run kube-monkey as a k8 app within the Kubernetes cluster, in a namespace that has permissions to kill Pods in other namespaces (eg. `kube-system`).

Expand Down
58 changes: 26 additions & 32 deletions chaos/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package chaos

import (
"fmt"
"math/rand"
"time"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
"github.com/asobti/kube-monkey/kubernetes"
"github.com/asobti/kube-monkey/victims"

Expand Down Expand Up @@ -64,6 +66,7 @@ func (c *Chaos) Execute(resultchan chan<- *ChaosResult) {
err = c.terminate(clientset)
if err != nil {
resultchan <- c.NewResult(err)
return
}

// Send a success msg
Expand Down Expand Up @@ -96,53 +99,44 @@ func (c *Chaos) verifyExecution(clientset *kube.Clientset) error {
return nil
}

// The termination type and termination of pods happens here
// The termination type and value is processed here
func (c *Chaos) terminate(clientset *kube.Clientset) error {
// Do the termination
killAll, err := c.Victim().HasKillAll(clientset)
killType, err := c.Victim().KillType(clientset)
if err != nil {
glog.Errorf("Failed to check KillType label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
return c.terminatePod(clientset)
}
if killType == config.KillAllLabelValue {
return c.Victim().TerminateAllPods(clientset)
}

killValue, err := c.Victim().KillValue(clientset)
if err != nil {
glog.Errorf("Failed to check KillAll label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
glog.Errorf("Failed to check KillValue label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
return c.terminatePod(clientset)
}

if killAll {
err = c.terminateAll(clientset)
} else {
err = c.terminatePod(clientset)
// Validate killtype
switch killType {
case config.KillFixedLabelValue:
return c.Victim().DeleteRandomPods(clientset, killValue)
case config.KillRandomLabelValue:
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return c.Victim().DeleteRandomPods(clientset, killValue*100/r.Intn(101))
default:
return fmt.Errorf("Failed to recognize KillValue label for %s %s. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
}

// Send back termination success
return nil
}

// Redundant for DeleteRandomPods(clientset,1) but DeleteRandomPod is faster
// terminatePod deletes one randomly-selected pod belonging to the victim.
// It is the fallback used when the kill-mode/kill-value labels are missing
// or unreadable.
func (c *Chaos) terminatePod(clientset *kube.Clientset) error {
	return c.Victim().DeleteRandomPod(clientset)
}

// terminateAll deletes every pod belonging to the victim. This aggressive
// behavior is opt-in only and is not the recommended default.
func (c *Chaos) terminateAll(clientset *kube.Clientset) error {
	glog.V(1).Infof("Terminating ALL pods for %s %s\n", c.Victim().Kind(), c.Victim().Name())

	podList, err := c.Victim().Pods(clientset)
	if err != nil {
		return err
	}

	if len(podList) == 0 {
		return fmt.Errorf("%s %s has no pods at the moment", c.Victim().Kind(), c.Victim().Name())
	}

	for _, victimPod := range podList {
		// A failed deletion is logged and skipped so the remaining pods are
		// still attempted.
		if deleteErr := c.Victim().DeletePod(clientset, victimPod.Name); deleteErr != nil {
			glog.Errorf("Failed to delete pod %s for %s %s", victimPod.Name, c.Victim().Kind(), c.Victim().Name())
		}
	}

	return nil
}

// Create a ChaosResult instance
func (c *Chaos) NewResult(e error) *ChaosResult {
return &ChaosResult{
Expand Down
15 changes: 9 additions & 6 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ const (
// Currently, there does not appear to be
// any value in making these configurable
// so defining them as consts
IdentLabelKey = "kube-monkey/identifier"
EnabledLabelKey = "kube-monkey/enabled"
EnabledLabelValue = "enabled"
MtbfLabelKey = "kube-monkey/mtbf"
KillAllLabelKey = "kube-monkey/kill-all"
KillAllLabelValue = "kill-all"
IdentLabelKey = "kube-monkey/identifier"
EnabledLabelKey = "kube-monkey/enabled"
EnabledLabelValue = "enabled"
MtbfLabelKey = "kube-monkey/mtbf"
KillTypeLabelKey = "kube-monkey/kill-mode"
KillValueLabelKey = "kube-monkey/kill-value"
KillRandomLabelValue = "random-max-percent"
KillFixedLabelValue = "fixed"
KillAllLabelValue = "kill-all"
)

func SetDefaults() {
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ func NewInClusterClient() (*kube.Clientset, error) {
func VerifyClient(client *kube.Clientset) bool {
_, err := client.ServerVersion()
return err == nil
}
}
2 changes: 1 addition & 1 deletion victims/factory/deployments/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func New(dep *v1beta1.Deployment) (*Deployment, error) {
if err != nil {
return nil, err
}
kind := fmt.Sprintf("%T", dep)
kind := fmt.Sprintf("%T", *dep)

return &Deployment{victims.New(kind, dep.Name, dep.Namespace, ident, mtbf)}, nil
}
Expand Down
37 changes: 32 additions & 5 deletions victims/factory/deployments/eligible_deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package deployments
//All these functions require api access specific to the version of the app

import (
"fmt"
"strconv"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
Expand Down Expand Up @@ -51,13 +54,37 @@ func (d *Deployment) IsEnrolled(clientset *kube.Clientset) (bool, error) {
return deployment.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil
}

// Checks if the deployment is flagged for killall at this time
func (d *Deployment) HasKillAll(clientset *kube.Clientset) (bool, error) {
// Returns current killtype config label for update
func (d *Deployment) KillType(clientset *kube.Clientset) (string, error) {
deployment, err := clientset.ExtensionsV1beta1().Deployments(d.Namespace()).Get(d.Name(), metav1.GetOptions{})
if err != nil {
// Ran into some error: return 'false' for killAll to be safe
return false, nil
return "", err
}

killType, ok := deployment.Labels[config.KillTypeLabelKey]
if !ok {
return "", fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillTypeLabelKey)
}

return killType, nil
}

// KillValue fetches the deployment's current kill-value label and parses it
// as a positive integer. It returns -1 and an error when the label is
// absent, non-numeric, or less than 1.
func (d *Deployment) KillValue(clientset *kube.Clientset) (int, error) {
	deployment, err := clientset.ExtensionsV1beta1().Deployments(d.Namespace()).Get(d.Name(), metav1.GetOptions{})
	if err != nil {
		return -1, err
	}

	value, ok := deployment.Labels[config.KillValueLabelKey]
	if !ok {
		return -1, fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillValueLabelKey)
	}

	// Check the Atoi error explicitly: previously it was ignored, so a
	// non-numeric label silently parsed as 0 and the message reported "0"
	// instead of the actual parse failure.
	killValue, err := strconv.Atoi(value)
	if err != nil {
		return -1, fmt.Errorf("Invalid value for label %s: %v", config.KillValueLabelKey, err)
	}
	if killValue < 1 {
		return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killValue)
	}

	return killValue, nil
}
36 changes: 18 additions & 18 deletions victims/factory/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,25 @@ func EligibleVictims() (eligibleVictims []victims.Victim, err error) {
return nil, err
}

for _, namespace := range config.WhitelistedNamespaces().UnsortedList() {
// Fetch deployments
deployments, err := deployments.EligibleDeployments(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible deployments for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, deployments...)
for _, namespace := range config.WhitelistedNamespaces().UnsortedList() {
// Fetch deployments
deployments, err := deployments.EligibleDeployments(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible deployments for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, deployments...)

// Fetch statefulsets
statefulsets, err := statefulsets.EligibleStatefulSets(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible statefulsets for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, statefulsets...)
}
// Fetch statefulsets
statefulsets, err := statefulsets.EligibleStatefulSets(clientset, namespace, filter)
if err != nil {
//allow pass through to schedule other kinds and namespaces
glog.Warningf("Failed to fetch eligible statefulsets for namespace %s due to error: %s", namespace, err.Error())
continue
}
eligibleVictims = append(eligibleVictims, statefulsets...)
}

return
}
Expand Down
45 changes: 36 additions & 9 deletions victims/factory/statefulsets/eligible_statefulsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package statefulsets
//All these functions require api access specific to the version of the app

import (
"fmt"
"strconv"

"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
Expand All @@ -15,7 +18,7 @@ import (

// Get all eligible statefulsets that opted in (filtered by config.EnabledLabel)
func EligibleStatefulSets(clientset *kube.Clientset, namespace string, filter *metav1.ListOptions) (eligVictims []victims.Victim, err error) {
enabledVictims, err := clientset.AppsV1beta1().StatefulSets(namespace).List(*filter)
enabledVictims, err := clientset.AppsV1beta1().StatefulSets(namespace).List(*filter)
if err != nil {
return nil, err
}
Expand All @@ -27,9 +30,9 @@ func EligibleStatefulSets(clientset *kube.Clientset, namespace string, filter *m
continue
}

// TODO: After generating whitelisting ns list, this will move to factory.
// IsBlacklisted will change to something like IsAllowedNamespace
// and will only be used to verify at time of scheduled execution
// TODO: After generating whitelisting ns list, this will move to factory.
// IsBlacklisted will change to something like IsAllowedNamespace
// and will only be used to verify at time of scheduled execution
if victim.IsBlacklisted() {
continue
}
Expand All @@ -51,13 +54,37 @@ func (ss *StatefulSet) IsEnrolled(clientset *kube.Clientset) (bool, error) {
return statefulset.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil
}

// Checks if the statefulset is flagged for killall at this time
func (ss *StatefulSet) HasKillAll(clientset *kube.Clientset) (bool, error) {
// Returns current killtype config label for update
func (ss *StatefulSet) KillType(clientset *kube.Clientset) (string, error) {
statefulset, err := clientset.AppsV1beta1().StatefulSets(ss.Namespace()).Get(ss.Name(), metav1.GetOptions{})
if err != nil {
// Ran into some error: return 'false' for killAll to be safe
return false, nil
return "", err
}

killType, ok := statefulset.Labels[config.KillTypeLabelKey]
if !ok {
return "", fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillTypeLabelKey)
}

return killType, nil
}

// KillValue fetches the statefulset's current kill-value label and parses it
// as a positive integer. It returns -1 and an error when the label is
// absent, non-numeric, or less than 1.
func (ss *StatefulSet) KillValue(clientset *kube.Clientset) (int, error) {
	statefulset, err := clientset.AppsV1beta1().StatefulSets(ss.Namespace()).Get(ss.Name(), metav1.GetOptions{})
	if err != nil {
		return -1, err
	}

	value, ok := statefulset.Labels[config.KillValueLabelKey]
	if !ok {
		return -1, fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillValueLabelKey)
	}

	// Check the Atoi error explicitly: previously it was ignored, so a
	// non-numeric label silently parsed as 0 and the message reported "0"
	// instead of the actual parse failure.
	killValue, err := strconv.Atoi(value)
	if err != nil {
		return -1, fmt.Errorf("Invalid value for label %s: %v", config.KillValueLabelKey, err)
	}
	if killValue < 1 {
		return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killValue)
	}

	return killValue, nil
}
2 changes: 1 addition & 1 deletion victims/factory/statefulsets/statefulsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func New(ss *v1beta1.StatefulSet) (*StatefulSet, error) {
if err != nil {
return nil, err
}
kind := fmt.Sprintf("%T", ss)
kind := fmt.Sprintf("%T", *ss)

return &StatefulSet{victims.New(kind, ss.Name, ss.Namespace, ident, mtbf)}, nil
}
Expand Down
Loading

0 comments on commit 39f4e79

Please sign in to comment.