Skip to content
This repository has been archived by the owner on Oct 23, 2024. It is now read-only.

Commit

Permalink
Repair cassandra nodes per pod using the 'REPAIR_POD' parameter (#77)
Browse files Browse the repository at this point in the history
When updating the 'REPAIR_POD' parameter a plan will be triggered that runs 'nodetool repair' for the pod specified in the parameter.

Signed-off-by: Jan Schlicht <[email protected]>
  • Loading branch information
Jan Schlicht authored Apr 14, 2020
1 parent d0cbda1 commit 535fbd6
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 9 deletions.
15 changes: 15 additions & 0 deletions operator/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ tasks:
parameter: SERVICE_ACCOUNT_INSTALL
resources:
- node-resolver-rbac.yaml
- name: repair-pod
kind: Apply
spec:
resources:
- repair-job-rbac.yaml
- repair-job.yaml
plans:
deploy:
strategy: serial
Expand All @@ -54,3 +60,12 @@ plans:
tasks:
- node
- ext-service
repair-pod:
strategy: serial
phases:
- name: nodes
strategy: parallel
steps:
- name: repair
tasks:
- repair-pod
9 changes: 9 additions & 0 deletions operator/params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -892,3 +892,12 @@ parameters:
- name: POD_MANAGEMENT_POLICY
description: "podManagementPolicy of the Cassandra Statefulset"
default: "OrderedReady"

################################################################################
################################ Repair options ################################
################################################################################

- name: REPAIR_POD
description: "Name of the pod on which 'nodetool repair' should be run."
default: ""
trigger: repair-pod
31 changes: 31 additions & 0 deletions operator/templates/repair-job-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ .Name }}-node-repair-role
namespace: {{ .Namespace }}
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get"]
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["create"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Name }}-node-repairer
namespace: {{ .Namespace }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ .Name }}-node-repairer-{{ .Namespace }}-binding
subjects:
- kind: ServiceAccount
name: {{ .Name }}-node-repairer
namespace: {{ .Namespace }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: {{ .Name }}-node-repair-role
18 changes: 18 additions & 0 deletions operator/templates/repair-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: {{ $.Name }}-node-repair-job
namespace: {{ $.Namespace }}
labels:
cassandra: {{ $.OperatorName }}
app: {{ $.Name }}
spec:
template:
spec:
containers:
- name: repair-job
image: bitnami/kubectl:1.18.0
command: [ "kubectl", "exec", "{{ $.Params.REPAIR_POD }}", "--", "nodetool", "repair" ]
restartPolicy: Never
serviceAccountName: {{ .Name }}-node-repairer
15 changes: 15 additions & 0 deletions templates/operator/operator.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ tasks:
parameter: SERVICE_ACCOUNT_INSTALL
resources:
- node-resolver-rbac.yaml
- name: repair-pod
kind: Apply
spec:
resources:
- repair-job-rbac.yaml
- repair-job.yaml
plans:
deploy:
strategy: serial
Expand All @@ -54,3 +60,12 @@ plans:
tasks:
- node
- ext-service
repair-pod:
strategy: serial
phases:
- name: nodes
strategy: parallel
steps:
- name: repair
tasks:
- repair-pod
9 changes: 9 additions & 0 deletions templates/operator/params.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -892,3 +892,12 @@ parameters:
- name: POD_MANAGEMENT_POLICY
description: "podManagementPolicy of the Cassandra Statefulset"
default: "OrderedReady"

################################################################################
################################ Repair options ################################
################################################################################

- name: REPAIR_POD
description: "Name of the pod on which 'nodetool repair' should be run."
default: ""
trigger: repair-pod
49 changes: 40 additions & 9 deletions tests/cassandra/cassandra.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func OverrideOperatorVersion(
return operatorVersion, desiredOperatorVersion, nil
}

func firstPodName(instance kudo.Instance) (string, error) {
func FirstPodName(instance kudo.Instance) (string, error) {
if instance.Spec.Parameters["NODE_TOPOLOGY"] != "" {
topology, err := TopologyFromYaml(instance.Spec.Parameters["NODE_TOPOLOGY"])
if err != nil {
Expand All @@ -100,7 +100,7 @@ func firstPodName(instance kudo.Instance) (string, error) {
}

func Nodes(client client.Client, instance kudo.Instance) ([]map[string]string, error) {
podName, err := firstPodName(instance)
podName, err := FirstPodName(instance)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -180,7 +180,7 @@ func Nodes(client client.Client, instance kudo.Instance) ([]map[string]string, e

// Cqlsh Wrapper to run cql commands in the cqlsh cli of cassandra 0th node
func Cqlsh(client client.Client, instance kudo.Instance, cql string) (string, error) {
podName, err := firstPodName(instance)
podName, err := FirstPodName(instance)
if err != nil {
return "", err
}
Expand Down Expand Up @@ -249,19 +249,31 @@ func NodeJVMOptions(client client.Client, instance kudo.Instance) (map[string]st
",")
}

func configurationFromNodeLogs(
client client.Client,
instance kudo.Instance,
regex string,
separator string) (map[string]string, error) {
func NodeWasRepaired(client client.Client, instance kudo.Instance) (bool, error) {
return nodeLogsContain(
client,
instance,
"o.a.cassandra.repair.RepairRunnable - Starting repair command",
)
}

func nodeLogs(client client.Client, instance kudo.Instance) ([]byte, error) {
podName := fmt.Sprintf("%s-%s-%d", instance.Name, "node", 0)

pod, err := kubernetes.GetPod(client, podName, instance.Namespace)
if err != nil {
return nil, err
}

logs, err := pod.ContainerLogs("cassandra")
return pod.ContainerLogs("cassandra")
}

func configurationFromNodeLogs(
client client.Client,
instance kudo.Instance,
regex string,
separator string) (map[string]string, error) {
logs, err := nodeLogs(client, instance)
if err != nil {
return nil, err
}
Expand All @@ -288,3 +300,22 @@ func configurationFromNodeLogs(

return configuration, nil
}

func nodeLogsContain(client client.Client, instance kudo.Instance, expected string) (bool, error) {
logs, err := nodeLogs(client, instance)
if err != nil {
return false, err
}

scanner := bufio.NewScanner(bytes.NewReader(logs))

var inLogs bool
for scanner.Scan() {
inLogs = strings.Contains(scanner.Text(), expected)
if inLogs {
break
}
}

return inLogs, nil
}
16 changes: 16 additions & 0 deletions tests/suites/sanity/sanity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,22 @@ var _ = Describe(TestName, func() {
Expect(err).To(BeNil())

assertNumberOfCassandraNodes(NodeCount)

By("Triggering a Cassandra node repair")
podName, err := cassandra.FirstPodName(Operator.Instance)
Expect(err).To(BeNil())

err = Operator.Instance.UpdateParameters(map[string]string{
"REPAIR_POD": podName,
})
Expect(err).To(BeNil())

err = Operator.Instance.WaitForPlanComplete("repair-pod")
Expect(err).To(BeNil())

repair, err := cassandra.NodeWasRepaired(Client, Operator.Instance)
Expect(err).To(BeNil())
Expect(repair).To(BeTrue())
})

It("Uninstalls the operator", func() {
Expand Down

0 comments on commit 535fbd6

Please sign in to comment.