Modify startOneNodeRack behavior with 0 target size StS #438

Merged · 4 commits · Nov 7, 2022 · Changes from 1 commit
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti
## unreleased

* [CHANGE] [#291](https://github.com/k8ssandra/cass-operator/issues/291) Update Ginkgo to v2 (maintain current features, nothing additional from v2)
* [BUGFIX] [#437](https://github.com/k8ssandra/cass-operator/issues/437) Fix startOneNodeRack to not loop forever in case of StS with size 0 (such as decommission of DC)

## v1.13.0

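For readers skimming the diffs that follow: the bug was that startOneNodePerRack always tried to start pod index 0 of every rack's StatefulSet, so a StatefulSet scaled to 0 (as happens while a datacenter is being decommissioned) left reconciliation waiting forever for a pod that will never exist. The fix skips such racks. Below is a minimal sketch of that guard, with an illustrative helper name and packaging rather than the operator's exact code:

```go
package sketch

import appsv1 "k8s.io/api/apps/v1"

// rackNeedsStartedNode reports whether a rack's StatefulSet is expected to
// own at least one pod. When a DC is being decommissioned, its StatefulSets
// are scaled to 0, so there is no "<sts-name>-0" pod to wait for.
// (Illustrative helper; the real check lives inline in startOneNodePerRack.)
func rackNeedsStartedNode(sts *appsv1.StatefulSet) bool {
	// Spec.Replicas is a *int32; a nil value defaults to 1 in Kubernetes.
	if sts.Spec.Replicas != nil && *sts.Spec.Replicas < 1 {
		return false // size 0: skip this rack instead of looping forever
	}
	return true
}
```

The actual change in reconcile_racks.go below expresses this as a `continue` inside the loop over rc.statefulSets.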
12 changes: 6 additions & 6 deletions pkg/reconciliation/reconcile_racks.go
@@ -1817,7 +1817,9 @@ func (rc *ReconciliationContext) startOneNodePerRack(endpointData httphelper.Cas
labelSeedBeforeStart := readySeeds == 0 && !rc.hasAdditionalSeeds()

for _, statefulSet := range rc.statefulSets {

if *statefulSet.Spec.Replicas < 1 {
continue
}
podName := getStatefulSetPodNameForIdx(statefulSet, 0)
pod := rc.getDCPodByName(podName)
notReady, err := rc.startNode(pod, labelSeedBeforeStart, endpointData)
@@ -1868,7 +1870,9 @@ func (rc *ReconciliationContext) hasAdditionalSeeds() bool {
}
additionalSeedEndpoints := 0
if additionalSeedEndpoint, err := rc.GetAdditionalSeedEndpoint(); err == nil {
additionalSeedEndpoints = len(additionalSeedEndpoint.Subsets[0].Addresses)
if len(additionalSeedEndpoint.Subsets) > 0 {
additionalSeedEndpoints = len(additionalSeedEndpoint.Subsets[0].Addresses)
}
}
return additionalSeedEndpoints > 0
}
@@ -1884,7 +1888,6 @@ func (rc *ReconciliationContext) startNode(pod *corev1.Pod, labelSeedBeforeStart

// this is the one exception to all seed labelling happening in labelSeedPods()
if labelSeedBeforeStart {

patch := client.MergeFrom(pod.DeepCopy())
pod.Labels[api.SeedNodeLabel] = "true"
if err := rc.Client.Patch(rc.Ctx, pod, patch); err != nil {
@@ -1893,9 +1896,6 @@

rc.Recorder.Eventf(rc.Datacenter, corev1.EventTypeNormal, events.LabeledPodAsSeed,
"Labeled pod a seed node %s", pod.Name)

// sleeping five seconds for DNS paranoia
time.Sleep(5 * time.Second)
}

err := rc.startCassandra(endpointData, pod)
161 changes: 160 additions & 1 deletion pkg/reconciliation/reconcile_racks_test.go
@@ -7,13 +7,14 @@ import (
"context"
"fmt"
"io"
"k8s.io/utils/pointer"
"net/http"
"reflect"
"strings"
"testing"
"time"

"k8s.io/utils/pointer"

api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
taskapi "github.com/k8ssandra/cass-operator/apis/control/v1alpha1"
"github.com/k8ssandra/cass-operator/pkg/httphelper"
@@ -1725,6 +1726,15 @@ func TestReconciliationContext_startAllNodes(t *testing.T) {
wantNotReady: true,
wantEvents: []string{"Normal StartingCassandra Starting Cassandra for pod rack3-2"},
},
{
name: "unbalanced racks, part of decommission",
racks: racks{
"rack1": {},
"rack2": {true},
"rack3": {true},
},
wantNotReady: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
@@ -1796,3 +1806,152 @@
})
}
}

func TestStartOneNodePerRack(t *testing.T) {
// A boolean representing the state of a pod (started or not).
type pod bool

// racks is a map of rack names to a list of pods in that rack.
type racks map[string][]pod

tests := []struct {
name string
racks racks
wantNotReady bool
seedCount int
}{
{
name: "balanced racks, all nodes started",
racks: racks{
"rack1": {true, true, true},
"rack2": {true, true, true},
"rack3": {true, true, true},
},
wantNotReady: false,
seedCount: 3,
},
{
name: "balanced racks, missing nodes",
racks: racks{
"rack1": {true},
"rack2": {true},
"rack3": {false},
},
wantNotReady: true,
seedCount: 2,
},
{
name: "balanced racks, nothing started",
racks: racks{
"rack1": {false, false, false},
"rack2": {false, false, false},
"rack3": {false, false, false},
},
wantNotReady: true,
seedCount: 0,
},
{
name: "unbalanced racks, part of decommission",
racks: racks{
"rack1": {},
"rack2": {true},
"rack3": {true},
},
wantNotReady: false,
seedCount: 2,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rc, _, _ := setupTest()
for rackName, rackPods := range tt.racks {
sts := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{Name: rackName},
Spec: appsv1.StatefulSetSpec{Replicas: pointer.Int32(int32(len(rackPods)))},
}
rc.statefulSets = append(rc.statefulSets, sts)
for i, started := range rackPods {
p := &corev1.Pod{}
p.Name = getStatefulSetPodNameForIdx(sts, int32(i))
p.Labels = map[string]string{}
p.Status.ContainerStatuses = []corev1.ContainerStatus{
{
Name: "cassandra",
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{
StartedAt: metav1.Time{Time: time.Now().Add(-time.Minute)},
},
},
Ready: bool(started),
},
}
if started {
p.Labels[api.CassNodeState] = stateStarted
} else {
p.Labels[api.CassNodeState] = stateReadyToStart
}
rc.dcPods = append(rc.dcPods, p)
}
}

mockClient := &mocks.Client{}
rc.Client = mockClient

res := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(strings.NewReader("OK")),
}

mockHttpClient := &mocks.HttpClient{}
mockHttpClient.On("Do",
mock.MatchedBy(
func(req *http.Request) bool {
return req != nil
})).
Return(res, nil).
Once()

client := httphelper.NodeMgmtClient{
Client: mockHttpClient,
Log: rc.ReqLogger,
Protocol: "http",
}

rc.NodeMgmtClient = client

// mock the calls in labelServerPodStarting:
// patch the pod: pod.Labels[api.CassNodeState] = stateStarting
k8sMockClientPatch(mockClient, nil)
// get the status client
k8sMockClientStatus(mockClient, mockClient)
// patch the dc status: dc.Status.LastServerNodeStarted = metav1.Now()
k8sMockClientPatch(mockClient, nil)

k8sMockClientPatch(mockClient, nil)
k8sMockClientPatch(mockClient, nil)
k8sMockClientPatch(mockClient, nil)
k8sMockClientPatch(mockClient, nil)

// GET of the additional-seeds Endpoints: return empty subsets (no additional seeds)
k8sMockClientGet(rc.Client.(*mocks.Client), nil).
Run(func(args mock.Arguments) {
arg := args.Get(2).(*corev1.Endpoints)
arg.Subsets = []corev1.EndpointSubset{}
// return &corev1.Endpoints{}
}).Once()

k8sMockClientGet(mockClient, nil)

epData := httphelper.CassMetadataEndpoints{
Entity: []httphelper.EndpointState{},
}

gotNotReady, err := rc.startOneNodePerRack(epData, tt.seedCount)

assert.NoError(t, err)
assert.Equalf(t, tt.wantNotReady, gotNotReady, "expected not ready to be %v", tt.wantNotReady)
})
}
}
76 changes: 2 additions & 74 deletions tests/decommission_dc/decommission_dc_suite_test.go
@@ -1,18 +1,5 @@
package decommission_dc

/*
Create DC1, wait for it

Create DC2 that starts with additional seeds set to the dc1's seed-service

Verify DC1<->DC2 see each other

Delete DC2, wait for it to complete

Check that DC1 has no longer any DC2 linkage (the DC was correctly decommissioned)

TODO Should we add scale up?
*/
import (
"fmt"
"regexp"
@@ -32,8 +19,8 @@ var (
namespace = "test-decommission-dc"
dc1Name = "dc1"
dc2Name = "dc2"
dc1Yaml = "../testdata/default-single-rack-2-node-dc1.yaml"
dc2Yaml = "../testdata/default-single-rack-2-node-dc.yaml"
dc1Yaml = "../testdata/default-two-rack-two-node-dc1.yaml"
dc2Yaml = "../testdata/default-two-rack-two-node-dc2.yaml"
dc1Label = fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dc1Name)
dc2Label = fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dc2Name)
seedLabel = "cassandra.datastax.com/seed-node=true"
@@ -170,65 +157,6 @@ var _ = Describe(testName, func() {
k = kubectl.Get("statefulsets").
FormatOutput(json)
ns.WaitForOutputAndLog(step, k, "[]", 300)

// Get seed-node Pod: "cassandra.datastax.com/seed-node"
// kubectl get pods -l cassandra.datastax.com/seed-node=true --output=jsonpath='{.items[*].status.podIP}'

/*
Parse the datacenter lines and verify there's two

➜ cass-operator git:(decommission_dc) ✗ kubectl exec -i -t -c cassandra cluster2-dc1-r1-sts-0 -- nodetool status
Datacenter: dc1
===============
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Owns (effective) Host ID Token Rack
UN 10.244.2.3 97.09 KiB 100.0% 11e24d4a-a85f-4429-b701-3865c80bf887 3537893417023478360 r1

Datacenter: dc2
===============
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Owns (effective) Host ID Token Rack
UN 10.244.6.3 68.72 KiB 100.0% 008e6938-3934-4ac2-9d03-2cd070f1cf1f -8131327229031358013 r1

➜ cass-operator git:(decommission_dc) ✗

➜ cass-operator git:(decommission_dc) ✗ kubectl exec -i -t -c cassandra cluster2-dc2-r1-sts-0 -- nodetool decommission
nodetool: Unsupported operation: Not enough live nodes to maintain replication factor in keyspace system_distributed (RF = 3, N = 2). Perform a forceful decommission to ignore.
*/
/*
After decommission:
➜ cass-operator git:(decommission_dc) ✗ kubectl exec -i -t -c cassandra cluster2-dc1-r1-sts-0 -- nodetool status
Datacenter: dc1
===============
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Owns (effective) Host ID Token Rack
UN 10.244.2.3 107.68 KiB 100.0% 11e24d4a-a85f-4429-b701-3865c80bf887 3537893417023478360 r1

➜ cass-operator git:(decommission_dc) ✗
*/
/*
Incorrect deletion:

➜ cass-operator git:(decommission_dc) ✗ kubectl exec -i -t -c cassandra cluster2-dc1-r1-sts-0 -- nodetool status
Datacenter: dc1
===============
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Owns (effective) Host ID Token Rack
UN 10.244.2.3 112.79 KiB 100.0% 11e24d4a-a85f-4429-b701-3865c80bf887 3537893417023478360 r1

Datacenter: dc2
===============
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Owns (effective) Host ID Token Rack
DN 10.244.6.6 78.45 KiB 100.0% fcdf813e-5861-4363-86f0-12e894b6e957 5934102370350368596 r1

➜ cass-operator git:(decommission_dc) ✗
*/
})
})
})
5 changes: 4 additions & 1 deletion tests/testdata/default-two-rack-two-node-dc.yaml
@@ -5,7 +5,7 @@ metadata:
spec:
clusterName: cluster1
serverType: cassandra
serverVersion: "4.0.3"
serverVersion: "4.0.6"
managementApiAuth:
insecure: {}
size: 2
@@ -24,3 +24,6 @@ spec:
jvm-server-options:
initial_heap_size: "512m"
max_heap_size: "512m"
additional-jvm-opts:
- "-Dcassandra.system_distributed_replication_dc_names=dc1"
- "-Dcassandra.system_distributed_replication_per_dc=1"
29 changes: 29 additions & 0 deletions tests/testdata/default-two-rack-two-node-dc2.yaml
@@ -0,0 +1,29 @@
apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
name: dc2
spec:
clusterName: cluster1
serverType: cassandra
serverVersion: "4.0.6"
managementApiAuth:
insecure: {}
size: 2
storageConfig:
cassandraDataVolumeClaimSpec:
storageClassName: standard
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
racks:
- name: r1
- name: r2
config:
jvm-server-options:
initial_heap_size: "512m"
max_heap_size: "512m"
additional-jvm-opts:
- "-Dcassandra.system_distributed_replication_dc_names=dc2"
- "-Dcassandra.system_distributed_replication_per_dc=1"