Skip to content

Commit

Permalink
Extract etcd membership check into a separate phase (#770)
Browse files Browse the repository at this point in the history
Signed-off-by: Kimmo Lehto <[email protected]>
  • Loading branch information
kke authored Oct 22, 2024
1 parent 4c6450d commit 09cfd11
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 49 deletions.
1 change: 1 addition & 0 deletions action/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func NewApply(opts ApplyOptions) *Apply {
&phase.ValidateHosts{},
&phase.GatherK0sFacts{},
&phase.ValidateFacts{SkipDowngradeCheck: opts.DisableDowngradeCheck},
&phase.ValidateEtcdMembers{},

// if UploadBinaries: true
&phase.DownloadBinaries{}, // downloads k0s binaries to local cache
Expand Down
90 changes: 90 additions & 0 deletions phase/validate_etcd_members.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package phase

import (
"fmt"
"slices"

"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1"
"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster"
log "github.com/sirupsen/logrus"
)

// ValidateEtcdMembers checks for existing etcd members with the same IP as a new controller.
// It is a separate phase so the check can be skipped as a whole when it does not apply.
type ValidateEtcdMembers struct {
	// GenericPhase is embedded; the Config field used by this phase is promoted from it.
	GenericPhase
	// hosts holds the controllers that have no running k0s version recorded;
	// it is populated by Prepare.
	hosts cluster.Hosts
}

// Title returns the human-readable name of the phase.
func (p *ValidateEtcdMembers) Title() string {
	const title = "Validate etcd members"
	return title
}

// Prepare stores the cluster configuration and collects the controllers that
// do not have a running k0s version, i.e. the ones considered new.
func (p *ValidateEtcdMembers) Prepare(config *v1beta1.Cluster) error {
	// only new controllers need to be checked against the etcd member list
	isNewController := func(h *cluster.Host) bool {
		return h.Metadata.K0sRunningVersion == nil
	}

	p.Config = config
	p.hosts = config.Spec.Hosts.Controllers().Filter(isNewController)

	return nil
}

// ShouldRun reports whether the phase has anything to do. It is false when the
// leader has no running k0s, when the leader is a single node, or when the k0s
// configuration names a storage type other than etcd. Otherwise it is true only
// if Prepare found at least one new controller.
func (p *ValidateEtcdMembers) ShouldRun() bool {
	leader := p.Config.Spec.K0sLeader()

	switch {
	case leader.Metadata.K0sRunningVersion == nil:
		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", leader)
		return false
	case leader.Role == "single":
		log.Debugf("%s: leader is a single node, assuming no etcd", leader)
		return false
	}

	if k0sConfig := p.Config.Spec.K0s.Config; len(k0sConfig) > 0 {
		// an empty storage type is treated the same as "etcd"
		if storage := k0sConfig.DigString("spec", "storage", "type"); storage != "" && storage != "etcd" {
			log.Debugf("%s: storage type is %q, not k0s managed etcd", leader, storage)
			return false
		}
	}

	return len(p.hosts) > 0
}

// Run executes the controller swap validation and returns its result.
func (p *ValidateEtcdMembers) Run() error {
	return p.validateControllerSwap()
}

// validateControllerSwap looks for new controllers whose private address is
// already present in the etcd member list, which suggests the host was
// replaced without being removed from etcd first.
//
// For each such host:
//   - if Force is set, "k0s etcd leave --peer-address <address>" is executed
//     on the leader through p.Wet; a failure is returned wrapped with context.
//   - otherwise an error is returned that tells the user how to resolve it.
//
// A warning is logged when there are more etcd members than configured
// controllers. Returns nil when no conflicting host is found or every
// conflict was resolved.
func (p *ValidateEtcdMembers) validateControllerSwap() error {
	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
		log.Warnf("there are more etcd members in the cluster than controllers listed in the configuration")
	}

	for _, h := range p.hosts {
		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
		if !slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
			log.Debugf("%s: no match, assuming its safe to install", h)
			continue
		}

		if !Force {
			// The suggested command is wrapped in a balanced pair of backticks so
			// it is clear where the command ends in the message.
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s` on a controller or re-run apply with --force", h, h.PrivateAddress)
		}

		log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
		// The leave command is run on the leader, not on the new host.
		leader := p.Config.Spec.K0sLeader()
		leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
		err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
			return leader.Exec(leaveCommand)
		})
		if err != nil {
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
		}
	}

	return nil
}
49 changes: 0 additions & 49 deletions phase/validate_facts.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package phase

import (
"fmt"
"slices"

log "github.com/sirupsen/logrus"
)
Expand All @@ -28,10 +27,6 @@ func (p *ValidateFacts) Run() error {
return err
}

if err := p.validateControllerSwap(); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -74,47 +69,3 @@ func (p *ValidateFacts) validateDefaultVersion() error {

return nil
}

// validateControllerSwap compares the configured controllers against the etcd
// member list to detect a controller that was replaced without being removed
// from etcd.
//
// The check is skipped when the leader has no running k0s or when the leader
// role is "single". Controllers that already have a running k0s version are
// skipped. For a controller with no running k0s whose private address is
// already an etcd member:
//   - if Force is set, "k0s etcd leave --peer-address <address>" is executed
//     on the leader through p.Wet; a failure is returned wrapped with context.
//   - otherwise an error is returned that tells the user how to resolve it.
func (p *ValidateFacts) validateControllerSwap() error {
	log.Debugf("validating controller list vs etcd member list")
	if p.Config.Spec.K0sLeader().Metadata.K0sRunningVersion == nil {
		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", p.Config.Spec.K0sLeader())
		return nil
	}

	if p.Config.Spec.K0sLeader().Role == "single" {
		log.Debugf("%s: leader is a single node, assuming no etcd", p.Config.Spec.K0sLeader())
		return nil
	}

	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
		log.Warnf("there are more etcd members in the cluster than controllers listed in the k0sctl configuration")
	}

	for _, h := range p.Config.Spec.Hosts.Controllers() {
		if h.Metadata.K0sRunningVersion != nil {
			log.Debugf("%s: host has k0s running, no need to check if it was replaced", h)
			continue
		}

		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
		if slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
			if Force {
				log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
				// The leave command is run on the leader, not on the new host.
				leader := p.Config.Spec.K0sLeader()
				leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
				err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
					return leader.Exec(leaveCommand)
				})
				if err != nil {
					return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
				}
				continue
			}
			// The suggested command is wrapped in a balanced pair of backticks so
			// it is clear where the command ends in the message.
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s` on a controller or re-run apply with --force", h, h.PrivateAddress)
		}
		log.Debugf("%s: no match, assuming its safe to install", h)
	}

	return nil
}

0 comments on commit 09cfd11

Please sign in to comment.