Skip to content
This repository has been archived by the owner on Jan 30, 2020. It is now read-only.

Commit

Permalink
fleetctl: periodically check for systemd states using waitForState
Browse files Browse the repository at this point in the history
Although checkSystemdActiveState() doesn't have to depend on
cAPI.UnitStates(), it's still impossible to remove the additional sleep
in case of an error from assertSystemdActiveState(). That's actually a
known issue: sometimes it simply takes a long time until fleetctl becomes
able to get valid unit states. Adding more sleeps or tuning the
sleep time wouldn't be a good approach, as the optimal sleep interval
could vary a lot from case to case.

To handle this case gracefully, let's do the same kind of checking that is
done in the functional tests.

* Introduce a new helper waitForState(), just like util.WaitForState()
  from functional tests.
* Squash assertFetchSystemdActiveState() into assertSystemdActiveState()
  to make it retry the assertion periodically up to defaultSleepTime.
* Increase defaultSleepTime from 500 to 2000 msec.
* Remove the additional sleep call as well as cAPI.UnitState() call.
  • Loading branch information
Dongsu Park committed Nov 9, 2016
1 parent 8aa001f commit 57443ca
Showing 1 changed file with 44 additions and 21 deletions.
65 changes: 44 additions & 21 deletions fleetctl/fleetctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ recommended to upgrade fleetctl to prevent incompatibility issues.
clientDriverEtcd = "etcd"

defaultEndpoint = "unix:///var/run/fleet.sock"
defaultSleepTime = 500 * time.Millisecond
defaultSleepTime = 2000 * time.Millisecond
)

var (
Expand Down Expand Up @@ -1116,7 +1116,7 @@ func checkSystemdActiveState(name string, maxAttempts int, wg *sync.WaitGroup, e
}

for attempt := 0; attempt < maxAttempts; attempt++ {
if err := assertFetchSystemdActiveState(name); err == nil {
if err := assertSystemdActiveState(name); err == nil {
return
} else {
errchan <- err
Expand All @@ -1128,29 +1128,29 @@ func checkSystemdActiveState(name string, maxAttempts int, wg *sync.WaitGroup, e
}
}

// assertFetchSystemdActiveState first asserts the systemd state of the unit
// via assertSystemdActiveState. When that assertion fails, it sleeps for
// defaultSleepTime and then fetches the unit state through cAPI once more,
// returning only the error (if any) of that second fetch.
func assertFetchSystemdActiveState(name string) error {
	// Fast path: the assertion already holds, nothing more to do.
	if assertSystemdActiveState(name) == nil {
		return nil
	}

	// Give the unit state some time to show up before asking again.
	time.Sleep(defaultSleepTime)

	_, fetchErr := cAPI.UnitState(name)
	if fetchErr != nil {
		return fetchErr
	}
	return nil
}

// assertSystemdActiveState determines if a given systemd unit is actually
// in the active state, making use of cAPI. It returns true if ActiveState
// of the given unit is active and LoadState of the given unit is loaded.
// in the active state, making use of cAPI.
// It repeatedly checks the unit until defaultSleepTime has elapsed, and
// succeeds as soon as ActiveState of the given unit is active and LoadState
// of the given unit is loaded.
// If it cannot get the expected states within that period, it returns an error.
func assertSystemdActiveState(unitName string) error {
us, err := cAPI.UnitState(unitName)
if err != nil {
return fmt.Errorf("Error retrieving list of units: %v", err)
fetchSystemdActiveState := func() error {
us, err := cAPI.UnitState(unitName)
if err != nil {
return fmt.Errorf("Error getting unit state of %s: %v", unitName, err)
}

// Get systemd state and check the state is active & loaded.
if us.SystemdActiveState != "active" || us.SystemdLoadState != "loaded" {
return fmt.Errorf("Failed to find an active unit %s", unitName)
}
return nil
}

// Get systemd state and check the state is active & loaded.
if us.SystemdActiveState != "active" || us.SystemdLoadState != "loaded" {
return fmt.Errorf("Failed to find an active unit %s", unitName)
timeout, err := waitForState(fetchSystemdActiveState)
if err != nil {
return fmt.Errorf("Failed to find an active unit %s within %v, err: %v",
unitName, timeout, err)
}

return nil
Expand Down Expand Up @@ -1221,3 +1221,26 @@ func runWrapper(cf func(cCmd *cobra.Command, args []string) (exit int)) func(cCm
cmdExitCode = cf(cCmd, args)
}
}

// waitForState is a generic helper for repeatedly checking a status.
// It takes a function stateCheckFunc to be checked, which returns nil on
// success and an error otherwise. stateCheckFunc is run once every 250 msec
// until it succeeds or until defaultSleepTime has elapsed.
//
// It returns the timeout that was applied together with a nil error on
// success, or the timeout together with an error when stateCheckFunc did not
// succeed within that period.
func waitForState(stateCheckFunc func() error) (time.Duration, error) {
	timeout := defaultSleepTime

	// Use an explicit Timer and Ticker instead of time.After/time.Tick so
	// that both can be stopped when this function returns. A ticker created
	// by time.Tick cannot be stopped, and this helper is invoked once per
	// assertion attempt.
	alarm := time.NewTimer(timeout)
	defer alarm.Stop()
	ticker := time.NewTicker(250 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-alarm.C:
			return timeout, fmt.Errorf("Failed to fetch systemd active states within %v", timeout)
		case <-ticker.C:
			err := stateCheckFunc()
			if err == nil {
				return timeout, nil
			}
			// Format the message up front: log.Debug is called Print-style
			// here, so a "%v" verb handed to it directly would presumably
			// be logged verbatim instead of being expanded.
			log.Debug(fmt.Sprintf("Retrying assertion of systemd active states. err: %v", err))
		}
	}
}

0 comments on commit 57443ca

Please sign in to comment.