Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CPU system plugin that gets stuck after suspend #3342

Merged
merged 1 commit into from
Oct 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
- [#3136](https://github.com/influxdata/telegraf/issues/3136): Fix webhooks input address in use during reload.
- [#3258](https://github.com/influxdata/telegraf/issues/3258): Unlock Statsd when stopping to prevent deadlock.
- [#3319](https://github.com/influxdata/telegraf/issues/3319): Fix cloudwatch output requires unneeded permissions.
- [#3342](https://github.com/influxdata/telegraf/pull/3342): Fix CPU input plugin stuck after suspend on Linux.

## v1.4.3 [unreleased]

Expand Down
5 changes: 3 additions & 2 deletions plugins/inputs/system/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
totalDelta := total - lastTotal

if totalDelta < 0 {
return fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
err = fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
break
}

if totalDelta == 0 {
Expand Down Expand Up @@ -126,7 +127,7 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
s.lastStats[cts.CPU] = cts
}

return nil
return err
}

func totalCpuTime(t cpu.TimesStat) float64 {
Expand Down
69 changes: 69 additions & 0 deletions plugins/inputs/system/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,72 @@ func TestCPUCountIncrease(t *testing.T) {
err = cs.Gather(&acc)
require.NoError(t, err)
}

// TestCPUTimesDecrease verifies that the CPU plugin keeps working after the
// reported CPU times go backwards, which seems to occur when a Linux system
// is suspended.
//
// Three samples are fed to the same CPUStats instance in sequence:
//  1. a baseline sample;
//  2. a sample whose total CPU time is lower than the baseline, for which
//     Gather is expected to return an error;
//  3. a sample whose total is higher again, for which Gather is expected to
//     succeed and report usage relative to the second sample.
func TestCPUTimesDecrease(t *testing.T) {
	var firstPS MockPS
	defer firstPS.AssertExpectations(t)
	var acc testutil.Accumulator

	// Baseline.
	baseline := cpu.TimesStat{CPU: "cpu0", User: 18, Idle: 80, Iowait: 2}
	// Relative to baseline: User +20, Idle -40, Iowait -1 (total goes down).
	decreased := cpu.TimesStat{CPU: "cpu0", User: 38, Idle: 40, Iowait: 1}
	// Relative to decreased: User +18, Idle +80, Iowait +2 (total goes up by 100).
	recovered := cpu.TimesStat{CPU: "cpu0", User: 56, Idle: 120, Iowait: 3}

	firstPS.On("CPUTimes").Return([]cpu.TimesStat{baseline}, nil)

	cs := NewCPUStats(&firstPS)

	cputags := map[string]string{"cpu": "cpu0"}

	// First gather: the raw times are reported as-is, so they are compared
	// with a zero tolerance.
	require.NoError(t, cs.Gather(&acc))

	assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 18, 0, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80, 0, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 2, 0, cputags)

	// Second gather: CPU times decreased, so an error should be raised.
	secondPS := MockPS{}
	secondPS.On("CPUTimes").Return([]cpu.TimesStat{decreased}, nil)
	cs.ps = &secondPS

	require.Error(t, cs.Gather(&acc))

	// Third gather: times are increasing again, so the plugin must recover.
	thirdPS := MockPS{}
	thirdPS.On("CPUTimes").Return([]cpu.TimesStat{recovered}, nil)
	cs.ps = &thirdPS

	require.NoError(t, cs.Gather(&acc))

	assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 56, 0, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 120, 0, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 3, 0, cputags)

	// Computed usage values are checked with a delta > 0 because of
	// floating point arithmetic imprecision.
	assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 18, 0.0005, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 80, 0.0005, cputags)
	assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 2, 0.0005, cputags)
}