From f3ba00577040a6f12eb48c0af5a7b92a9f729737 Mon Sep 17 00:00:00 2001 From: Frank Riley Date: Wed, 15 Apr 2020 17:50:57 -0700 Subject: [PATCH] Fix #7344: Add percentiles to the ping plugin --- plugins/inputs/ping/README.md | 12 +++-- plugins/inputs/ping/ping.go | 77 +++++++++++++++++++++++++++----- plugins/inputs/ping/ping_test.go | 4 ++ 3 files changed, 79 insertions(+), 14 deletions(-) diff --git a/plugins/inputs/ping/README.md b/plugins/inputs/ping/README.md index 91af1b2ae33ed..83a91a2eeb96d 100644 --- a/plugins/inputs/ping/README.md +++ b/plugins/inputs/ping/README.md @@ -57,6 +57,9 @@ native Go by the Telegraf process, eliminating the need to execute the system ## option of the ping command. # interface = "" + ## Percentiles to calculate. This only works with the native method. + # percentiles = [50, 95, 99] + ## Specify the ping executable binary. # binary = "ping" @@ -147,10 +150,11 @@ sockets and the `ping_group_range` setting. - packets_received (integer) - percent_packet_loss (float) - ttl (integer, Not available on Windows) - - average_response_ms (integer) - - minimum_response_ms (integer) - - maximum_response_ms (integer) - - standard_deviation_ms (integer, Available on Windows only with native ping) + - average_response_ms (float) + - minimum_response_ms (float) + - maximum_response_ms (float) + - standard_deviation_ms (float, Available on Windows only with method = "native") + - percentile<N>\_ms (float, Where `<N>` is the percentile specified in `percentiles`. 
Available with method = "native" only) - errors (float, Windows only) - reply_received (integer, Windows with method = "exec" only) - percent_reply_loss (float, Windows with method = "exec" only) diff --git a/plugins/inputs/ping/ping.go b/plugins/inputs/ping/ping.go index 008cfceacc5b9..da9ab8698e83b 100644 --- a/plugins/inputs/ping/ping.go +++ b/plugins/inputs/ping/ping.go @@ -3,11 +3,13 @@ package ping import ( "context" "errors" + "fmt" "log" "math" "net" "os/exec" "runtime" + "sort" "strings" "sync" "time" @@ -69,6 +71,9 @@ type Ping struct { // listenAddr is the address associated with the interface defined. listenAddr string + + // Calculate the given percentiles when using native method + Percentiles []int } func (*Ping) Description() string { @@ -108,6 +113,9 @@ const sampleConfig = ` ## option of the ping command. # interface = "" + ## Percentiles to calculate. This only works with the native method. + # percentiles = [50, 95, 99] + ## Specify the ping executable binary. # binary = "ping" @@ -345,11 +353,41 @@ finish: log.Printf("D! 
[inputs.ping] %s", doErr.Error()) } - tags, fields := onFin(packetsSent, rsps, doErr, destination) + tags, fields := onFin(packetsSent, rsps, doErr, destination, p.Percentiles) acc.AddFields("ping", fields, tags) } -func onFin(packetsSent int, resps []*ping.Response, err error, destination string) (map[string]string, map[string]interface{}) { +type durationSlice []time.Duration + +func (p durationSlice) Len() int { return len(p) } +func (p durationSlice) Less(i, j int) bool { return p[i] < p[j] } +func (p durationSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// R7 from Hyndman and Fan (1996), which matches Excel +func percentile(values durationSlice, perc int) time.Duration { + if perc < 0 { + perc = 0 + } + if perc > 100 { + perc = 100 + } + var percFloat = float64(perc) / 100.0 + + var count = len(values) + var rank = percFloat * float64(count-1) + var rankInteger = int(rank) + var rankFraction = rank - math.Floor(rank) + + if rankInteger >= count-1 { + return values[count-1] + } else { + upper := values[rankInteger+1] + lower := values[rankInteger] + return lower + time.Duration(rankFraction*float64(upper-lower)) + } +} + +func onFin(packetsSent int, resps []*ping.Response, err error, destination string, percentiles []int) (map[string]string, map[string]interface{}) { packetsRcvd := len(resps) tags := map[string]string{"url": destination} @@ -378,17 +416,35 @@ func onFin(packetsSent int, resps []*ping.Response, err error, destination strin ttl := resps[0].TTL var min, max, avg, total time.Duration - min = resps[0].RTT - max = resps[0].RTT - for _, res := range resps { - if res.RTT < min { - min = res.RTT + if len(percentiles) > 0 { + var rtt []time.Duration + for _, resp := range resps { + rtt = append(rtt, resp.RTT) + total += resp.RTT + } + sort.Sort(durationSlice(rtt)) + min = rtt[0] + max = rtt[len(rtt)-1] + + for _, perc := range percentiles { + var value = percentile(durationSlice(rtt), perc) + var field = fmt.Sprintf("percentile%v_ms", perc) + 
fields[field] = float64(value.Nanoseconds()) / float64(time.Millisecond) } - if res.RTT > max { - max = res.RTT + } else { + min = resps[0].RTT + max = resps[0].RTT + + for _, res := range resps { + if res.RTT < min { + min = res.RTT + } + if res.RTT > max { + max = res.RTT + } + total += res.RTT } - total += res.RTT } avg = total / time.Duration(packetsRcvd) @@ -433,6 +489,7 @@ func init() { Method: "exec", Binary: "ping", Arguments: []string{}, + Percentiles: []int{}, } }) } diff --git a/plugins/inputs/ping/ping_test.go b/plugins/inputs/ping/ping_test.go index 0c8cfb0939daa..e3d725de33253 100644 --- a/plugins/inputs/ping/ping_test.go +++ b/plugins/inputs/ping/ping_test.go @@ -413,11 +413,15 @@ func TestPingGatherNative(t *testing.T) { Method: "native", Count: 5, resolveHost: mockHostResolver, + Percentiles: []int{50, 95, 99}, } assert.NoError(t, acc.GatherError(p.Gather)) assert.True(t, acc.HasPoint("ping", map[string]string{"url": "localhost"}, "packets_transmitted", 5)) assert.True(t, acc.HasPoint("ping", map[string]string{"url": "localhost"}, "packets_received", 5)) + assert.True(t, acc.HasField("ping", "percentile50_ms")) + assert.True(t, acc.HasField("ping", "percentile95_ms")) + assert.True(t, acc.HasField("ping", "percentile99_ms")) } func mockHostResolverError(ctx context.Context, ipv6 bool, host string) (*net.IPAddr, error) {