From b81617d727646ddde3ac12c03e801582cb9a1587 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 14 Oct 2022 11:35:23 +0800 Subject: [PATCH 1/6] feat: support the failure/success threadhold for probe --- cmd/easeprobe/probe.go | 5 ++- conf/conf.go | 5 ++- docs/Manual.md | 6 ++- global/global.go | 2 + global/probe.go | 17 ++++++++ global/probe_test.go | 62 ++++++++++++++++++++++++++ probe/base/base.go | 77 ++++++++++++++++++++++---------- probe/base/base_test.go | 62 ++++++++++++++++++++++++++ probe/data_test.go | 10 +++-- probe/result.go | 14 +++--- probe/result_test.go | 19 +++++--- probe/status.go | 15 +++++++ probe/status_counter.go | 85 ++++++++++++++++++++++++++++++++++++ probe/status_counter_test.go | 55 +++++++++++++++++++++++ resources/config.yaml | 6 +++ 15 files changed, 397 insertions(+), 43 deletions(-) create mode 100644 probe/status_counter.go create mode 100644 probe/status_counter_test.go diff --git a/cmd/easeprobe/probe.go b/cmd/easeprobe/probe.go index ec6b1828..9f386ab2 100644 --- a/cmd/easeprobe/probe.go +++ b/cmd/easeprobe/probe.go @@ -30,8 +30,9 @@ import ( func configProbers(probers []probe.Prober) []probe.Prober { gProbeConf := global.ProbeSettings{ - Interval: conf.Get().Settings.Probe.Interval, - Timeout: conf.Get().Settings.Probe.Timeout, + Interval: conf.Get().Settings.Probe.Interval, + Timeout: conf.Get().Settings.Probe.Timeout, + StatusChangeThresholdSettings: conf.Get().Settings.Probe.StatusChangeThresholdSettings, } log.Debugf("Global Probe Configuration: %+v", gProbeConf) diff --git a/conf/conf.go b/conf/conf.go index 4a50689e..40c9ee77 100644 --- a/conf/conf.go +++ b/conf/conf.go @@ -94,8 +94,9 @@ type Notify struct { // Probe is the settings of prober type Probe struct { - Interval time.Duration `yaml:"interval" json:"interval,omitempty" jsonschema:"type=string,format=duration,title=Probe Interval,description=the interval of probe,default=1m"` - Timeout time.Duration `yaml:"timeout" json:"timeout,omitempty" jsonschema:"type=string,format=duration,title=Probe Timeout,description=the timeout of probe,default=30s"` + Interval time.Duration `yaml:"interval" json:"interval,omitempty" jsonschema:"type=string,format=duration,title=Probe Interval,description=the interval of probe,default=1m"` + Timeout time.Duration `yaml:"timeout" json:"timeout,omitempty" jsonschema:"type=string,format=duration,title=Probe Timeout,description=the timeout of probe,default=30s"` + global.StatusChangeThresholdSettings `yaml:",inline" json:",inline"` } // SLAReport is the settings for SLA report diff --git a/docs/Manual.md b/docs/Manual.md index 34a633fe..3964af88 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -627,7 +627,8 @@ The following example configurations illustrate the EaseProbe supported features - `timeout` - the maximum time to wait for the probe to complete. default: `30s`. - `interval` - the interval time to run the probe. default: `1m`. - +- `failure` - number of continuously failed probes needed to determine the status down, default: 1 +- `success` - number of continuously successful probes needed to determine the status down, default: 1 ## 7.1 HTTP Probe Configuration @@ -1308,6 +1309,9 @@ settings: probe: timeout: 30s # the time out for all probes interval: 1m # probe every minute for all probes + failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 + success: 1 # number of continuously successful probes needed to determine the status down, default: 1 + # easeprobe program running log file. log: diff --git a/global/global.go b/global/global.go index 1475b75d..bbfabc0b 100644 --- a/global/global.go +++ b/global/global.go @@ -64,6 +64,8 @@ const ( DefaultTimeOut = time.Second * 30 // DefaultChannelName is the default wide channel name DefaultChannelName = "__EaseProbe_Channel__" + // DefaultStatusChangeThresholdSetting is the threshold of status change + DefaultStatusChangeThresholdSetting = 1 ) const ( diff --git a/global/probe.go b/global/probe.go index ca2d9b20..f3e6abfd 100644 --- a/global/probe.go +++ b/global/probe.go @@ -19,10 +19,19 @@ package global import "time" +// StatusChangeThresholdSettings is the settings for probe threshold +type StatusChangeThresholdSettings struct { + // the failures threshold such as 2, 5 + Failure int `yaml:"failure,omitempty" json:"failure,omitempty" jsonschema:"description=the failures threshold to change the status such as 3, 5,default=1"` + // the success threshold such as 2, 5 + Success int `yaml:"success,omitempty" json:"success,omitempty" jsonschema:"description=the success threshold to change the status such as 3, 5,default=1"` +} + // ProbeSettings is the global probe setting type ProbeSettings struct { Interval time.Duration Timeout time.Duration + StatusChangeThresholdSettings } // NormalizeTimeOut return a normalized timeout value @@ -34,3 +43,11 @@ func (p *ProbeSettings) NormalizeTimeOut(t time.Duration) time.Duration { func (p *ProbeSettings) NormalizeInterval(t time.Duration) time.Duration { return normalize(p.Interval, t, 0, DefaultProbeInterval) } + +// NormalizeThreshold return a normalized threshold value +func (p *ProbeSettings) NormalizeThreshold(t StatusChangeThresholdSettings) StatusChangeThresholdSettings { + return StatusChangeThresholdSettings{ + Failure: normalize(p.Failure, t.Failure, 0, DefaultStatusChangeThresholdSetting), + Success: normalize(p.Success, t.Success, 0, DefaultStatusChangeThresholdSetting), + } +} diff --git a/global/probe_test.go b/global/probe_test.go index 38d72d0f..67f68bce 100644 --- a/global/probe_test.go +++ b/global/probe_test.go @@ -46,5 +46,67 @@ func TestProbe(t *testing.T) { p.Interval = 20 r = p.NormalizeInterval(0) assert.Equal(t, time.Duration(20), r) +} + +func TestStatusChangeThresholdSettings(t *testing.T) { + p := ProbeSettings{} + + r := p.NormalizeThreshold(StatusChangeThresholdSettings{}) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: DefaultStatusChangeThresholdSetting, + Success: DefaultStatusChangeThresholdSetting, + }, r) + + p.Failure = 2 + p.Success = 3 + + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Failure: 1, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 1, + Success: 3, + }, r) + + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Success: 2, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 2, + Success: 2, + }, r) + + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Failure: 5, + Success: 6, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 5, + Success: 6, + }, r) + + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Failure: 0, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 2, + Success: 3, + }, r) + + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Success: -1, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 2, + Success: 3, + }, r) + p.Failure = -1 + r = p.NormalizeThreshold(StatusChangeThresholdSettings{ + Failure: 0, + }) + assert.Equal(t, StatusChangeThresholdSettings{ + Failure: 1, + Success: 3, + }, r) } diff --git a/probe/base/base.go b/probe/base/base.go index bcd4b0fe..4b5d66de 100644 --- a/probe/base/base.go +++ b/probe/base/base.go @@ -44,15 +44,16 @@ type ProbeFuncType func() (bool, string) // DefaultProbe is the default options for all probe type DefaultProbe struct { - ProbeKind string `yaml:"-" json:"-"` - ProbeTag string `yaml:"-" json:"-"` - ProbeName string `yaml:"name" json:"name" jsonschema:"required,title=Probe Name,description=the name of probe must be unique"` - ProbeChannels []string `yaml:"channels" json:"channels,omitempty" jsonschema:"title=Probe Channels,description=the channels of probe message need to send to"` - ProbeTimeout time.Duration `yaml:"timeout,omitempty" json:"timeout,omitempty" jsonschema:"type=string,format=duration,title=Probe Timeout,description=the timeout of probe"` - ProbeTimeInterval time.Duration `yaml:"interval,omitempty" json:"interval,omitempty" jsonschema:"type=string,format=duration,title=Probe Interval,description=the interval of probe"` - ProbeFunc ProbeFuncType `yaml:"-" json:"-"` - ProbeResult *probe.Result `yaml:"-" json:"-"` - metrics *metrics `yaml:"-" json:"-"` + ProbeKind string `yaml:"-" json:"-"` + ProbeTag string `yaml:"-" json:"-"` + ProbeName string `yaml:"name" json:"name" jsonschema:"required,title=Probe Name,description=the name of probe must be unique"` + ProbeChannels []string `yaml:"channels" json:"channels,omitempty" jsonschema:"title=Probe Channels,description=the channels of probe message need to send to"` + ProbeTimeout time.Duration `yaml:"timeout,omitempty" json:"timeout,omitempty" jsonschema:"type=string,format=duration,title=Probe Timeout,description=the timeout of probe"` + ProbeTimeInterval time.Duration `yaml:"interval,omitempty" json:"interval,omitempty" jsonschema:"type=string,format=duration,title=Probe Interval,description=the interval of probe"` + global.StatusChangeThresholdSettings `yaml:",inline" json:",inline"` + ProbeFunc ProbeFuncType `yaml:"-" json:"-"` + ProbeResult *probe.Result `yaml:"-" json:"-"` + metrics *metrics `yaml:"-" json:"-"` } // Kind return the probe kind @@ -85,6 +86,35 @@ func (d *DefaultProbe) Result() *probe.Result { return d.ProbeResult } +// LogTitle return the log title +func (d *DefaultProbe) LogTitle() string { + if len(d.ProbeTag) > 0 { + return fmt.Sprintf("[%s / %s / %s]", d.ProbeKind, d.ProbeTag, d.ProbeName) + } + return fmt.Sprintf("[%s / %s]", d.ProbeKind, d.ProbeName) +} + +// CheckStatusThreshold check the status threshold +func (d *DefaultProbe) CheckStatusThreshold() probe.Status { + s := d.StatusChangeThresholdSettings + c := d.ProbeResult.Stat.StatusCounter + title := d.LogTitle() + log.Debugf(" %s - Status Threshold Checking - Current[%v], StatusCnt[%d], FailureThread[%d], SuccessThread[%d]", + title, c.CurrentStatus, c.StatusCount, s.Failure, s.Success) + + if c.CurrentStatus == true && c.StatusCount >= s.Success { + log.Infof("%s - Status is UP! Meet the Success Threshold [%d/%d]", title, c.StatusCount, s.Success) + return probe.StatusUp + } + if c.CurrentStatus == false && c.StatusCount >= s.Failure { + log.Infof("%s - Status is DOWN! Meet the Failure Threshold [%d/%d]", title, c.StatusCount, s.Failure) + return probe.StatusDown + } + log.Infof("%s - Keep the Status as %s! Not meet the Threshold - Current[%v], StatusCnt[%d], FailureThread[%d], SuccessThread[%d]", + title, d.ProbeResult.PreStatus, c.CurrentStatus, c.StatusCount, s.Failure, s.Success) + return d.ProbeResult.PreStatus +} + // Config default config func (d *DefaultProbe) Config(gConf global.ProbeSettings, kind, tag, name, endpoint string, fn ProbeFuncType) error { @@ -96,20 +126,25 @@ func (d *DefaultProbe) Config(gConf global.ProbeSettings, d.ProbeTimeout = gConf.NormalizeTimeOut(d.ProbeTimeout) d.ProbeTimeInterval = gConf.NormalizeInterval(d.ProbeTimeInterval) + d.StatusChangeThresholdSettings = gConf.NormalizeThreshold(d.StatusChangeThresholdSettings) d.ProbeResult = probe.NewResultWithName(name) d.ProbeResult.Name = name d.ProbeResult.Endpoint = endpoint + // Set the new length of the status counter + maxLen := d.StatusChangeThresholdSettings.Failure + if d.StatusChangeThresholdSettings.Success > maxLen { + maxLen = d.StatusChangeThresholdSettings.Success + } + d.ProbeResult.Stat.StatusCounter.SetMaxLen(maxLen) + + // if there no channels, use the default channel if len(d.ProbeChannels) == 0 { d.ProbeChannels = append(d.ProbeChannels, global.DefaultChannelName) } - if len(d.ProbeTag) > 0 { - log.Infof("Probe [%s / %s] - [%s] base options are configured!", d.ProbeKind, d.ProbeTag, d.ProbeName) - } else { - log.Infof("Probe [%s] - [%s] base options are configured!", d.ProbeKind, d.ProbeName) - } + log.Infof("Probe %s base options are configured!", d.LogTitle()) d.metrics = newMetrics(kind, tag) @@ -130,21 +165,19 @@ func (d *DefaultProbe) Probe() probe.Result { d.ProbeResult.RoundTripTime = time.Since(now) - status := probe.StatusUp - title := "Success" - if stat != true { - status = probe.StatusDown - title = "Error" - } + // check the status threshold + d.ProbeResult.Stat.StatusCounter.AppendStatus(stat, msg) + status := d.CheckStatusThreshold() + title := status.Title() if len(d.ProbeTag) > 0 { d.ProbeResult.Message = fmt.Sprintf("%s (%s/%s): %s", title, d.ProbeKind, d.ProbeTag, msg) - log.Debugf("[%s / %s / %s] - %s", d.ProbeKind, d.ProbeTag, d.ProbeName, msg) } else { d.ProbeResult.Message = fmt.Sprintf("%s (%s): %s", title, d.ProbeKind, msg) - log.Debugf("[%s / %s] - %s", d.ProbeKind, d.ProbeName, msg) } + log.Debugf("%s - %s", d.LogTitle(), msg) + d.ProbeResult.PreStatus = d.ProbeResult.Status d.ProbeResult.Status = status diff --git a/probe/base/base_test.go b/probe/base/base_test.go index 3d32cd47..f860c3fb 100644 --- a/probe/base/base_test.go +++ b/probe/base/base_test.go @@ -18,6 +18,7 @@ package base import ( + "fmt" "math/rand" "net" "os" @@ -147,3 +148,64 @@ func TestProxyConnection(t *testing.T) { monkey.UnpatchAll() } + +func TestStatusThreshold(t *testing.T) { + p := newDummyProber("probe") + p.StatusChangeThresholdSettings = global.StatusChangeThresholdSettings{ + Failure: 2, + Success: 3, + } + p.Config(global.ProbeSettings{ + StatusChangeThresholdSettings: global.StatusChangeThresholdSettings{ + Failure: 2, + Success: 1, + }, + }) + assert.Equal(t, 3, p.ProbeResult.Stat.MaxLen) + assert.Equal(t, 2, p.StatusChangeThresholdSettings.Failure) + assert.Equal(t, 3, p.StatusChangeThresholdSettings.Success) + + p.ProbeResult.Status = probe.StatusInit + + cnt := 0 + p.ProbeFunc = func() (bool, string) { + cnt++ + return true, fmt.Sprintf("success - %d", cnt) + } + + n := p.ProbeResult.Stat.MaxLen + 2 + for i := 1; i <= n; i++ { + p.Probe() + if i < p.StatusChangeThresholdSettings.Success { + + assert.Equal(t, probe.StatusInit, p.Result().Status) + } else { + assert.Equal(t, probe.StatusUp, p.Result().Status) + } + if i < p.ProbeResult.Stat.MaxLen { + assert.Equal(t, i, p.ProbeResult.Stat.StatusCount) + } else { + assert.Equal(t, p.ProbeResult.Stat.MaxLen, p.ProbeResult.Stat.StatusCount) + } + } + + cnt = 0 + p.ProbeFunc = func() (bool, string) { + cnt++ + return false, fmt.Sprintf("failure - %d", cnt) + } + + for i := 1; i <= n; i++ { + p.Probe() + if i < p.StatusChangeThresholdSettings.Failure { + assert.Equal(t, probe.StatusUp, p.Result().Status) + } else { + assert.Equal(t, probe.StatusDown, p.Result().Status) + } + if i < p.ProbeResult.Stat.MaxLen { + assert.Equal(t, i, p.ProbeResult.Stat.StatusCount) + } else { + assert.Equal(t, p.ProbeResult.Stat.MaxLen, p.ProbeResult.Stat.StatusCount) + } + } +} diff --git a/probe/data_test.go b/probe/data_test.go index e4b5f729..551c0bee 100644 --- a/probe/data_test.go +++ b/probe/data_test.go @@ -51,8 +51,9 @@ var testResults = []Result{ StatusUp: 70, StatusDown: 30, }, - UpTime: 70 * time.Minute, - DownTime: 30 * time.Minute, + UpTime: 70 * time.Minute, + DownTime: 30 * time.Minute, + StatusCounter: *NewStatusCounter(1), }, }, { @@ -74,8 +75,9 @@ var testResults = []Result{ StatusUp: 270, StatusDown: 30, }, - UpTime: 270 * time.Minute, - DownTime: 30 * time.Minute, + UpTime: 270 * time.Minute, + DownTime: 30 * time.Minute, + StatusCounter: *NewStatusCounter(2), }, }, } diff --git a/probe/result.go b/probe/result.go index a01270e3..390bacac 100644 --- a/probe/result.go +++ b/probe/result.go @@ -22,6 +22,7 @@ import ( "fmt" "time" + "github.com/megaease/easeprobe/global" log "github.com/sirupsen/logrus" ) @@ -32,6 +33,7 @@ type Stat struct { Status map[Status]int64 `json:"status" yaml:"status"` UpTime time.Duration `json:"uptime" yaml:"uptime"` DownTime time.Duration `json:"downtime" yaml:"downtime"` + StatusCounter } // Result is the status of health check @@ -63,11 +65,12 @@ func NewResult() *Result { LatestDownTime: time.Time{}, RecoveryDuration: 0, Stat: Stat{ - Since: time.Now().UTC(), - Total: 0, - Status: map[Status]int64{}, - UpTime: 0, - DownTime: 0, + Since: time.Now().UTC(), + Total: 0, + Status: map[Status]int64{}, + UpTime: 0, + DownTime: 0, + StatusCounter: *NewStatusCounter(global.DefaultStatusChangeThresholdSetting), }, } } @@ -112,6 +115,7 @@ func (s *Stat) Clone() Stat { } dst.UpTime = s.UpTime dst.DownTime = s.DownTime + dst.StatusCounter = s.StatusCounter.Clone() return dst } diff --git a/probe/result_test.go b/probe/result_test.go index 0a8e8d8d..6870f011 100644 --- a/probe/result_test.go +++ b/probe/result_test.go @@ -47,11 +47,12 @@ func CreateTestResult() *Result { LatestDownTime: now.Add(-20 * time.Hour), RecoveryDuration: 5 * time.Minute, Stat: Stat{ - Since: now, - Total: 1000, - Status: m, - UpTime: 50 * time.Second, - DownTime: 10 * time.Second, + Since: now, + Total: 1000, + Status: m, + UpTime: 50 * time.Second, + DownTime: 10 * time.Second, + StatusCounter: *NewStatusCounter(2), }, } return r @@ -180,7 +181,7 @@ func TestDebug(t *testing.T) { up := fmt.Sprintf("%d", StatusUp) down := fmt.Sprintf("%d", StatusDown) - expected := `{"name":"Test Name","endpoint":"http://example.com","time":"2022-01-01T00:00:00Z","timestamp":1640995200,"rtt":30000000000,"status":"up","prestatus":"down","message":"This is a test message","latestdowntime":"2021-12-31T04:00:00Z","recoverytime":300000000000,"stat":{"since":"2022-01-01T00:00:00Z","total":1001,"status":{"` + up + `":51,"` + down + `":10},"uptime":1850000000000,"downtime":10000000000}}` + expected := `{"name":"Test Name","endpoint":"http://example.com","time":"2022-01-01T00:00:00Z","timestamp":1640995200,"rtt":30000000000,"status":"up","prestatus":"down","message":"This is a test message","latestdowntime":"2021-12-31T04:00:00Z","recoverytime":300000000000,"stat":{"since":"2022-01-01T00:00:00Z","total":1001,"status":{"1":51,"2":10},"uptime":1850000000000,"downtime":10000000000,"StatusHistory":[],"MaxLen":1,"CurrentStatus":true,"StatusCount":0}}` if r.DebugJSON() != expected { t.Errorf("%s != %s", r.DebugJSON(), expected) } @@ -204,7 +205,11 @@ func TestDebug(t *testing.T) { "` + down + `": 10 }, "uptime": 1850000000000, - "downtime": 10000000000 + "downtime": 10000000000, + "StatusHistory": [], + "MaxLen": 1, + "CurrentStatus": true, + "StatusCount": 0 } }` diff --git a/probe/status.go b/probe/status.go index 7b319b3d..a5e2725c 100644 --- a/probe/status.go +++ b/probe/status.go @@ -36,6 +36,13 @@ const ( ) var ( + toTitle = map[Status]string{ + StatusInit: "Initialization", + StatusUp: "Success", + StatusDown: "Error", + StatusUnknown: "Unknown", + StatusBad: "Bad", + } toString = map[Status]string{ StatusInit: "init", StatusUp: "up", @@ -55,6 +62,14 @@ var ( } ) +// Title convert the Status to title +func (s Status) Title() string { + if val, ok := toTitle[s]; ok { + return val + } + return "Unknown" +} + // String convert the Status to string func (s Status) String() string { if val, ok := toString[s]; ok { diff --git a/probe/status_counter.go b/probe/status_counter.go new file mode 100644 index 00000000..cd6a0966 --- /dev/null +++ b/probe/status_counter.go @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022, MegaEase + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package probe + +// StatusHistory is a history of status +type StatusHistory struct { + Status bool + Message string +} + +// StatusCounter is the object to count the status +type StatusCounter struct { + StatusHistory []StatusHistory // the status history + MaxLen int // the max length of the status history + CurrentStatus bool // the current status + StatusCount int // the count of the same status +} + +// NewStatusCounter return a StatusCounter object +func NewStatusCounter(maxLen int) *StatusCounter { + threshold := &StatusCounter{ + StatusHistory: make([]StatusHistory, 0), + MaxLen: maxLen, + CurrentStatus: true, + StatusCount: 0, + } + return threshold +} + +// AppendStatus appends the status +func (s *StatusCounter) AppendStatus(status bool, message string) { + + if status != s.CurrentStatus { // status change, reset the status count + s.StatusCount = 0 + s.CurrentStatus = status + } + if s.StatusCount < s.MaxLen { // count the status if it is less than the max length + s.StatusCount++ + } + + h := StatusHistory{ + Status: status, + Message: message, + } + // append the status + s.StatusHistory = append(s.StatusHistory, h) + + // pop up the first element + if len(s.StatusHistory) > s.MaxLen { + s.StatusHistory = s.StatusHistory[1:] + } +} + +// SetMaxLen sets the max length of the status history +func (s *StatusCounter) SetMaxLen(maxLen int) { + s.MaxLen = maxLen + if len(s.StatusHistory) > s.MaxLen { + s.StatusHistory = s.StatusHistory[len(s.StatusHistory)-s.MaxLen:] + } +} + +// Clone returns a copy of the StatusThreshold +func (s *StatusCounter) Clone() StatusCounter { + return StatusCounter{ + StatusHistory: s.StatusHistory, + MaxLen: s.MaxLen, + CurrentStatus: s.CurrentStatus, + StatusCount: s.StatusCount, + } +} diff --git a/probe/status_counter_test.go b/probe/status_counter_test.go new file mode 100644 index 00000000..a3cefe72 --- /dev/null +++ b/probe/status_counter_test.go @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022, MegaEase + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package probe + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewStatusCounter(t *testing.T) { + const Len = 3 + s := NewStatusCounter(Len) + assert.Equal(t, Len, s.MaxLen) + assert.True(t, s.CurrentStatus) + + for i := 1; i <= Len+2; i++ { + s.AppendStatus(false, "failure") + assert.False(t, s.CurrentStatus) + if i <= Len { + assert.Equal(t, i, s.StatusCount) + } else { + assert.Equal(t, Len, s.StatusCount) + } + } + + for i := 1; i <= Len+2; i++ { + s.AppendStatus(true, "success") + assert.True(t, s.CurrentStatus) + if i <= Len { + assert.Equal(t, i, s.StatusCount) + } else { + assert.Equal(t, Len, s.StatusCount) + } + } + + s1 := s.Clone() + assert.True(t, reflect.DeepEqual(s, &s1)) +} diff --git a/resources/config.yaml b/resources/config.yaml index a489c7c9..3e61c3b3 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -42,6 +42,8 @@ # expression: "x_time('//feed/updated') > '2022-07-01'" # the expression to evaluate. # # configuration # timeout: 10s # default is 30 seconds +# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 +# success: 1 # number of continuously successful probes needed to determine the status down , default: 1 http: # http probes - name: EaseProbe Github @@ -57,6 +59,8 @@ http: # http probes # interval: 2m # default is 60 seconds # proxy: socks5://proxy.server:1080 # Optional. Only support socks5. # # Also support the `ALL_PROXY` environment. +# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 +# success: 1 # number of continuously successful probes needed to determine the status down, default: 1 # --------------------- Shell Probe Configuration --------------------- # @@ -386,6 +390,8 @@ notify: # probe: # timeout: 30s # the time out for all probes # interval: 1m # probe every minute for all probes +# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 +# success: 1 # number of continuously successful probes needed to determine the status down, default: 1 # # easeprobe program running log file. # log: From 5ea911dc03d3e1420489bf70dc4d4798e607d542 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 14 Oct 2022 11:52:12 +0800 Subject: [PATCH 2/6] add more unit test --- probe/status_counter_test.go | 4 ++++ probe/status_test.go | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/probe/status_counter_test.go b/probe/status_counter_test.go index a3cefe72..26ef56e2 100644 --- a/probe/status_counter_test.go +++ b/probe/status_counter_test.go @@ -52,4 +52,8 @@ func TestNewStatusCounter(t *testing.T) { s1 := s.Clone() assert.True(t, reflect.DeepEqual(s, &s1)) + + s1.SetMaxLen(2) + assert.Equal(t, 2, s1.MaxLen) + assert.Equal(t, 2, len(s1.StatusHistory)) } diff --git a/probe/status_test.go b/probe/status_test.go index 452db654..de593bc9 100644 --- a/probe/status_test.go +++ b/probe/status_test.go @@ -106,3 +106,23 @@ func TestStatus(t *testing.T) { err = yaml.Unmarshal([]byte{1, 2}, &s) assert.NotNil(t, err) } + +func TestStatusTitle(t *testing.T) { + s := StatusInit + assert.Equal(t, "Initialization", s.Title()) + + s = StatusUp + assert.Equal(t, "Success", s.Title()) + + s = StatusDown + assert.Equal(t, "Error", s.Title()) + + s = StatusUnknown + assert.Equal(t, "Unknown", s.Title()) + + s = StatusBad + assert.Equal(t, "Bad", s.Title()) + + s = -1 + assert.Equal(t, "Unknown", s.Title()) +} From 2cd00bb5fba770961b2c1f73dd377a7121de3a37 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 14 Oct 2022 12:02:21 +0800 Subject: [PATCH 3/6] fix the typo --- docs/Manual.md | 4 ++-- resources/config.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/Manual.md b/docs/Manual.md index 3964af88..35c7b0b0 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -628,7 +628,7 @@ The following example configurations illustrate the EaseProbe supported features - `timeout` - the maximum time to wait for the probe to complete. default: `30s`. - `interval` - the interval time to run the probe. default: `1m`. - `failure` - number of continuously failed probes needed to determine the status down, default: 1 -- `success` - number of continuously successful probes needed to determine the status down, default: 1 +- `success` - number of continuously successful probes needed to determine the status up, default: 1 ## 7.1 HTTP Probe Configuration @@ -1310,7 +1310,7 @@ settings: timeout: 30s # the time out for all probes interval: 1m # probe every minute for all probes failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 - success: 1 # number of continuously successful probes needed to determine the status down, default: 1 + success: 1 # number of continuously successful probes needed to determine the status up, default: 1 # easeprobe program running log file. diff --git a/resources/config.yaml b/resources/config.yaml index 3e61c3b3..efdd146c 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -43,7 +43,7 @@ # # configuration # timeout: 10s # default is 30 seconds # failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status down , default: 1 +# success: 1 # number of continuously successful probes needed to determine the status up , default: 1 http: # http probes - name: EaseProbe Github @@ -60,7 +60,7 @@ http: # http probes # proxy: socks5://proxy.server:1080 # Optional. Only support socks5. # # Also support the `ALL_PROXY` environment. # failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status down, default: 1 +# success: 1 # number of continuously successful probes needed to determine the status up, default: 1 # --------------------- Shell Probe Configuration --------------------- # @@ -391,7 +391,7 @@ notify: # timeout: 30s # the time out for all probes # interval: 1m # probe every minute for all probes # failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status down, default: 1 +# success: 1 # number of continuously successful probes needed to determine the status up, default: 1 # # easeprobe program running log file. # log: From c3f4cddc0182c6dd27d31e556960cd39bda188b9 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 14 Oct 2022 12:04:26 +0800 Subject: [PATCH 4/6] fix the jsonschema comma problem --- global/probe.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/global/probe.go b/global/probe.go index f3e6abfd..0dba2926 100644 --- a/global/probe.go +++ b/global/probe.go @@ -22,9 +22,9 @@ import "time" // StatusChangeThresholdSettings is the settings for probe threshold type StatusChangeThresholdSettings struct { // the failures threshold such as 2, 5 - Failure int `yaml:"failure,omitempty" json:"failure,omitempty" jsonschema:"description=the failures threshold to change the status such as 3, 5,default=1"` + Failure int `yaml:"failure,omitempty" json:"failure,omitempty" jsonschema:"title=Failure Threshold,description=the failures threshold to change the status such as 3,default=1"` // the success threshold such as 2, 5 - Success int `yaml:"success,omitempty" json:"success,omitempty" jsonschema:"description=the success threshold to change the status such as 3, 5,default=1"` + Success int `yaml:"success,omitempty" json:"success,omitempty" jsonschema:"title=Success Threshold,description=the success threshold to change the status such as 2,default=1"` } // ProbeSettings is the global probe setting From fc6d6b830cb81f4358c862b40391122854d4b96a Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 14 Oct 2022 19:28:48 +0800 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Pantelis Roditis --- docs/Manual.md | 8 ++++---- resources/config.yaml | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/Manual.md b/docs/Manual.md index 35c7b0b0..1e63882f 100644 --- a/docs/Manual.md +++ b/docs/Manual.md @@ -627,8 +627,8 @@ The following example configurations illustrate the EaseProbe supported features - `timeout` - the maximum time to wait for the probe to complete. default: `30s`. - `interval` - the interval time to run the probe. default: `1m`. -- `failure` - number of continuously failed probes needed to determine the status down, default: 1 -- `success` - number of continuously successful probes needed to determine the status up, default: 1 +- `failure` - number of consecutive failed probes needed to determine the status down, default: 1 +- `success` - number of consecutive successful probes needed to determine the status up, default: 1 ## 7.1 HTTP Probe Configuration @@ -1309,8 +1309,8 @@ settings: probe: timeout: 30s # the time out for all probes interval: 1m # probe every minute for all probes - failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 - success: 1 # number of continuously successful probes needed to determine the status up, default: 1 + failure: 2 # number of consecutive failed probes needed to determine the status down, default: 1 + success: 1 # number of consecutive successful probes needed to determine the status up, default: 1 # easeprobe program running log file. diff --git a/resources/config.yaml b/resources/config.yaml index efdd146c..cdfacd21 100644 --- a/resources/config.yaml +++ b/resources/config.yaml @@ -42,8 +42,8 @@ # expression: "x_time('//feed/updated') > '2022-07-01'" # the expression to evaluate. # # configuration # timeout: 10s # default is 30 seconds -# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status up , default: 1 +# failure: 2 # number of consecutive failed probes needed to determine the status down, default: 1 +# success: 1 # number of consecutive successful probes needed to determine the status up , default: 1 http: # http probes - name: EaseProbe Github @@ -59,8 +59,8 @@ http: # http probes # interval: 2m # default is 60 seconds # proxy: socks5://proxy.server:1080 # Optional. Only support socks5. # # Also support the `ALL_PROXY` environment. -# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status up, default: 1 +# failure: 2 # number of consecutive failed probes needed to determine the status down, default: 1 +# success: 1 # number of consecutive successful probes needed to determine the status up, default: 1 # --------------------- Shell Probe Configuration --------------------- # @@ -390,8 +390,8 @@ notify: # probe: # timeout: 30s # the time out for all probes # interval: 1m # probe every minute for all probes -# failure: 2 # number of continuously failed probes needed to determine the status down, default: 1 -# success: 1 # number of continuously successful probes needed to determine the status up, default: 1 +# failure: 2 # number of consecutive failed probes needed to determine the status down, default: 1 +# success: 1 # number of consecutive successful probes needed to determine the status up, default: 1 # # easeprobe program running log file. # log: From 85a89ad663fcffd708c9c690db8d734a1af24144 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 17 Oct 2022 20:09:11 +0800 Subject: [PATCH 6/6] rewording the log message --- probe/base/base.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/probe/base/base.go b/probe/base/base.go index 4b5d66de..7bcd1871 100644 --- a/probe/base/base.go +++ b/probe/base/base.go @@ -20,6 +20,7 @@ package base import ( "fmt" + "math" "net" "net/url" "os" @@ -103,15 +104,26 @@ func (d *DefaultProbe) CheckStatusThreshold() probe.Status { title, c.CurrentStatus, c.StatusCount, s.Failure, s.Success) if c.CurrentStatus == true && c.StatusCount >= s.Success { - log.Infof("%s - Status is UP! Meet the Success Threshold [%d/%d]", title, c.StatusCount, s.Success) + if d.ProbeResult.Status != probe.StatusUp { + cnt := math.Max(float64(c.StatusCount), float64(s.Success)) + log.Infof("%s - Status is UP! Threshold reached for success [%d/%d]", title, int(cnt), s.Success) + } return probe.StatusUp } if c.CurrentStatus == false && c.StatusCount >= s.Failure { - log.Infof("%s - Status is DOWN! Meet the Failure Threshold [%d/%d]", title, c.StatusCount, s.Failure) + if d.ProbeResult.Status != probe.StatusDown { + cnt := math.Max(float64(c.StatusCount), float64(s.Failure)) + log.Infof("%s - Status is DOWN! Threshold reached for failure [%d/%d]", title, int(cnt), s.Failure) + } return probe.StatusDown } - log.Infof("%s - Keep the Status as %s! Not meet the Threshold - Current[%v], StatusCnt[%d], FailureThread[%d], SuccessThread[%d]", - title, d.ProbeResult.PreStatus, c.CurrentStatus, c.StatusCount, s.Failure, s.Success) + if c.CurrentStatus == true { + log.Infof("%s - Status unchanged [%s]! Threshold is not reached for success [%d/%d].", + title, d.ProbeResult.PreStatus, c.StatusCount, s.Success) + } else { + log.Infof("%s - Status unchanged [%s]! Threshold is not reached for failure [%d/%d].", + title, d.ProbeResult.PreStatus, c.StatusCount, s.Failure) + } return d.ProbeResult.PreStatus } @@ -146,6 +158,10 @@ func (d *DefaultProbe) Config(gConf global.ProbeSettings, log.Infof("Probe %s base options are configured!", d.LogTitle()) + if d.Failure > 1 || d.Success > 1 { + log.Infof("Probe %s Status Threshold are configured! failure[%d], success[%d]", d.LogTitle(), d.Failure, d.Success) + } + d.metrics = newMetrics(kind, tag) return nil