Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug when event was sent after maintenance even if state didn't change #87

Merged
merged 5 commits into from
Jul 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions checker/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func (triggerChecker *TriggerChecker) checkTimeSeries(timeSeries *target.TimeSer
return
}
for _, currentState := range metricStates {
lastState, err = triggerChecker.compareStates(timeSeries.Name, currentState, lastState)
lastState, err = triggerChecker.compareMetricStates(timeSeries.Name, currentState, lastState)
if err != nil {
return
}
Expand All @@ -130,7 +130,7 @@ func (triggerChecker *TriggerChecker) checkTimeSeries(timeSeries *target.TimeSer
return
}
if noDataState != nil {
lastState, err = triggerChecker.compareStates(timeSeries.Name, *noDataState, lastState)
lastState, err = triggerChecker.compareMetricStates(timeSeries.Name, *noDataState, lastState)
}
return
}
Expand Down Expand Up @@ -167,7 +167,7 @@ func (triggerChecker *TriggerChecker) handleErrorCheck(checkData moira.CheckData
triggerChecker.Logger.Errorf("Trigger %s check failed: %s", triggerChecker.TriggerID, checkingError.Error())
checkData.State = EXCEPTION
}
return triggerChecker.compareChecks(checkData)
return triggerChecker.compareTriggerStates(checkData)
}

func (triggerChecker *TriggerChecker) checkForNoData(timeSeries *target.TimeSeries, metricLastState moira.MetricState) (bool, *moira.MetricState) {
Expand Down
63 changes: 50 additions & 13 deletions checker/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,21 @@ package checker

import (
"fmt"
"github.com/moira-alert/moira"
"time"

"github.com/moira-alert/moira"
)

// badStateReminder maps a state name to a reminder interval for that state.
// needSendEvent looks the current state up here and reports the interval as
// remindInterval/3600 "hours", so the values are expressed in seconds
// (86400 corresponds to 24 hours). States without an entry get no reminder.
var badStateReminder = map[string]int64{
ERROR: 86400,
NODATA: 86400,
}

func (triggerChecker *TriggerChecker) compareChecks(currentCheck moira.CheckData) (moira.CheckData, error) {
func (triggerChecker *TriggerChecker) compareTriggerStates(currentCheck moira.CheckData) (moira.CheckData, error) {
currentStateValue := currentCheck.State
lastStateValue := triggerChecker.lastCheck.State
lastStateSuppressed := triggerChecker.lastCheck.Suppressed
lastStateSuppressedValue := triggerChecker.lastCheck.SuppressedState
timestamp := currentCheck.Timestamp

if triggerChecker.lastCheck.EventTimestamp != 0 {
Expand All @@ -22,18 +25,30 @@ func (triggerChecker *TriggerChecker) compareChecks(currentCheck moira.CheckData
currentCheck.EventTimestamp = timestamp
}

needSend, message := needSendEvent(currentStateValue, lastStateValue, timestamp, triggerChecker.lastCheck.GetEventTimestamp(), triggerChecker.lastCheck.Suppressed)
if lastStateSuppressed && lastStateSuppressedValue == "" {
lastStateSuppressedValue = lastStateValue
}

currentCheck.SuppressedState = lastStateSuppressedValue

needSend, message := needSendEvent(currentStateValue, lastStateValue, timestamp, triggerChecker.lastCheck.GetEventTimestamp(), lastStateSuppressed, lastStateSuppressedValue)
if !needSend {
return currentCheck, nil
}
if message == nil {
message = &currentCheck.Message
}

eventOldState := lastStateValue
if lastStateSuppressed {
eventOldState = lastStateSuppressedValue
}

event := moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.TriggerID,
State: currentStateValue,
OldState: lastStateValue,
OldState: eventOldState,
Timestamp: timestamp,
Metric: triggerChecker.trigger.Name,
Message: message,
Expand All @@ -44,29 +59,45 @@ func (triggerChecker *TriggerChecker) compareChecks(currentCheck moira.CheckData

if triggerChecker.isTriggerSuppressed(&event, timestamp, 0, "") {
currentCheck.Suppressed = true
if !lastStateSuppressed {
currentCheck.SuppressedState = lastStateValue
}
return currentCheck, nil
}

currentCheck.SuppressedState = ""
triggerChecker.Logger.Infof("Writing new event: %v", event)
err := triggerChecker.Database.PushNotificationEvent(&event, true)
return currentCheck, err
}

func (triggerChecker *TriggerChecker) compareStates(metric string, currentState moira.MetricState, lastState moira.MetricState) (moira.MetricState, error) {
func (triggerChecker *TriggerChecker) compareMetricStates(metric string, currentState moira.MetricState, lastState moira.MetricState) (moira.MetricState, error) {
if lastState.EventTimestamp != 0 {
currentState.EventTimestamp = lastState.EventTimestamp
} else {
currentState.EventTimestamp = currentState.Timestamp
}

needSend, message := needSendEvent(currentState.State, lastState.State, currentState.Timestamp, lastState.GetEventTimestamp(), lastState.Suppressed)
if lastState.Suppressed && lastState.SuppressedState == "" {
lastState.SuppressedState = lastState.State
}

currentState.SuppressedState = lastState.SuppressedState

needSend, message := needSendEvent(currentState.State, lastState.State, currentState.Timestamp, lastState.GetEventTimestamp(), lastState.Suppressed, lastState.SuppressedState)
if !needSend {
return currentState, nil
}

eventOldState := lastState.State
if lastState.Suppressed {
eventOldState = lastState.SuppressedState
}

event := moira.NotificationEvent{
TriggerID: triggerChecker.TriggerID,
State: currentState.State,
OldState: lastState.State,
OldState: eventOldState,
Timestamp: currentState.Timestamp,
Metric: metric,
Message: message,
Expand All @@ -78,8 +109,13 @@ func (triggerChecker *TriggerChecker) compareStates(metric string, currentState

if triggerChecker.isTriggerSuppressed(&event, currentState.Timestamp, currentState.Maintenance, metric) {
currentState.Suppressed = true
if !lastState.Suppressed {
currentState.SuppressedState = lastState.State
}
return currentState, nil
}

currentState.SuppressedState = ""
triggerChecker.Logger.Infof("Writing new event: %v", event)
err := triggerChecker.Database.PushNotificationEvent(&event, true)
return currentState, err
Expand All @@ -97,19 +133,20 @@ func (triggerChecker *TriggerChecker) isTriggerSuppressed(event *moira.Notificat
return false
}

// needSendEvent reports whether a notification event should be produced for
// the given current/last state pair, and optionally returns a message to attach
// to the event (nil means the caller supplies or omits the message itself).
//
// NOTE(review): this span is a rendered diff without +/- markers. Where two
// near-identical lines follow each other, the first appears to be the removed
// version and the second the added one (the second signature carries the extra
// lastStateSuppressedValue parameter that the callers shown in this diff pass)
// - confirm against the repository before relying on it.
func needSendEvent(currentStateValue string, lastStateValue string, currentStateTimestamp int64, lastStateEventTimestamp int64, isLastStateSuppressed bool) (needSend bool, message *string) {
if currentStateValue != lastStateValue {
func needSendEvent(currentStateValue string, lastStateValue string, currentStateTimestamp int64, lastStateEventTimestamp int64, isLastCheckSuppressed bool, lastStateSuppressedValue string) (needSend bool, message *string) {
// Last check was not suppressed: any state change is reported, with no
// function-supplied message.
if !isLastCheckSuppressed && currentStateValue != lastStateValue {
return true, nil
}
// Last check was suppressed: the current state is compared with
// lastStateSuppressedValue rather than lastStateValue, and a difference is
// reported with a fixed maintenance message.
// presumably lastStateSuppressedValue is the state held when suppression
// began; verify against the callers.
if isLastCheckSuppressed && currentStateValue != lastStateSuppressedValue {
message := "This metric changed its state during maintenance interval."
return true, &message
}
// No reportable change: send a reminder only if the current state has an entry
// in badStateReminder and needRemindAgain accepts the timestamps and interval.
remindInterval, ok := badStateReminder[currentStateValue]
if ok && needRemindAgain(currentStateTimestamp, lastStateEventTimestamp, remindInterval) {
message := fmt.Sprintf("This metric has been in bad state for more than %v hours - please, fix.", remindInterval/3600)
return true, &message
}
// NOTE(review): the conditional below and the following `return true, nil`
// reference isLastStateSuppressed, which exists only in the first signature
// above, so they appear to be lines removed by this change - confirm.
if !isLastStateSuppressed || currentStateValue == OK {
return false, nil
}
return true, nil
// Fallthrough of the second (apparently added) version: nothing to send.
return false, nil
}

func needRemindAgain(currentStateTimestamp, lastStateEventTimestamp, remindInterval int64) bool {
Expand Down
Loading