Skip to content
This repository has been archived by the owner on Jan 12, 2024. It is now read-only.

fix: error messages are no longer attached to indicator from event if dashboard processing fails #687

Merged
merged 7 commits into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions internal/sli/dashboard/dashboard_error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package dashboard

import "fmt"

// ProcessingError represents a base error that happened while processing a dashboard
type ProcessingError struct {
cause error
}

func (pe *ProcessingError) Error() string {
return pe.cause.Error()
}

func (pe *ProcessingError) Unwrap() error {
return pe.cause
}

// QueryError represents an error that happened while querying a dashboard
type QueryError struct {
err *ProcessingError
}

// NewQueryError will create a new QueryError
func NewQueryError(cause error) *QueryError {
return &QueryError{
err: &ProcessingError{
cause: cause,
},
}
}

func (e *QueryError) Error() string {
return fmt.Sprintf("could not query Dynatrace dashboard for SLIs: %v", e.err.Error())
}

func (e *QueryError) Unwrap() error {
return e.err
}

type UploadFileError struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[golint] reported by reviewdog 🐶
exported type UploadFileError should have comment or be unexported

context string
err *ProcessingError
}

// NewUploadFileError will create a new UploadFileError
func NewUploadFileError(context string, cause error) *UploadFileError {
return &UploadFileError{
context: context,
err: &ProcessingError{
cause: cause,
},
}
}

func (e *UploadFileError) Error() string {
return fmt.Sprintf("could not upload %s file: %v", e.context, e.err.Error())
}

func (e *UploadFileError) Unwrap() error {
return e.err
}
100 changes: 42 additions & 58 deletions internal/sli/get_sli_triggered_event_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
)

const ProblemOpenSLI = "problem_open"
const NoMetricIndicator = "no metric"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[golint] reported by reviewdog 🐶
exported const NoMetricIndicator should have comment or be unexported


type GetSLIEventHandler struct {
event GetSLITriggeredAdapterInterface
Expand Down Expand Up @@ -115,9 +116,7 @@ func ensureRightTimestamps(start string, end string) (time.Time, time.Time, erro
return startUnix, endUnix, nil
}

/**
* Adds an SLO Entry to the SLO.yaml
*/
// addSLO adds an SLO Entry to the SLO.yaml
func (eh GetSLIEventHandler) addSLO(newSLO *keptncommon.SLO) error {

// first - lets load the SLO.yaml from the config repo
Expand Down Expand Up @@ -159,45 +158,35 @@ func (eh GetSLIEventHandler) addSLO(newSLO *keptncommon.SLO) error {
return nil
}

// Tries to find a dynatrace dashboard that matches our project. If so - returns the SLI, SLO and SLIResults
// getDataFromDynatraceDashboard will process dynatrace dashboard (if found) and return the SLI, SLO and SLIResults
func (eh *GetSLIEventHandler) getDataFromDynatraceDashboard(startUnix time.Time, endUnix time.Time) (*dashboard.DashboardLink, []*keptnv2.SLIResult, error) {

// creating Dynatrace Retrieval which allows us to call the Dynatrace API
sliQuerying := dashboard.NewQuerying(eh.event, eh.event.GetCustomSLIFilters(), eh.dtClient)

//
// Option 1: We query the data from a dashboard instead of the uploaded SLI.yaml
// ==============================================================================
// Lets see if we have a Dashboard in Dynatrace that we should parse
result, err := sliQuerying.GetSLIValues(eh.dashboard, startUnix, endUnix)
if err != nil {
return nil, nil, fmt.Errorf("could not query Dynatrace dashboard for SLIs: %v", err)
return nil, nil, dashboard.NewQueryError(err)
}

// lets write the SLI to the config repo
// let's write the SLI to the config repo
if result.HasSLIs() {
err = eh.resourceClient.UploadSLI(eh.event.GetProject(), eh.event.GetStage(), eh.event.GetService(), result.SLI())
if err != nil {
return nil, nil, err
return nil, nil, dashboard.NewUploadFileError("SLI", err)
}
}

// lets write the SLO to the config repo
// let's write the SLO to the config repo
if result.HasSLOs() {
err = eh.resourceClient.UploadSLOs(eh.event.GetProject(), eh.event.GetStage(), eh.event.GetService(), result.SLO())
if err != nil {
return nil, nil, err
return nil, nil, dashboard.NewUploadFileError("SLO", err)
}
}

return result.DashboardLink(), result.SLIResults(), nil
}

/**
* getDynatraceProblemContext
*
* Will evaluate the event and - if it finds a dynatrace problem ID - will return this - otherwise it will return 0
*/
//getDynatraceProblemContext will evaluate the event and - returns dynatrace problem ID if found, 0 otherwise
func getDynatraceProblemContext(eventData GetSLITriggeredAdapterInterface) string {

// iterate through the labels and find Problem URL
Expand All @@ -220,7 +209,6 @@ func getDynatraceProblemContext(eventData GetSLITriggeredAdapterInterface) strin
return ""
}

//
func (eh *GetSLIEventHandler) getSLIResultsFromCustomQueries(startUnix time.Time, endUnix time.Time) ([]*keptnv2.SLIResult, error) {
// get custom metrics for project if they exist
projectCustomQueries, err := eh.kClient.GetCustomQueries(eh.event.GetProject(), eh.event.GetStage(), eh.event.GetService())
Expand Down Expand Up @@ -275,7 +263,7 @@ func (eh *GetSLIEventHandler) getSLIResultsFromProblemContext(problemID string)
success := false
message := ""

// lets query the status of this problem and add it to the SLI Result
// let's query the status of this problem and add it to the SLI Result
status, err := dynatrace.NewProblemsV2Client(eh.dtClient).GetStatusByID(problemID)
if err != nil {
message = err.Error()
Expand All @@ -288,15 +276,16 @@ func (eh *GetSLIEventHandler) getSLIResultsFromProblemContext(problemID string)
}
}

// lets add this to the sliResults
// let's add this to the sliResults
sliResult := &keptnv2.SLIResult{
Metric: problemIndicator,
Value: openProblemValue,
Success: success,
Message: message,
}

// lets add this to the SLO in case this indicator is not yet in SLO.yaml. Becuase if it doesnt get added the lighthouse wont evaluate the SLI values
// let's add this to the SLO in case this indicator is not yet in SLO.yaml.
// Because if it does not get added the lighthouse will not evaluate the SLI values
// we default it to open_problems<=0
sloString := fmt.Sprintf("sli=%s;pass=<=0;key=true", problemIndicator)
sloDefinition := common.ParsePassAndWarningWithoutDefaultsFrom(sloString)
Expand All @@ -310,10 +299,7 @@ func (eh *GetSLIEventHandler) getSLIResultsFromProblemContext(problemID string)
return sliResult
}

// retrieveMetrics Handles keptn.InternalGetSLIEventType
//
// First tries to find a Dynatrace dashboard and then parses it for SLIs and SLOs
// Second will go to parse the SLI.yaml and returns the SLI as passed in by the event
// retrieveMetrics will retrieve metrics either from a dashboard or from an SLI file
func (eh *GetSLIEventHandler) retrieveMetrics() error {
// send get-sli.started event
if err := eh.sendGetSLIStartedEvent(); err != nil {
Expand Down Expand Up @@ -379,51 +365,49 @@ func (eh *GetSLIEventHandler) retrieveMetrics() error {
return eh.sendGetSLIFinishedEvent(sliResults, err)
}

/**
* Sends the SLI Done Event. If err != nil it will send an error message
*/
func (eh *GetSLIEventHandler) sendGetSLIFinishedEvent(indicatorValues []*keptnv2.SLIResult, err error) error {
// if an error was set - the indicators will be set to failed and error message is set to each
indicatorValues = resetIndicatorsInCaseOfError(err, eh.event, indicatorValues)
// sendGetSLIFinishedEvent sends the SLI finished event. If err != nil it will send an error message
func (eh *GetSLIEventHandler) sendGetSLIFinishedEvent(sliResults []*keptnv2.SLIResult, err error) error {
// if an error was set - the SLI results will be set to failed and an error message is set to each
sliResults = resetSLIResultsInCaseOfError(err, eh.event, sliResults)

return eh.sendEvent(NewSucceededGetSLIFinishedEventFactory(eh.event, indicatorValues, err))
return eh.sendEvent(NewSucceededGetSLIFinishedEventFactory(eh.event, sliResults, err))
}

func resetIndicatorsInCaseOfError(err error, eventData GetSLITriggeredAdapterInterface, indicatorValues []*keptnv2.SLIResult) []*keptnv2.SLIResult {
if err != nil {
indicators := eventData.GetIndicators()
if (indicatorValues == nil) || (len(indicatorValues) == 0) {
if indicators == nil || len(indicators) == 0 {
indicators = []string{"no metric"}
}
func resetSLIResultsInCaseOfError(err error, eventData GetSLITriggeredAdapterInterface, sliResults []*keptnv2.SLIResult) []*keptnv2.SLIResult {
if err == nil {
return sliResults
}

for _, indicatorName := range indicators {
indicatorValues = []*keptnv2.SLIResult{
{
Metric: indicatorName,
Value: 0.0,
},
}
}
indicators := eventData.GetIndicators()
if len(sliResults) == 0 {
var errType *dashboard.QueryError
if len(indicators) == 0 || errors.As(err, &errType) {
indicators = []string{NoMetricIndicator}
}

errMessage := err.Error()
for _, indicator := range indicatorValues {
indicator.Success = false
indicator.Message = errMessage
for _, indicatorName := range indicators {
sliResults = []*keptnv2.SLIResult{
{
Metric: indicatorName,
Value: 0.0,
},
}
}
}

return indicatorValues
errMessage := err.Error()
for _, sliResult := range sliResults {
sliResult.Success = false
sliResult.Message = errMessage
}

return sliResults
}

func (eh *GetSLIEventHandler) sendGetSLIStartedEvent() error {
return eh.sendEvent(NewGetSliStartedEventFactory(eh.event))
}

/**
* sends cloud event back to keptn
*/
func (eh *GetSLIEventHandler) sendEvent(factory adapter.CloudEventFactoryInterface) error {
err := eh.kClient.SendCloudEvent(factory)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package sli

import (
"net/http"
"strconv"
"testing"

keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
"github.com/stretchr/testify/assert"

"github.com/keptn-contrib/dynatrace-service/internal/dynatrace"
"github.com/keptn-contrib/dynatrace-service/internal/keptn"
"github.com/keptn-contrib/dynatrace-service/internal/test"
)

// Retrieving a dashboard by an invalid ID returns an error
//
// prerequisites:
// * we use
// * an invalid dashboard ID and Dynatrace API returns a 400 error, or
// * a valid, but not found dashboard ID and Dynatrace API returns a 404
// * the event can have multiple indicators or none. (There is an SLO file in Keptn and the SLO files may contain indicators)
//
// We do not want to see the error attached to any indicator coming from SLO files, but attached to a "no metric" indicator
func TestThatInvalidDashboardIDProducesErrorMessageInNoMetricIndicatorEvenIfThereAreIndicators(t *testing.T) {

type definition struct {
errorCode int
errorMessage string
dashboardID string
payload string
}

invalidID := definition{
errorCode: 400,
errorMessage: "Constraints violated",
dashboardID: "some-invalid-dashboard-id",
payload: "./testdata/sli_via_dashboard_test/dashboard_invalid_uuid_400.json",
}
idNotFound := definition{
errorCode: 404,
errorMessage: "not found",
dashboardID: testDashboardID,
payload: "./testdata/sli_via_dashboard_test/dashboard_id_not_found_404.json",
}

testConfigs := []struct {
name string
eventIndicators []string
def definition
}{
{
name: "no indicators defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{},
def: invalidID,
},
{
name: "one indicator defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{"single-indicator-from-slo-file"},
def: invalidID,
},
{
name: "multiple indicators defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{"first-indicator-from-slo-file", "second-indicator-from-slo-file", "third-indicator-from-slo-file"},
def: invalidID,
},
{
name: "no indicators defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{},
def: idNotFound,
},
{
name: "one indicator defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{"single-indicator-from-slo-file"},
def: idNotFound,
},
{
name: "multiple indicators defined in event (SLO file) will produce single SLI result with name 'no metric'",
eventIndicators: []string{"first-indicator-from-slo-file", "second-indicator-from-slo-file", "third-indicator-from-slo-file"},
def: idNotFound,
},
}

for _, testConfig := range testConfigs {
tc := testConfig
t.Run(tc.name, func(t *testing.T) {
testEvent := &getSLIEventData{
project: "sockshop",
stage: "staging",
service: "carts",
indicators: tc.eventIndicators,
sliStart: "", // use defaults here
sliEnd: "", // use defaults here
}

handler := test.NewFileBasedURLHandler(t)
handler.AddExactError(dynatrace.DashboardsPath+"/"+tc.def.dashboardID, tc.def.errorCode, tc.def.payload)

// sli and slo upload works
rClient := &uploadErrorResourceClientMock{t: t}

getSLIFinishedEventAssertionsFunc := func(t *testing.T, actual *keptnv2.GetSLIFinishedEventData) {
assert.EqualValues(t, keptnv2.ResultFailed, actual.Result)
assert.Contains(t, actual.Message, tc.def.dashboardID)
assert.Contains(t, actual.Message, strconv.Itoa(tc.def.errorCode))
assert.Contains(t, actual.Message, tc.def.errorMessage)
}

runAndAssertDashboardTest(t, testEvent, handler, rClient, tc.def.dashboardID, getSLIFinishedEventAssertionsFunc, createFailedSLIResultAssertionsFunc(NoMetricIndicator))
})
}
}

func runAndAssertDashboardTest(t *testing.T, getSLIEventData *getSLIEventData, handler http.Handler, rClient keptn.ResourceClientInterface, dashboardID string, getSLIFinishedEventAssertionsFunc func(t *testing.T, actual *keptnv2.GetSLIFinishedEventData), sliResultAssertionsFuncs ...func(t *testing.T, actual *keptnv2.SLIResult)) {

// we do not need custom queries, as we are using the dashboard
kClient := &keptnClientMock{}

runTestAndAssertNoError(t, getSLIEventData, handler, kClient, rClient, dashboardID)

assertCorrectGetSLIEvents(t, kClient.eventSink, getSLIFinishedEventAssertionsFunc, sliResultAssertionsFuncs...)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"error": {
"code": 404,
"message": "Dashboard e03f4be0-4712-4f12-96ee-8c486d001e9c not found"
}
}
Loading