From 2ac198c5f770973412c9a8195f11928f0f500952 Mon Sep 17 00:00:00 2001 From: Madhav Bhargava Date: Fri, 16 Jun 2023 16:12:19 +0530 Subject: [PATCH] added histogram metrics to capture invocation duration of provide APIs changed the name of the API request duration metric added metric to capture overall duration for driver api calls reverted back to using service label for provider api metric added cause to status.Status added WrapError function corrected docstrings corrected leftover docstring Update pkg/util/provider/metrics/metrics.go Co-authored-by: Himanshu Sharma <79965161+himanshu-kun@users.noreply.github.com> Apply suggestions from code review Co-authored-by: Himanshu Sharma <79965161+himanshu-kun@users.noreply.github.com> addressed review comment - removed populating cause when in FromError when err is already status.Status --- .../provider/machinecodes/status/status.go | 30 +++++++++++++++++-- pkg/util/provider/metrics/metrics.go | 17 ++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/pkg/util/provider/machinecodes/status/status.go b/pkg/util/provider/machinecodes/status/status.go index 00058af3c..33cbda2d1 100644 --- a/pkg/util/provider/machinecodes/status/status.go +++ b/pkg/util/provider/machinecodes/status/status.go @@ -48,6 +48,8 @@ type Status struct { // [google.rpc.Status.details][google.rpc.Status.details] field, or localized // by the client. message string + // cause captures the underlying error + cause error } // Code returns the status code contained in status. @@ -63,7 +65,15 @@ func (s *Status) Message() string { return s.message } +// Cause returns the underlying error if captured. +func (s *Status) Cause() error { + return s.cause +} + // Error returns the error message for the status. +// WARNING: There is an unwritten contract for anyone using status.Status. One MUST never change +// the message text. It expects error code to be in the first square brackets and error message in the next. Therefore, +// any change made here should never change that. Any square brackets added after code and error are ignored when parsing. func (s *Status) Error() string { return fmt.Sprintf("machine codes error: code = [%s] message = [%s]", s.Code(), s.Message()) } @@ -78,6 +88,15 @@ func Error(c codes.Code, msg string) error { return New(c, msg) } +// WrapError creates an instance of status.Status wrapping the underlying cause along with the code and custom error message. +func WrapError(c codes.Code, msg string, cause error) *Status { + return &Status{ + code: int32(c), + message: msg, + cause: cause, + } +} + // FromError returns a Status representing err if it was produced from this // package or has a method `GRPCStatus() *Status`. Otherwise, ok is false and a // Status is returned with codes.Unknown and the original error message. @@ -88,10 +107,17 @@ func FromError(err error) (s *Status, ok bool) { if matches, errInFind := findInString(err.Error()); errInFind == nil { code := codes.StringToCode(matches[0]) - return New(code, matches[1]), true + return &Status{ + code: int32(code), + message: matches[1], + }, true } - return New(codes.Unknown, err.Error()), false + return &Status{ + code: int32(codes.Unknown), + message: err.Error(), + cause: err, + }, false } // findInString need to check if this logic can be optimized diff --git a/pkg/util/provider/metrics/metrics.go b/pkg/util/provider/metrics/metrics.go index 6e73e6724..7cec7cc0f 100644 --- a/pkg/util/provider/metrics/metrics.go +++ b/pkg/util/provider/metrics/metrics.go @@ -80,6 +80,20 @@ var ( Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", }, []string{"provider", "service"}, ) + + APIRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "api_request_duration_seconds", + Help: "Time(in seconds) it takes for a provider API request to complete", + }, []string{"provider", "service"}) + + DriverAPIRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "driver_request_duration_seconds", + Help: "Total time (in seconds) taken for a driver request to complete", + }, []string{"provider", "operation"}) ) // variables for subsystem: misc @@ -103,6 +117,7 @@ func registerMachineSubsystemMetrics() { func registerCloudAPISubsystemMetrics() { prometheus.MustRegister(APIRequestCount) prometheus.MustRegister(APIFailedRequestCount) + prometheus.MustRegister(APIRequestDuration) } func registerMiscellaneousMetrics() { @@ -113,4 +128,4 @@ func init() { registerMachineSubsystemMetrics() registerCloudAPISubsystemMetrics() registerMiscellaneousMetrics() -} \ No newline at end of file +}