diff --git a/pkg/util/provider/machinecodes/status/status.go b/pkg/util/provider/machinecodes/status/status.go index 00058af3c..33cbda2d1 100644 --- a/pkg/util/provider/machinecodes/status/status.go +++ b/pkg/util/provider/machinecodes/status/status.go @@ -48,6 +48,8 @@ type Status struct { // [google.rpc.Status.details][google.rpc.Status.details] field, or localized // by the client. message string + // cause captures the underlying error + cause error } // Code returns the status code contained in status. @@ -63,7 +65,15 @@ func (s *Status) Message() string { return s.message } +// Cause returns the underlying error if captured. +func (s *Status) Cause() error { + return s.cause +} + // Error returns the error message for the status. +// WARNING: There is an unwritten contract for anyone using status.Status. One MUST never change +// the message text. It expects error code to be in the first square brackets and error message in the next. Therefore, +// any change made here should never change that. Any square brackets added after code and error are ignored when parsing. func (s *Status) Error() string { return fmt.Sprintf("machine codes error: code = [%s] message = [%s]", s.Code(), s.Message()) } @@ -78,6 +88,15 @@ func Error(c codes.Code, msg string) error { return New(c, msg) } +// WrapError creates an instance of status.Status wrapping the underlying cause along with the code and custom error message. +func WrapError(c codes.Code, msg string, cause error) *Status { + return &Status{ + code: int32(c), + message: msg, + cause: cause, + } +} + // FromError returns a Status representing err if it was produced from this // package or has a method `GRPCStatus() *Status`. Otherwise, ok is false and a // Status is returned with codes.Unknown and the original error message. @@ -88,10 +107,17 @@ func FromError(err error) (s *Status, ok bool) { if matches, errInFind := findInString(err.Error()); errInFind == nil { code := codes.StringToCode(matches[0]) - return New(code, matches[1]), true + return &Status{ + code: int32(code), + message: matches[1], + }, true } - return New(codes.Unknown, err.Error()), false + return &Status{ + code: int32(codes.Unknown), + message: err.Error(), + cause: err, + }, false } // findInString need to check if this logic can be optimized diff --git a/pkg/util/provider/metrics/metrics.go b/pkg/util/provider/metrics/metrics.go index 6e73e6724..50e0f72be 100644 --- a/pkg/util/provider/metrics/metrics.go +++ b/pkg/util/provider/metrics/metrics.go @@ -80,6 +80,33 @@ var ( Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", }, []string{"provider", "service"}, ) + + // APIRequestDuration records duration of all successful provider API calls. + // This metric can be filtered by provider and service. + APIRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "api_request_duration_seconds", + Help: "Time(in seconds) it takes for a provider API request to complete", + }, []string{"provider", "service"}) + + // DriverAPIRequestDuration records duration of all successful driver API calls. + // This metric can be filtered by provider and operation. + DriverAPIRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "driver_request_duration_seconds", + Help: "Total time (in seconds) taken for a driver API request to complete", + }, []string{"provider", "operation"}) + + // DriverFailedAPIRequests records number of failed driver API calls. + // This metric can be filtered by provider, operation and error code. + DriverFailedAPIRequests = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "driver_requests_failed_total", + Help: "Number of failed Driver API requests, partitioned by provider, operation and error code", + }, []string{"provider", "operation", "error_code"}) ) // variables for subsystem: misc @@ -103,6 +130,9 @@ func registerMachineSubsystemMetrics() { func registerCloudAPISubsystemMetrics() { prometheus.MustRegister(APIRequestCount) prometheus.MustRegister(APIFailedRequestCount) + prometheus.MustRegister(APIRequestDuration) + prometheus.MustRegister(DriverAPIRequestDuration) + prometheus.MustRegister(DriverFailedAPIRequests) } func registerMiscellaneousMetrics() { @@ -113,4 +143,4 @@ func init() { registerMachineSubsystemMetrics() registerCloudAPISubsystemMetrics() registerMiscellaneousMetrics() -} \ No newline at end of file +}