Skip to content

Commit

Permalink
ddtrace/tracer: report datadog.tracer.api.errors health metric (#3024)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtoffl01 authored Dec 19, 2024
1 parent 144d592 commit 822d81b
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 6 deletions.
30 changes: 24 additions & 6 deletions ddtrace/tracer/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,18 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
}
req.Header.Set(traceCountHeader, strconv.Itoa(p.itemCount()))
req.Header.Set(headerComputedTopLevel, "yes")
if t, ok := traceinternal.GetGlobalTracer().(*tracer); ok {
if t.config.tracingAsTransport || t.config.canComputeStats() {
var tr *tracer
var haveTracer bool
if tr, haveTracer = traceinternal.GetGlobalTracer().(*tracer); haveTracer {
if tr.config.tracingAsTransport || tr.config.canComputeStats() {
// tracingAsTransport uses this header to disable the trace agent's stats computation
// while making canComputeStats() always false to also disable client stats computation.
req.Header.Set("Datadog-Client-Computed-Stats", "yes")
}
droppedTraces := int(atomic.SwapUint32(&t.droppedP0Traces, 0))
partialTraces := int(atomic.SwapUint32(&t.partialTraces, 0))
droppedSpans := int(atomic.SwapUint32(&t.droppedP0Spans, 0))
if stats := t.statsd; stats != nil {
droppedTraces := int(atomic.SwapUint32(&tr.droppedP0Traces, 0))
partialTraces := int(atomic.SwapUint32(&tr.partialTraces, 0))
droppedSpans := int(atomic.SwapUint32(&tr.droppedP0Spans, 0))
if stats := tr.statsd; stats != nil {
stats.Count("datadog.tracer.dropped_p0_traces", int64(droppedTraces),
[]string{fmt.Sprintf("partial:%s", strconv.FormatBool(partialTraces > 0))}, 1)
stats.Count("datadog.tracer.dropped_p0_spans", int64(droppedSpans), nil, 1)
Expand All @@ -170,9 +172,11 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
}
response, err := t.client.Do(req)
if err != nil {
reportAPIErrorsMetric(haveTracer, response, err, tr)
return nil, err
}
if code := response.StatusCode; code >= 400 {
reportAPIErrorsMetric(haveTracer, response, err, tr)
// error, check the body for context information and
// return a nice error.
msg := make([]byte, 1000)
Expand All @@ -187,6 +191,20 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
return response.Body, nil
}

// reportAPIErrorsMetric increments the datadog.tracer.api.errors health metric
// for a failed attempt to send a payload.
//
// The metric carries a single "reason" tag:
//   - "server_response_<status code>" when response is non-nil (this takes
//     precedence when both response and err are set),
//   - "network_failure" when only err is non-nil,
//   - an empty reason when neither is set.
//
// Nothing is reported when haveTracer is false, or when t or its statsd
// client is nil.
func reportAPIErrorsMetric(haveTracer bool, response *http.Response, err error, t *tracer) {
	// Without a usable statsd client there is nowhere to send the metric.
	// send guards its own counters with the same statsd nil check, so do the
	// same here rather than panic on an error path.
	if !haveTracer || t == nil || t.statsd == nil {
		return
	}
	var reason string
	switch {
	case response != nil:
		reason = fmt.Sprintf("server_response_%d", response.StatusCode)
	case err != nil:
		reason = "network_failure"
	}
	t.statsd.Incr("datadog.tracer.api.errors", []string{"reason:" + reason}, 1)
}

// endpoint reports the trace URL stored on this transport (its traceURL field).
func (t *httpTransport) endpoint() string {
	url := t.traceURL
	return url
}
85 changes: 85 additions & 0 deletions ddtrace/tracer/transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
traceinternal "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/statsdtest"
)

// getTestSpan returns a Span with different fields set
Expand Down Expand Up @@ -241,6 +243,89 @@ func TestCustomTransport(t *testing.T) {
assert.Equal(hits, 1)
}

// ErrTransport is an http.RoundTripper test double whose round trips always
// fail, standing in for a network-level failure.
type ErrTransport struct{}

// RoundTrip always returns a nil response and a fixed error. The request body,
// if any, is closed first because the http.RoundTripper contract requires
// RoundTrip to close it even on errors.
func (t *ErrTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req != nil && req.Body != nil {
		// The close error is irrelevant to this stub; it always fails anyway.
		_ = req.Body.Close()
	}
	return nil, fmt.Errorf("error in RoundTripper")
}

// ErrResponseTransport is an http.RoundTripper test double that answers every
// request with an HTTP 400 response and no transport error.
type ErrResponseTransport struct{}

// RoundTrip returns a bare response whose StatusCode is 400. The request body,
// if any, is closed first because the http.RoundTripper contract requires
// RoundTrip to always close it.
func (t *ErrResponseTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req != nil && req.Body != nil {
		// The close error has no bearing on the canned response.
		_ = req.Body.Close()
	}
	return &http.Response{StatusCode: 400}, nil
}

// OkTransport is an http.RoundTripper test double that answers every request
// with an HTTP 200 response and no transport error.
type OkTransport struct{}

// RoundTrip returns a bare response whose StatusCode is 200. The request body,
// if any, is closed first because the http.RoundTripper contract requires
// RoundTrip to always close it.
func (t *OkTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req != nil && req.Body != nil {
		// The close error has no bearing on the canned response.
		_ = req.Body.Close()
	}
	return &http.Response{StatusCode: 200}, nil
}

func TestApiErrorsMetric(t *testing.T) {
t.Run("error", func(t *testing.T) {
assert := assert.New(t)
c := &http.Client{
Transport: &ErrTransport{},
}
var tg statsdtest.TestStatsdClient
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

// We're expecting an error
_, err = trc.config.transport.send(p)
assert.Error(err)
calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 1)
call := calls[0]
assert.Equal([]string{"reason:network_failure"}, call.Tags())

})
t.Run("response with err code", func(t *testing.T) {
assert := assert.New(t)
c := &http.Client{
Transport: &ErrResponseTransport{},
}
var tg statsdtest.TestStatsdClient
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

_, err = trc.config.transport.send(p)
assert.Error(err)

calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 1)
call := calls[0]
assert.Equal([]string{"reason:server_response_400"}, call.Tags())
})
t.Run("successful send - no metric", func(t *testing.T) {
assert := assert.New(t)
var tg statsdtest.TestStatsdClient
c := &http.Client{
Transport: &OkTransport{},
}
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

_, err = trc.config.transport.send(p)
assert.NoError(err)

calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 0)
})
}

func TestWithHTTPClient(t *testing.T) {
// disable instrumentation telemetry to prevent flaky number of requests
t.Setenv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", "false")
Expand Down
14 changes: 14 additions & 0 deletions internal/statsdtest/statsdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ type TestStatsdCall struct {
rate float64
}

// Tags returns the tags stored on this recorded statsd call. The returned
// slice is the call's own slice, not a copy.
func (c *TestStatsdCall) Tags() []string {
	tags := c.tags
	return tags
}

func (tg *TestStatsdClient) addCount(name string, value int64) {
tg.mu.Lock()
defer tg.mu.Unlock()
Expand Down Expand Up @@ -221,6 +225,16 @@ func (tg *TestStatsdClient) CallsByName() map[string]int {
return counts
}

// FilterCallsByName returns the entries of calls whose name equals name, in
// their original order. The result is nil when no entry matches.
func FilterCallsByName(calls []TestStatsdCall, name string) []TestStatsdCall {
	var filtered []TestStatsdCall
	for i := range calls {
		if calls[i].name != name {
			continue
		}
		filtered = append(filtered, calls[i])
	}
	return filtered
}

func (tg *TestStatsdClient) Counts() map[string]int64 {
tg.mu.RLock()
defer tg.mu.RUnlock()
Expand Down

0 comments on commit 822d81b

Please sign in to comment.