Skip to content

Commit

Permalink
Add admission_blocked span to otel-arrow admission controller (open-t…
Browse files Browse the repository at this point in the history
…elemetry#35031)

**Description:** Adds a span to make admission controller-induced
latency visible via traces.

**Link to tracking Issue:**
open-telemetry/otel-arrow#244 previous effort, I
forgot I had moved this package to contrib. 😁

**Testing:** ✅
  • Loading branch information
jmacd authored and jriguera committed Oct 4, 2024
1 parent 339edeb commit 5481eed
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 9 deletions.
12 changes: 10 additions & 2 deletions internal/otelarrow/admission/boundedqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import (

"github.com/google/uuid"
orderedmap "github.com/wk8/go-ordered-map/v2"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
)

var ErrTooManyWaiters = fmt.Errorf("rejecting request, too many waiters")
Expand All @@ -21,6 +24,7 @@ type BoundedQueue struct {
currentWaiters int64
lock sync.Mutex
waiters *orderedmap.OrderedMap[uuid.UUID, waiter]
tracer trace.Tracer
}

type waiter struct {
Expand All @@ -29,11 +33,12 @@ type waiter struct {
ID uuid.UUID
}

func NewBoundedQueue(maxLimitBytes, maxLimitWaiters int64) *BoundedQueue {
func NewBoundedQueue(tp trace.TracerProvider, maxLimitBytes, maxLimitWaiters int64) *BoundedQueue {
return &BoundedQueue{
maxLimitBytes: maxLimitBytes,
maxLimitWaiters: maxLimitWaiters,
waiters: orderedmap.New[uuid.UUID, waiter](),
tracer: tp.Tracer("github.com/open-telemetry/opentelemetry-collector-contrib/internal/otelarrow"),
}
}

Expand Down Expand Up @@ -87,7 +92,9 @@ func (bq *BoundedQueue) Acquire(ctx context.Context, pendingBytes int64) error {
}

bq.lock.Unlock()
// @@@ instrument this code path
ctx, span := bq.tracer.Start(ctx, "admission_blocked",
trace.WithAttributes(attribute.Int64("pending", pendingBytes)))
defer span.End()

select {
case <-curWaiter.readyCh:
Expand All @@ -97,6 +104,7 @@ func (bq *BoundedQueue) Acquire(ctx context.Context, pendingBytes int64) error {
bq.lock.Lock()
defer bq.lock.Unlock()
err = fmt.Errorf("context canceled: %w ", ctx.Err())
span.SetStatus(codes.Error, "context canceled")

_, found := bq.waiters.Delete(curWaiter.ID)
if !found {
Expand Down
30 changes: 27 additions & 3 deletions internal/otelarrow/admission/boundedqueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
"go.opentelemetry.io/otel/trace/noop"
"go.uber.org/multierr"
)

Expand All @@ -34,13 +38,16 @@ func abs(x int64) int64 {
}
return x
}

var noopTraces = noop.NewTracerProvider()

func TestAcquireSimpleNoWaiters(t *testing.T) {
maxLimitBytes := 1000
maxLimitWaiters := 10
numRequests := 40
requestSize := 21

bq := NewBoundedQueue(int64(maxLimitBytes), int64(maxLimitWaiters))
bq := NewBoundedQueue(noopTraces, int64(maxLimitBytes), int64(maxLimitWaiters))

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
Expand Down Expand Up @@ -92,7 +99,7 @@ func TestAcquireBoundedWithWaiters(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
bq := NewBoundedQueue(tt.maxLimitBytes, tt.maxLimitWaiters)
bq := NewBoundedQueue(noopTraces, tt.maxLimitBytes, tt.maxLimitWaiters)
var blockedRequests int64
numReqsUntilBlocked := tt.maxLimitBytes / tt.requestSize
requestsAboveLimit := abs(tt.numRequests - numReqsUntilBlocked)
Expand Down Expand Up @@ -151,7 +158,10 @@ func TestAcquireContextCanceled(t *testing.T) {

blockedRequests := min(int64(maxLimitWaiters), requestsAboveLimit)

bq := NewBoundedQueue(int64(maxLimitBytes), int64(maxLimitWaiters))
exp := tracetest.NewInMemoryExporter()
tp := trace.NewTracerProvider(trace.WithSyncer(exp))

bq := NewBoundedQueue(tp, int64(maxLimitBytes), int64(maxLimitWaiters))

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
var errs error
Expand All @@ -178,6 +188,16 @@ func TestAcquireContextCanceled(t *testing.T) {
wg.Wait()
assert.ErrorContains(t, errs, "context canceled")

// Expect spans named admission_blocked w/ context canceled.
spans := exp.GetSpans()
exp.Reset()
assert.NotEmpty(t, spans)
for _, span := range spans {
assert.Equal(t, "admission_blocked", span.Name)
assert.Equal(t, codes.Error, span.Status.Code)
assert.Equal(t, "context canceled", span.Status.Description)
}

// Now all waiters should have returned and been removed.
assert.Equal(t, 0, bq.waiters.Len())

Expand All @@ -186,4 +206,8 @@ func TestAcquireContextCanceled(t *testing.T) {
assert.Equal(t, int64(0), bq.currentWaiters)
}
assert.True(t, bq.TryAcquire(int64(maxLimitBytes)))

// Expect no more spans, because admission was not blocked.
spans = exp.GetSpans()
require.Empty(t, spans)
}
9 changes: 6 additions & 3 deletions receiver/otelarrowreceiver/internal/arrow/arrow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
"go.uber.org/mock/gomock"
"go.uber.org/zap/zaptest"
"golang.org/x/net/http2/hpack"
Expand All @@ -49,8 +50,10 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/otelarrowreceiver/internal/arrow/mock"
)

var noopTraces = noop.NewTracerProvider()

func defaultBQ() *admission.BoundedQueue {
return admission.NewBoundedQueue(int64(100000), int64(10))
return admission.NewBoundedQueue(noopTraces, int64(100000), int64(10))
}

type compareJSONTraces struct{ ptrace.Traces }
Expand Down Expand Up @@ -464,10 +467,10 @@ func TestBoundedQueueWithPdataHeaders(t *testing.T) {
var bq *admission.BoundedQueue
if tt.rejected {
ctc.stream.EXPECT().Send(statusOKFor(batch.BatchId)).Times(0)
bq = admission.NewBoundedQueue(int64(sizer.TracesSize(td)-100), 10)
bq = admission.NewBoundedQueue(noopTraces, int64(sizer.TracesSize(td)-100), 10)
} else {
ctc.stream.EXPECT().Send(statusOKFor(batch.BatchId)).Times(1).Return(nil)
bq = admission.NewBoundedQueue(defaultBoundedQueueLimit, 10)
bq = admission.NewBoundedQueue(noopTraces, defaultBoundedQueueLimit, 10)
}

ctc.start(ctc.newRealConsumer, bq)
Expand Down
2 changes: 1 addition & 1 deletion receiver/otelarrowreceiver/otelarrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (r *otelArrowReceiver) startProtocolServers(ctx context.Context, host compo
return err
}
}
bq := admission.NewBoundedQueue(int64(r.cfg.Arrow.AdmissionLimitMiB<<20), r.cfg.Arrow.WaiterLimit)
bq := admission.NewBoundedQueue(r.settings.TracerProvider, int64(r.cfg.Arrow.AdmissionLimitMiB<<20), r.cfg.Arrow.WaiterLimit)

r.arrowReceiver, err = arrow.New(arrow.Consumers(r), r.settings, r.obsrepGRPC, r.cfg.GRPC, authServer, func() arrowRecord.ConsumerAPI {
var opts []arrowRecord.Option
Expand Down

0 comments on commit 5481eed

Please sign in to comment.