From d26265e8533b3c4a8f91a75518687a2c4d2d7774 Mon Sep 17 00:00:00 2001
From: Zirak
Date: Wed, 24 Jan 2024 14:57:13 +0000
Subject: [PATCH] fix(sdk-trace-base): Export processed spans while exporter
 failed (#4287)

* fix(sdk-trace-base): Export processed spans while exporter failed

While the exporter deals with a batch of spans, new spans may come in
and wait to be exported. As previously implemented, a successful export
would notice these waiting spans, triggering a renewed timer check, but
not so for an unsuccessful export. The result was that, prior to this
commit, a failing export may end up in a situation where no further
spans will be exported.

This is due to the behaviour of `_addToBuffer` when the queue is full:
Imagine an export which fails after a long timeout (because of, for
instance, network troubles). While the connection waits to be timed
out, the span queue fills up. Once completely full, no new calls to
recheck the timer will be done. On its own, this behaviour is fine.
When combined with the patched bug, this leads to a rather confusing
case where the exporter never tries exporting.

* fix(changelog): add entry

---------

Co-authored-by: Marc Pichler
---
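The fix hinges on standard Promise semantics: a callback passed as the first
argument to `.then()` is skipped when the promise rejects, while a `.finally()`
callback runs on both fulfilment and rejection, so the timer re-check now also
happens after a failed export. Below is a minimal sketch of that distinction
(illustration only, not part of the change); the `failingExport` helper is
made up for the example and is not code from this repository:

    // Stand-in for an export that always fails (hypothetical helper).
    const failingExport = (): Promise<void> =>
      Promise.reject(new Error('export failed'));

    failingExport()
      // Never runs: .then() with only an onFulfilled handler is skipped on rejection.
      .then(() => console.log('then: re-check timer'))
      .catch(() => {});

    failingExport()
      // Runs on success and on failure, mirroring the .finally() used in the fix.
      .finally(() => console.log('finally: re-check timer'))
      .catch(() => {});

Running this prints only the `finally` line; that guarantee is what lets the
processor reset `_isExporting` and re-arm the timer even when an export fails.
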
 CHANGELOG.md                                       |  1 +
 .../src/export/BatchSpanProcessorBase.ts           |  2 +-
 .../export/BatchSpanProcessorBase.test.ts          | 51 +++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4fe3bc7c0ff..bb6489a66f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ For experimental package changes, see the [experimental CHANGELOG](experimental/
 
 ### :bug: (Bug Fix)
 
 * fix(sdk-trace-base): ensure attribute value length limit is enforced on span creation [#4417](https://github.com/open-telemetry/opentelemetry-js/pull/4417) @pichlermarc
+* fix(sdk-trace-base): Export processed spans while exporter failed [#4287](https://github.com/open-telemetry/opentelemetry-js/pull/4287) @Zirak
 
 ### :books: (Refine Doc)
diff --git a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
index fef5c80dc0f..f069aac0c7d 100644
--- a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
+++ b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
@@ -221,7 +221,7 @@ export abstract class BatchSpanProcessorBase
     const flush = () => {
       this._isExporting = true;
       this._flushOneBatch()
-        .then(() => {
+        .finally(() => {
           this._isExporting = false;
           if (this._finishedSpans.length > 0) {
             this._clearTimer();
diff --git a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
index 83fb3ebe44f..251971385c1 100644
--- a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
+++ b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
@@ -387,6 +387,57 @@ describe('BatchSpanProcessorBase', () => {
       });
     });
 
+    it('should still export when previously failed', async () => {
+      // The scenario is made of several parts:
+      // 1. The exporter tries to export some spans
+      // 2. While it does so, more spans are processed
+      // 3. The exporter fails
+      // 4. Spans arriving during step 2 should be exported
+
+      let firstCall = true;
+      const fillingExportStub = sinon
+        .stub(exporter, 'export')
+        .callsFake((spans, cb) => {
+          // The first time export is called, add some spans to the processor.
+          // Any other time, call through. We don't simply restore the stub
+          // so we can count the calls with `sinon.assert`
+          if (!firstCall) {
+            return fillingExportStub.wrappedMethod.call(exporter, spans, cb);
+          }
+
+          // Step 2: During export, add another span
+          firstCall = false;
+          processSpan();
+
+          return fillingExportStub.wrappedMethod.call(exporter, spans, () => {
+            // Step 3: Mock failure
+            cb({
+              code: ExportResultCode.FAILED,
+            });
+          });
+        });
+
+      const clock = sinon.useFakeTimers();
+
+      // Step 1: Export a span
+      processSpan();
+      await clock.runAllAsync();
+
+      clock.restore();
+      fillingExportStub.restore();
+
+      // Step 4: Make sure all spans were processed
+      assert.equal(exporter['_finishedSpans'].length, 2);
+      assert.equal(processor['_finishedSpans'].length, 0);
+      sinon.assert.calledTwice(fillingExportStub);
+
+      function processSpan() {
+        const span = createSampledSpan('test');
+        processor.onStart(span, ROOT_CONTEXT);
+        processor.onEnd(span);
+      }
+    });
+
     it('should wait for pending resource on flush', async () => {
       const tracer = new BasicTracerProvider({
         resource: new Resource(