From 2f1d7a564adf13d11b30d7857940d88654f7973d Mon Sep 17 00:00:00 2001
From: ben
Date: Fri, 10 Nov 2023 12:02:11 +0200
Subject: [PATCH 1/2] fix(sdk-trace-base): Export processed spans while exporter failed

While the exporter deals with a batch of spans, new spans may come in
and wait to be exported. As previously implemented, a successful export
would notice these waiting spans and trigger a renewed timer check, but
an unsuccessful export would not. As a result, prior to this commit, a
failing export could end up in a situation where no further spans were
ever exported.

This is due to the behaviour of `_addToBuffer` when the queue is full:
imagine an export that fails after a long timeout (because of, for
instance, network trouble). While the connection waits to time out, the
span queue fills up. Once the queue is completely full, no further
calls to recheck the timer are made. On its own, this behaviour is
fine; combined with the bug patched here, it leads to a rather
confusing state in which the exporter never attempts another export.
---
 .../src/export/BatchSpanProcessorBase.ts      |  2 +-
 .../export/BatchSpanProcessorBase.test.ts     | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
index fef5c80dc0..f069aac0c7 100644
--- a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
+++ b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
@@ -221,7 +221,7 @@ export abstract class BatchSpanProcessorBase
     const flush = () => {
       this._isExporting = true;
       this._flushOneBatch()
-        .then(() => {
+        .finally(() => {
           this._isExporting = false;
           if (this._finishedSpans.length > 0) {
             this._clearTimer();
diff --git a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
index 83fb3ebe44..251971385c 100644
--- a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
+++ b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
@@ -387,6 +387,57 @@ describe('BatchSpanProcessorBase', () => {
       });
     });
 
+    it('should still export when previously failed', async () => {
+      // The scenario is made of several parts:
+      // 1. The exporter tries to export some spans
+      // 2. While it does so, more spans are processed
+      // 3. The exporter fails
+      // 4. Spans arriving during step 2 should be exported
+
+      let firstCall = true;
+      const fillingExportStub = sinon
+        .stub(exporter, 'export')
+        .callsFake((spans, cb) => {
+          // The first time export is called, add some spans to the processor.
+          // Any other time, call through. We don't simply restore the stub
+          // so we can count the calls with `sinon.assert`
+          if (!firstCall) {
+            return fillingExportStub.wrappedMethod.call(exporter, spans, cb);
+          }
+
+          // Step 2: During export, add another span
+          firstCall = false;
+          processSpan();
+
+          return fillingExportStub.wrappedMethod.call(exporter, spans, () => {
+            // Step 3: Mock failure
+            cb({
+              code: ExportResultCode.FAILED,
+            });
+          });
+        });
+
+      const clock = sinon.useFakeTimers();
+
+      // Step 1: Export a span
+      processSpan();
+      await clock.runAllAsync();
+
+      clock.restore();
+      fillingExportStub.restore();
+
+      // Step 4: Make sure all spans were processed
+      assert.equal(exporter['_finishedSpans'].length, 2);
+      assert.equal(processor['_finishedSpans'].length, 0);
+      sinon.assert.calledTwice(fillingExportStub);
+
+      function processSpan() {
+        const span = createSampledSpan('test');
+        processor.onStart(span, ROOT_CONTEXT);
+        processor.onEnd(span);
+      }
+    });
+
     it('should wait for pending resource on flush', async () => {
       const tracer = new BasicTracerProvider({
         resource: new Resource(

From 95a78295b1b9267833330fdc187748c745612958 Mon Sep 17 00:00:00 2001
From: Marc Pichler
Date: Wed, 24 Jan 2024 15:21:34 +0100
Subject: [PATCH 2/2] fix(changelog): add entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4fe3bc7c0f..bb6489a66f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ For experimental package changes, see the [experimental CHANGELOG](experimental/
 
 ### :bug: (Bug Fix)
 
 * fix(sdk-trace-base): ensure attribute value length limit is enforced on span creation [#4417](https://github.com/open-telemetry/opentelemetry-js/pull/4417) @pichlermarc
+* fix(sdk-trace-base): Export processed spans while exporter failed [#4287](https://github.com/open-telemetry/opentelemetry-js/pull/4287) @Zirak
 
 ### :books: (Refine Doc)
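
For readers skimming the diff, the following standalone sketch illustrates the control-flow difference the first commit describes. It is not taken from the SDK and is not part of the patches above; every identifier in it is a made-up stand-in. A re-check placed in `.then()` is skipped when the export promise rejects, while one placed in `.finally()` runs regardless of the outcome, so spans queued during a failed export still get picked up.

// Illustrative sketch only -- simplified stand-ins, not BatchSpanProcessorBase internals.
const queuedDuringExport: string[] = ['span queued while export was in flight'];

function failingExport(): Promise<void> {
  // Stand-in for an export that fails, e.g. after a long network timeout.
  return Promise.reject(new Error('export failed'));
}

function recheckTimer(label: string): void {
  console.log(`${label}: re-arming timer for ${queuedDuringExport.length} queued span(s)`);
}

// Pre-fix shape: the re-check lives in .then() and is skipped on failure.
failingExport()
  .then(() => recheckTimer('then')) // never reached -- the queued span is stranded
  .catch(() => {});                 // rejection handled, but nothing re-arms the timer

// Post-fix shape: .finally() runs on success and failure alike.
failingExport()
  .finally(() => recheckTimer('finally')) // runs despite the failure
  .catch(() => {});                       // rejection still handled downstream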