From abb3cf678cd1cae0b1e44198c42ec77bab700d70 Mon Sep 17 00:00:00 2001 From: suika Date: Tue, 21 Feb 2023 17:08:19 +0800 Subject: [PATCH 01/11] fix(plugin): OTEL exporting with translate We represent trace spans with a universal format for all kinds of mainstream tracers and translate them to different formats when propagating headers and exporting traces. However, OTEL misses the handling of trace ID length. Fix KAG-652 --- CHANGELOG.md | 4 +++- kong/plugins/opentelemetry/handler.lua | 4 +++- kong/plugins/opentelemetry/otlp.lua | 21 +++++++++++++++++++ .../37-opentelemetry/03-propagation_spec.lua | 18 ++++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e44675f2314..6f92d845924a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -193,8 +193,10 @@ [#10069](https://github.com/Kong/kong/pull/10069) - For `http.status_code`. It should be present on spans for requests that have a status code. [#10160](https://github.com/Kong/kong/pull/10160) - - For `http.flavor`. It should be a string value, not a double. + - For `http.flavor`. It should be a string value, not a double [#10160](https://github.com/Kong/kong/pull/10160) +- **OpenTelemetry**: Fix a bug that when get trace of other formats, the trace ID reported and propagated could be of incorrect length. + [#10332](https://github.com/Kong/kong/pull/10332) - **OAuth2**: `refresh_token_ttl` is now limited between `0` and `100000000` by schema validator. Previously numbers that are too large causes requests to fail. [#10068](https://github.com/Kong/kong/pull/10068) diff --git a/kong/plugins/opentelemetry/handler.lua b/kong/plugins/opentelemetry/handler.lua index 5856c1cea340..0605f0911e6b 100644 --- a/kong/plugins/opentelemetry/handler.lua +++ b/kong/plugins/opentelemetry/handler.lua @@ -19,6 +19,7 @@ local propagation_parse = propagation.parse local propagation_set = propagation.set local null = ngx.null local encode_traces = otlp.encode_traces +local translate_span = otlp.translate_span local transform_span = otlp.transform_span local _log_prefix = "[otel] " @@ -132,6 +133,7 @@ function OpenTelemetryHandler:access() end -- overwrite trace id + -- as we are in a chain of existing trace if trace_id then root_span.trace_id = trace_id kong.ctx.plugin.trace_id = trace_id @@ -145,7 +147,7 @@ function OpenTelemetryHandler:access() root_span.parent_id = parent_id end - propagation_set("preserve", header_type, root_span, "w3c") + propagation_set("preserve", header_type, translate_span(root_span), "w3c") end function OpenTelemetryHandler:log(conf) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index d84c5c7d97fb..f31108adf1d6 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -72,9 +72,29 @@ local function transform_events(events) return pb_events end +-- this function is to translaste the span to otlp span, but of universal span format +local function translate_span(span) + local trace_id = span.trace_id + + -- make sure the trace id is of 16 bytes + if #trace_id > 16 then + trace_id = string.sub(trace_id, -16) + + elseif #trace_id < 16 then + trace_id = string.rep("\0", 16 - #trace_id) .. trace_id + end + + local translated = deep_copy(span) + translated.trace_id = trace_id + return translated +end + +-- this function is to tranform universal span to otlp span that fits the protobuf local function transform_span(span) assert(type(span) == "table") + span = translate_span(span) + local pb_span = { trace_id = span.trace_id, span_id = span.span_id, @@ -161,6 +181,7 @@ do end return { + translate_span = translate_span, transform_span = transform_span, encode_traces = encode_traces, } diff --git a/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua b/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua index 8ce89d2a1f09..bb3a72792a6d 100644 --- a/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua +++ b/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua @@ -160,5 +160,23 @@ describe("propagation tests #" .. strategy, function() assert.equals(trace_id, json.headers["ot-tracer-traceid"]) end) + + it("propagates dd headers", function() + local trace_id = gen_trace_id() + local trace_id_truncated = trace_id:sub(1, 16) + local span_id = gen_span_id() + local r = proxy_client:get("/", { + headers = { + ["ot-tracer-traceid"] = trace_id_truncated, + ["ot-tracer-spanid"] = span_id, + ["ot-tracer-sampled"] = "1", + host = "http-route", + }, + }) + local body = assert.response(r).has.status(200) + local json = cjson.decode(body) + + assert.equals(string.rep("0", 8) .. trace_id_truncated, json.headers["ot-tracer-traceid"]) + end) end) end From db0d403f46c2aba1903a1827d4fe85693cd8adb4 Mon Sep 17 00:00:00 2001 From: Xumin <100666470+StarlightIbuki@users.noreply.github.com> Date: Tue, 21 Feb 2023 17:36:35 +0800 Subject: [PATCH 02/11] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f92d845924a..8b9da1813801 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -193,7 +193,7 @@ [#10069](https://github.com/Kong/kong/pull/10069) - For `http.status_code`. It should be present on spans for requests that have a status code. [#10160](https://github.com/Kong/kong/pull/10160) - - For `http.flavor`. It should be a string value, not a double + - For `http.flavor`. It should be a string value, not a double. [#10160](https://github.com/Kong/kong/pull/10160) - **OpenTelemetry**: Fix a bug that when get trace of other formats, the trace ID reported and propagated could be of incorrect length. [#10332](https://github.com/Kong/kong/pull/10332) From 2065cfc76a0f0dc7e3757782792eb655840d669b Mon Sep 17 00:00:00 2001 From: suika Date: Tue, 21 Feb 2023 17:39:28 +0800 Subject: [PATCH 03/11] shallow copy to prevent infinite recusive --- kong/plugins/opentelemetry/otlp.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index f31108adf1d6..bffbdf34dcdb 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -10,6 +10,7 @@ local insert = table.insert local tablepool_fetch = tablepool.fetch local tablepool_release = tablepool.release local deep_copy = utils.deep_copy +local shallow_copy = utils.shallow_copy local table_merge = utils.table_merge local POOL_OTLP = "KONG_OTLP" @@ -84,7 +85,7 @@ local function translate_span(span) trace_id = string.rep("\0", 16 - #trace_id) .. trace_id end - local translated = deep_copy(span) + local translated = shallow_copy(span) translated.trace_id = trace_id return translated end From 9ce8c634996327ab707585bf23f8ccb2e9c477e5 Mon Sep 17 00:00:00 2001 From: Michael Martin <3277009+flrgh@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:26:41 -0800 Subject: [PATCH 04/11] fix(plugin): copy span metatable --- kong/plugins/opentelemetry/otlp.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index bffbdf34dcdb..4f3b79a31dae 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -12,6 +12,8 @@ local tablepool_release = tablepool.release local deep_copy = utils.deep_copy local shallow_copy = utils.shallow_copy local table_merge = utils.table_merge +local getmetatable = getmetatable +local setmetatable = setmetatable local POOL_OTLP = "KONG_OTLP" local EMPTY_TAB = {} @@ -87,6 +89,7 @@ local function translate_span(span) local translated = shallow_copy(span) translated.trace_id = trace_id + setmetatable(translated, getmetatable(span)) return translated end From ce6021620b9e7f125360ac3633ab53936d1de77a Mon Sep 17 00:00:00 2001 From: Michael Martin <3277009+flrgh@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:29:12 -0800 Subject: [PATCH 05/11] tests(plugin): update test case for trace id padding --- spec/03-plugins/37-opentelemetry/03-propagation_spec.lua | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua b/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua index bb3a72792a6d..d116cfb78dc3 100644 --- a/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua +++ b/spec/03-plugins/37-opentelemetry/03-propagation_spec.lua @@ -176,7 +176,11 @@ describe("propagation tests #" .. strategy, function() local body = assert.response(r).has.status(200) local json = cjson.decode(body) - assert.equals(string.rep("0", 8) .. trace_id_truncated, json.headers["ot-tracer-traceid"]) + assert.equals(#trace_id, #json.headers["ot-tracer-traceid"], + "trace ID was not padded correctly") + + local expected = string.rep("0", 16) .. trace_id_truncated + assert.equals(expected, json.headers["ot-tracer-traceid"]) end) end) end From bde853a372959fbd130efd4319b15f9d44cd09ad Mon Sep 17 00:00:00 2001 From: Michael Martin <3277009+flrgh@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:40:11 -0800 Subject: [PATCH 06/11] perf(plugin): avoid needless copying of span --- kong/plugins/opentelemetry/otlp.lua | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index 4f3b79a31dae..ddcf10340156 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -15,6 +15,7 @@ local table_merge = utils.table_merge local getmetatable = getmetatable local setmetatable = setmetatable +local NULL = "\0" local POOL_OTLP = "KONG_OTLP" local EMPTY_TAB = {} @@ -75,22 +76,29 @@ local function transform_events(events) return pb_events end --- this function is to translaste the span to otlp span, but of universal span format +-- this function is to translate the span to otlp span, but of universal span format local function translate_span(span) local trace_id = span.trace_id + local len = #trace_id + local new_id = trace_id -- make sure the trace id is of 16 bytes - if #trace_id > 16 then - trace_id = string.sub(trace_id, -16) + if len > 16 then + new_id = trace_id:sub(-16) - elseif #trace_id < 16 then - trace_id = string.rep("\0", 16 - #trace_id) .. trace_id + elseif len < 16 then + new_id = NULL:rep(16 - len) .. trace_id end - local translated = shallow_copy(span) - translated.trace_id = trace_id - setmetatable(translated, getmetatable(span)) - return translated + if new_id ~= trace_id then + local translated = shallow_copy(span) + setmetatable(translated, getmetatable(span)) + + translated.trace_id = new_id + span = translated + end + + return span end -- this function is to tranform universal span to otlp span that fits the protobuf From 4aba447ff53e37c24c648a28e4aa2aeff5843f65 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 23 Feb 2023 13:14:31 +0800 Subject: [PATCH 07/11] apply suggestion --- kong/plugins/opentelemetry/otlp.lua | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index ddcf10340156..0a160f8e8c82 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -15,6 +15,7 @@ local table_merge = utils.table_merge local getmetatable = getmetatable local setmetatable = setmetatable +local TRACE_ID_LEN = 16 local NULL = "\0" local POOL_OTLP = "KONG_OTLP" local EMPTY_TAB = {} @@ -83,11 +84,11 @@ local function translate_span(span) local new_id = trace_id -- make sure the trace id is of 16 bytes - if len > 16 then - new_id = trace_id:sub(-16) + if len > TRACE_ID_LEN then + new_id = trace_id:sub(-TRACE_ID_LEN) - elseif len < 16 then - new_id = NULL:rep(16 - len) .. trace_id + elseif len < TRACE_ID_LEN then + new_id = NULL:rep(TRACE_ID_LEN - len) .. trace_id end if new_id ~= trace_id then From 331415dfbd87a4985e1cdcf2d0751b214d1665f7 Mon Sep 17 00:00:00 2001 From: Xumin <100666470+StarlightIbuki@users.noreply.github.com> Date: Thu, 23 Feb 2023 14:23:20 +0800 Subject: [PATCH 08/11] Update CHANGELOG.md Co-authored-by: Chrono --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b9da1813801..d741a3317cc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -195,7 +195,7 @@ [#10160](https://github.com/Kong/kong/pull/10160) - For `http.flavor`. It should be a string value, not a double. [#10160](https://github.com/Kong/kong/pull/10160) -- **OpenTelemetry**: Fix a bug that when get trace of other formats, the trace ID reported and propagated could be of incorrect length. +- **OpenTelemetry**: Fix a bug that when getting the trace of other formats, the trace ID reported and propagated could be of incorrect length. [#10332](https://github.com/Kong/kong/pull/10332) - **OAuth2**: `refresh_token_ttl` is now limited between `0` and `100000000` by schema validator. Previously numbers that are too large causes requests to fail. [#10068](https://github.com/Kong/kong/pull/10068) From dc3813d8e11d6e20c8efd4ccec9ddecb27dfadf8 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 23 Feb 2023 14:24:02 +0800 Subject: [PATCH 09/11] apply suggestion --- kong/plugins/opentelemetry/otlp.lua | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index 0a160f8e8c82..9bff123513df 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -83,6 +83,10 @@ local function translate_span(span) local len = #trace_id local new_id = trace_id + if len == TRACE_ID_LEN then + return span + end + -- make sure the trace id is of 16 bytes if len > TRACE_ID_LEN then new_id = trace_id:sub(-TRACE_ID_LEN) From 610bafb529471a842b8c6244b2297760dc5013f8 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 23 Feb 2023 14:24:51 +0800 Subject: [PATCH 10/11] no need to check equility --- kong/plugins/opentelemetry/otlp.lua | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index 9bff123513df..8e1657ba45ac 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -95,13 +95,11 @@ local function translate_span(span) new_id = NULL:rep(TRACE_ID_LEN - len) .. trace_id end - if new_id ~= trace_id then - local translated = shallow_copy(span) - setmetatable(translated, getmetatable(span)) + local translated = shallow_copy(span) + setmetatable(translated, getmetatable(span)) - translated.trace_id = new_id - span = translated - end + translated.trace_id = new_id + span = translated return span end From a5a80081c670d980614f4eaeafd914eefd3b7c44 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 23 Feb 2023 18:05:51 +0800 Subject: [PATCH 11/11] naming&explaination --- kong/plugins/opentelemetry/handler.lua | 8 ++++---- kong/plugins/opentelemetry/otlp.lua | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/kong/plugins/opentelemetry/handler.lua b/kong/plugins/opentelemetry/handler.lua index 0605f0911e6b..b41ce20cdb3c 100644 --- a/kong/plugins/opentelemetry/handler.lua +++ b/kong/plugins/opentelemetry/handler.lua @@ -19,8 +19,8 @@ local propagation_parse = propagation.parse local propagation_set = propagation.set local null = ngx.null local encode_traces = otlp.encode_traces -local translate_span = otlp.translate_span -local transform_span = otlp.transform_span +local translate_span_trace_id = otlp.translate_span +local encode_span = otlp.transform_span local _log_prefix = "[otel] " @@ -109,7 +109,7 @@ local function process_span(span, queue) span.trace_id = trace_id end - local pb_span = transform_span(span) + local pb_span = encode_span(span) queue:add(pb_span) end @@ -147,7 +147,7 @@ function OpenTelemetryHandler:access() root_span.parent_id = parent_id end - propagation_set("preserve", header_type, translate_span(root_span), "w3c") + propagation_set("preserve", header_type, translate_span_trace_id(root_span), "w3c") end function OpenTelemetryHandler:log(conf) diff --git a/kong/plugins/opentelemetry/otlp.lua b/kong/plugins/opentelemetry/otlp.lua index 8e1657ba45ac..2b209e7f7639 100644 --- a/kong/plugins/opentelemetry/otlp.lua +++ b/kong/plugins/opentelemetry/otlp.lua @@ -77,8 +77,11 @@ local function transform_events(events) return pb_events end --- this function is to translate the span to otlp span, but of universal span format -local function translate_span(span) +-- translate the span to otlp format +-- currently it only adjust the trace id to 16 bytes +-- we don't do it in place because the span is shared +-- and we need to preserve the original information as much as possible +local function translate_span_trace_id(span) local trace_id = span.trace_id local len = #trace_id local new_id = trace_id @@ -104,11 +107,12 @@ local function translate_span(span) return span end --- this function is to tranform universal span to otlp span that fits the protobuf +-- this function is to prepare span to be encoded and sent via grpc +-- TODO: renaming this to encode_span local function transform_span(span) assert(type(span) == "table") - span = translate_span(span) + span = translate_span_trace_id(span) local pb_span = { trace_id = span.trace_id, @@ -196,7 +200,7 @@ do end return { - translate_span = translate_span, + translate_span = translate_span_trace_id, transform_span = transform_span, encode_traces = encode_traces, }