From 61dfa68899d00140c437540dba85ae4daadfff0f Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 10 Jul 2020 09:52:10 +1200 Subject: [PATCH] Change URL obfuscation behavior (#145) --- src/span.cpp | 16 ++++++--- src/span.h | 4 ++- src/tracer.cpp | 21 ++++++++++-- src/tracer.h | 1 + test/integration/nginx/Dockerfile | 5 +-- test/integration/nginx/expected_tc6.json | 2 +- test/span_test.cpp | 43 ++++++++++++++++++++++-- 7 files changed, 79 insertions(+), 13 deletions(-) diff --git a/src/span.cpp b/src/span.cpp index 2e763de9..8524c72b 100644 --- a/src/span.cpp +++ b/src/span.cpp @@ -60,13 +60,14 @@ Span::Span(std::shared_ptr tracer, std::shared_ptr buf TimeProvider get_time, uint64_t span_id, uint64_t trace_id, uint64_t parent_id, SpanContext context, TimePoint start_time, std::string span_service, std::string span_type, std::string span_name, std::string resource, - std::string operation_name_override) + std::string operation_name_override, bool legacy_obfuscation) : tracer_(std::move(tracer)), buffer_(std::move(buffer)), get_time_(get_time), context_(std::move(context)), start_time_(start_time), operation_name_override_(operation_name_override), + legacy_obfuscation_(legacy_obfuscation), span_(makeSpanData(span_type, span_service, resource, span_name, trace_id, span_id, parent_id, std::chrono::duration_cast( @@ -103,10 +104,17 @@ std::regex &PATH_MIXED_ALPHANUMERICS() { // or cardinality issues. // If you want to add any more steps to this function, we should use a more // sophisticated architecture. For now, YAGNI. -void audit(SpanData *span) { +void audit(bool legacy_obfuscation, SpanData *span) { auto http_tag = span->meta.find(ot::ext::http_url); if (http_tag != span->meta.end()) { - http_tag->second = std::regex_replace(http_tag->second, PATH_MIXED_ALPHANUMERICS(), "$1$2?"); + if (legacy_obfuscation) { + // Heavy-handed obfuscation that replaces hostname, runs of alphanumerics, fragments and + // parameters. + http_tag->second = std::regex_replace(http_tag->second, PATH_MIXED_ALPHANUMERICS(), "$1$2?"); + } else { + // Just trim the parameter portion of the URL. + http_tag->second = http_tag->second.substr(0, http_tag->second.find_first_of('?')); + } } } @@ -173,7 +181,7 @@ void Span::FinishWithOptions( span_->meta.erase(tag); } // Audit and finish span. - audit(span_.get()); + audit(legacy_obfuscation_, span_.get()); buffer_->finishSpan(std::move(span_)); // According to the OT lifecycle, no more methods should be called on this Span. But just in case // let's make sure that span_ isn't nullptr. Fine line between defensive programming and voodoo. diff --git a/src/span.h b/src/span.h index 4f9fa6a3..0b132674 100644 --- a/src/span.h +++ b/src/span.h @@ -92,7 +92,8 @@ class Span : public DatadogSpan { Span(std::shared_ptr tracer, std::shared_ptr buffer, TimeProvider get_time, uint64_t span_id, uint64_t trace_id, uint64_t parent_id, SpanContext context, TimePoint start_time, std::string span_service, std::string span_type, - std::string span_name, std::string resource, std::string operation_name_override); + std::string span_name, std::string resource, std::string operation_name_override, + bool legacy_obfuscation = false); Span() = delete; ~Span() override; @@ -134,6 +135,7 @@ class Span : public DatadogSpan { SpanContext context_; TimePoint start_time_; std::string operation_name_override_; + bool legacy_obfuscation_ = false; // Set in constructor initializer, depends on previous constructor initializer-set members: std::unique_ptr span_; diff --git a/src/tracer.cpp b/src/tracer.cpp index aec5f4b2..b3ee8257 100644 --- a/src/tracer.cpp +++ b/src/tracer.cpp @@ -127,6 +127,14 @@ double analyticsRate(TracerOptions options) { return std::nan(""); } +bool legacyObfuscationEnabled() { + auto obfuscation = std::getenv("DD_TRACE_CPP_LEGACY_OBFUSCATION"); + if (obfuscation != nullptr && std::string(obfuscation) == "1") { + return true; + } + return false; +} + void startupLog(TracerOptions &options) { auto env_setting = std::getenv("DD_TRACE_STARTUP_LOGS"); if (env_setting != nullptr && std::string(env_setting) == "0") { @@ -204,11 +212,18 @@ void logTracerOptions(std::chrono::time_point timesta Tracer::Tracer(TracerOptions options, std::shared_ptr buffer, TimeProvider get_time, IdProvider get_id) - : opts_(options), buffer_(std::move(buffer)), get_time_(get_time), get_id_(get_id) {} + : opts_(options), + buffer_(std::move(buffer)), + get_time_(get_time), + get_id_(get_id), + legacy_obfuscation_(legacyObfuscationEnabled()) {} Tracer::Tracer(TracerOptions options, std::shared_ptr &writer, std::shared_ptr sampler) - : opts_(options), get_time_(getRealTime), get_id_(getId) { + : opts_(options), + get_time_(getRealTime), + get_id_(getId), + legacy_obfuscation_(legacyObfuscationEnabled()) { configureRulesSampler(sampler, options); startupLog(options); buffer_ = std::make_shared( @@ -245,7 +260,7 @@ std::unique_ptr Tracer::StartSpanWithOptions(ot::string_view operation std::unique_ptr span{new Span{shared_from_this(), buffer_, get_time_, span_id, trace_id, parent_id, std::move(span_context), get_time_(), opts_.service, opts_.type, operation_name, operation_name, - opts_.operation_name_override}}; + opts_.operation_name_override, legacy_obfuscation_}}; if (!opts_.environment.empty()) { span->SetTag(datadog::tags::environment, opts_.environment); diff --git a/src/tracer.h b/src/tracer.h index aebbbaeb..e0c9b672 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -74,6 +74,7 @@ class Tracer : public ot::Tracer, public std::enable_shared_from_this { std::shared_ptr buffer_; TimeProvider get_time_; IdProvider get_id_; + bool legacy_obfuscation_ = false; }; } // namespace opentracing diff --git a/test/integration/nginx/Dockerfile b/test/integration/nginx/Dockerfile index 0301c0dd..bfaa11ff 100644 --- a/test/integration/nginx/Dockerfile +++ b/test/integration/nginx/Dockerfile @@ -73,9 +73,10 @@ COPY ./test/integration/nginx/dd-config.json /etc/dd-config.json RUN mkdir -p /var/www/ COPY ./test/integration/nginx/index.html /var/www/index.html -COPY ./test/integration/nginx/nginx_integration_test.sh ./ -COPY ./test/integration/nginx/expected_*.json ./ # TODO(cgilmour): Hack to pin a dep of msgpack-cli RUN git clone -b v1.1.2 https://github.com/ugorji/go /root/go/src/github.com/ugorji/go RUN go get github.com/jakm/msgpack-cli +# Copy these last so that edits don't need as many image rebuild steps +COPY ./test/integration/nginx/nginx_integration_test.sh ./ +COPY ./test/integration/nginx/expected_*.json ./ CMD [ "bash", "-x", "./nginx_integration_test.sh"] diff --git a/test/integration/nginx/expected_tc6.json b/test/integration/nginx/expected_tc6.json index fd7af267..3ccc8059 100644 --- a/test/integration/nginx/expected_tc6.json +++ b/test/integration/nginx/expected_tc6.json @@ -8,7 +8,7 @@ "http.method": "GET", "http.status_code": "200", "http.status_line": "200 OK", - "http.url": "http://localhost/proxy/?", + "http.url": "http://localhost/proxy/", "operation": "GET /proxy/" }, "metrics": { diff --git a/test/span_test.cpp b/test/span_test.cpp index 941a9be4..22fd2fce 100644 --- a/test/span_test.cpp +++ b/test/span_test.cpp @@ -62,7 +62,45 @@ TEST_CASE("span") { REQUIRE(result->duration == 10000000000); } - SECTION("audits span data") { + SECTION("audits span data (just URL parameters)") { + std::list> test_cases{ + // Should remove query params + {"/", "/"}, + {"/?asdf", "/"}, + {"/search", "/search"}, + {"/search?", "/search"}, + {"/search?id=100&private=true", "/search"}, + {"/search?id=100&private=true?", "/search"}, + {"http://i-012a3b45c6d78901e//api/v1/check_run?api_key=0abcdef1a23b4c5d67ef8a90b1cde234", + "http://i-012a3b45c6d78901e//api/v1/check_run"}, + }; + + std::shared_ptr buffer_ptr{buffer}; + for (auto& test_case : test_cases) { + auto span_id = get_id(); + Span span{nullptr, + buffer_ptr, + get_time, + span_id, + span_id, + 0, + SpanContext{span_id, span_id, "", {}}, + get_time(), + "", + "", + "", + "", + ""}; + span.SetTag(ot::ext::http_url, test_case.first); + const ot::FinishSpanOptions finish_options; + span.FinishWithOptions(finish_options); + + auto& result = buffer->traces(span_id).finished_spans->back(); + REQUIRE(result->meta.find(ot::ext::http_url)->second == test_case.second); + } + } + + SECTION("audits span data (legacy)") { std::list> test_cases{ // Should remove query params {"/", "/"}, @@ -103,7 +141,8 @@ TEST_CASE("span") { "", "", "", - ""}; + "", + true}; span.SetTag(ot::ext::http_url, test_case.first); const ot::FinishSpanOptions finish_options; span.FinishWithOptions(finish_options);