From f4515165a6bbc5d3f042090e550c588cfffc5a94 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Wed, 20 Oct 2021 19:18:29 +0000 Subject: [PATCH 1/5] rule based sampling decisions use the "user" priority values --- src/sample.cpp | 21 +++++++++-- test/sample_test.cpp | 90 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 9 deletions(-) diff --git a/src/sample.cpp b/src/sample.cpp index 93433964..c1a67bf5 100644 --- a/src/sample.cpp +++ b/src/sample.cpp @@ -81,21 +81,36 @@ SampleResult RulesSampler::sample(const std::string& environment, const std::str return priority_sampler_.sample(environment, service, trace_id); } + // A sampling rule applies to (matches) the current span. + // + // Whatever sampling decision we make here (keep or drop) will be of "user" + // type, i.e. `SamplingPriority::UserKeep` or `SamplingPriority::UserDrop`. + // + // The matching rule's rate was configured by a user, and so we want to make + // sure that after sending the span to the agent, that the agent does not + // override our sampling decision as it might for "automated" sampling + // decisions, i.e. `SamplingPriority::SamplerKeep` or + // `SamplingPriority::SamplerDrop`. + SampleResult result; result.rule_rate = rule_result.rate; auto max_hash = maxIdFromSampleRate(rule_result.rate); uint64_t hashed_id = trace_id * constant_rate_hash_factor; if (hashed_id >= max_hash) { - result.sampling_priority = std::make_unique(SamplingPriority::SamplerDrop); + result.sampling_priority = std::make_unique(SamplingPriority::UserDrop); return result; } + // Even though both the priority sampler and the matching sampling rule, + // above, did not drop this span, we still might drop the span in order to + // satify the configured maximum sampling rate for spans selected by rule + // based sampling overall. auto limit_result = sampling_limiter_.allow(); result.limiter_rate = limit_result.effective_rate; if (limit_result.allowed) { - result.sampling_priority = std::make_unique(SamplingPriority::SamplerKeep); + result.sampling_priority = std::make_unique(SamplingPriority::UserKeep); } else { - result.sampling_priority = std::make_unique(SamplingPriority::SamplerDrop); + result.sampling_priority = std::make_unique(SamplingPriority::UserDrop); } return result; } diff --git a/test/sample_test.cpp b/test/sample_test.cpp index 67444d19..7d50d25b 100644 --- a/test/sample_test.cpp +++ b/test/sample_test.cpp @@ -62,20 +62,27 @@ TEST_CASE("priority sampler unit test") { } TEST_CASE("rules sampler") { + // `RulesSampler`'s constructor parameters are used to configure the + // sampler's `Limiter`. Here we prepare those arguments. std::tm start{}; start.tm_mday = 12; start.tm_mon = 2; start.tm_year = 107; - TimePoint time{std::chrono::system_clock::from_time_t(timegm(&start)), + const TimePoint time{std::chrono::system_clock::from_time_t(timegm(&start)), std::chrono::steady_clock::time_point{}}; - TimeProvider get_time = [&time]() { return time; }; // Mock clock. + const TimeProvider get_time = [&time]() { return time; }; // Mock clock. + // A `Limiter` configured with these parameters will allow the first, but + // none afterward. TODO: is that so? + const long max_tokens = 1; + const double refresh_rate = 1.0; + const long tokens_per_refresh = 1; + const auto sampler = std::make_shared(get_time, max_tokens, refresh_rate, tokens_per_refresh); - auto sampler = std::make_shared(get_time, 1, 1.0, 1); const ot::StartSpanOptions span_options; const ot::FinishSpanOptions finish_options; - auto mwriter = std::make_shared(sampler); - auto writer = std::shared_ptr(mwriter); + const auto mwriter = std::make_shared(sampler); + const auto writer = std::shared_ptr(mwriter); SECTION("rule matching applied") { TracerOptions tracer_options; @@ -144,7 +151,7 @@ TEST_CASE("rules sampler") { REQUIRE(metrics["_dd.rule_psr"] == 0.4); } - SECTION("applies limiter to sampled spans") { + SECTION("applies limiter to sampled spans only") { TracerOptions tracer_options; tracer_options.service = "test.service"; tracer_options.sampling_rules = R"([ @@ -161,4 +168,75 @@ TEST_CASE("rules sampler") { REQUIRE(metrics.find("_dd.limit_psr") == metrics.end()); REQUIRE(metrics.find("_dd.agent_psr") == metrics.end()); } + + SECTION("sampling based on rule yields a \"user\" sampling priority") { + // See the comments in `RulesSampler::sample` for an explanation of this + // section. + + // There are three cases: + // 1. Create a rule that matches the trace, and has rate `0.0`. Expect + // priority `UserDrop`. + // 2. Create a rule that matches the trace, and has rate `1.0`. Expect + // priority `UserKeep`. + // 3. Create a rule that matches the trace, and has rate `1.0`, but the + // limiter drops it. Expect `UserDrop`. + + SECTION("when the matching rule drops a trace") { + TracerOptions tracer_options; + tracer_options.service = "test.service"; + tracer_options.sampling_rules = R"([ + {"sample_rate": 0.0} +])"; + auto tracer = std::make_shared(tracer_options, writer, sampler); + + auto span = tracer->StartSpanWithOptions("operation name", span_options); + span->FinishWithOptions(finish_options); + + const auto& metrics = mwriter->traces[0][0]->metrics; + REQUIRE(metrics.count("_sampling_priority_v1")); + REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserDrop)); + } + + SECTION("when the matching rule keeps a trace") { + TracerOptions tracer_options; + tracer_options.service = "test.service"; + tracer_options.sampling_rules = R"([ + {"sample_rate": 1.0} +])"; + auto tracer = std::make_shared(tracer_options, writer, sampler); + + auto span = tracer->StartSpanWithOptions("operation name", span_options); + span->FinishWithOptions(finish_options); + + const auto& metrics = mwriter->traces[0][0]->metrics; + REQUIRE(metrics.count("_sampling_priority_v1")); + REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserKeep)); + } + + SECTION("when the limiter drops a trace") { + TracerOptions tracer_options; + tracer_options.service = "test.service"; + tracer_options.sampling_rules = R"([ + {"sample_rate": 1.0} +])"; + auto tracer = std::make_shared(tracer_options, writer, sampler); + + auto span = tracer->StartSpanWithOptions("operation name", span_options); + span->FinishWithOptions(finish_options); + + const auto& metrics = mwriter->traces[0][0]->metrics; + REQUIRE(metrics.count("_sampling_priority_v1")); + // TODO: We fail here. The sampling priority is actually `UserKeep` (as + // in the test case above). What I don't get is that, with the `Limiter` + // parameters (1, 1.0, 1), it should allow the first and then not any + // subsequent until the clock progresses, i.e. in the "limits requests" + // case in `limiter_test.cpp`. All of the `Tracer`s in this file use the + // same sampler, which uses the same limiter. If my claim about the + // effect of (1, 1.0, 1) were true, though, then we'd be getting + // `UserDrop` even before this point. What am I missing? Either I'm not + // understanding the `Limiter`'s role here, or some sort of reset is + // happening that I missed. + REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserDrop)); + } + } } From fe9d30255d40bdbe414604e784319e255cee9818 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Wed, 20 Oct 2021 19:36:35 +0000 Subject: [PATCH 2/5] clang-format --- test/sample_test.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/test/sample_test.cpp b/test/sample_test.cpp index 7d50d25b..3a8b9ac9 100644 --- a/test/sample_test.cpp +++ b/test/sample_test.cpp @@ -69,14 +69,15 @@ TEST_CASE("rules sampler") { start.tm_mon = 2; start.tm_year = 107; const TimePoint time{std::chrono::system_clock::from_time_t(timegm(&start)), - std::chrono::steady_clock::time_point{}}; + std::chrono::steady_clock::time_point{}}; const TimeProvider get_time = [&time]() { return time; }; // Mock clock. // A `Limiter` configured with these parameters will allow the first, but // none afterward. TODO: is that so? const long max_tokens = 1; const double refresh_rate = 1.0; const long tokens_per_refresh = 1; - const auto sampler = std::make_shared(get_time, max_tokens, refresh_rate, tokens_per_refresh); + const auto sampler = + std::make_shared(get_time, max_tokens, refresh_rate, tokens_per_refresh); const ot::StartSpanOptions span_options; const ot::FinishSpanOptions finish_options; @@ -172,7 +173,7 @@ TEST_CASE("rules sampler") { SECTION("sampling based on rule yields a \"user\" sampling priority") { // See the comments in `RulesSampler::sample` for an explanation of this // section. - + // There are three cases: // 1. Create a rule that matches the trace, and has rate `0.0`. Expect // priority `UserDrop`. @@ -180,7 +181,7 @@ TEST_CASE("rules sampler") { // priority `UserKeep`. // 3. Create a rule that matches the trace, and has rate `1.0`, but the // limiter drops it. Expect `UserDrop`. - + SECTION("when the matching rule drops a trace") { TracerOptions tracer_options; tracer_options.service = "test.service"; @@ -191,10 +192,11 @@ TEST_CASE("rules sampler") { auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); - + const auto& metrics = mwriter->traces[0][0]->metrics; REQUIRE(metrics.count("_sampling_priority_v1")); - REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserDrop)); + REQUIRE(metrics.at("_sampling_priority_v1") == + static_cast(SamplingPriority::UserDrop)); } SECTION("when the matching rule keeps a trace") { @@ -207,12 +209,13 @@ TEST_CASE("rules sampler") { auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); - + const auto& metrics = mwriter->traces[0][0]->metrics; REQUIRE(metrics.count("_sampling_priority_v1")); - REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserKeep)); + REQUIRE(metrics.at("_sampling_priority_v1") == + static_cast(SamplingPriority::UserKeep)); } - + SECTION("when the limiter drops a trace") { TracerOptions tracer_options; tracer_options.service = "test.service"; @@ -223,7 +226,7 @@ TEST_CASE("rules sampler") { auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); - + const auto& metrics = mwriter->traces[0][0]->metrics; REQUIRE(metrics.count("_sampling_priority_v1")); // TODO: We fail here. The sampling priority is actually `UserKeep` (as @@ -236,7 +239,8 @@ TEST_CASE("rules sampler") { // `UserDrop` even before this point. What am I missing? Either I'm not // understanding the `Limiter`'s role here, or some sort of reset is // happening that I missed. - REQUIRE(metrics.at("_sampling_priority_v1") == static_cast(SamplingPriority::UserDrop)); + REQUIRE(metrics.at("_sampling_priority_v1") == + static_cast(SamplingPriority::UserDrop)); } } } From 311afcc10e44a2856ff3fb563a0836660a7b007a Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Fri, 22 Oct 2021 17:07:48 +0000 Subject: [PATCH 3/5] fix bug in proposed unit test --- scripts/run_unit_tests.sh | 2 +- test/sample_test.cpp | 51 ++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/scripts/run_unit_tests.sh b/scripts/run_unit_tests.sh index f0ac32bb..a9f17f3b 100755 --- a/scripts/run_unit_tests.sh +++ b/scripts/run_unit_tests.sh @@ -15,4 +15,4 @@ mkdir -p "$BUILD_DIR" cd "$BUILD_DIR" cmake -DBUILD_TESTING=ON .. make --jobs=$(nproc) -ctest --output-on-failure +ctest --output-on-failure "$@" diff --git a/test/sample_test.cpp b/test/sample_test.cpp index 3a8b9ac9..9b91e19b 100644 --- a/test/sample_test.cpp +++ b/test/sample_test.cpp @@ -72,7 +72,7 @@ TEST_CASE("rules sampler") { std::chrono::steady_clock::time_point{}}; const TimeProvider get_time = [&time]() { return time; }; // Mock clock. // A `Limiter` configured with these parameters will allow the first, but - // none afterward. TODO: is that so? + // none afterward. const long max_tokens = 1; const double refresh_rate = 1.0; const long tokens_per_refresh = 1; @@ -188,11 +188,13 @@ TEST_CASE("rules sampler") { tracer_options.sampling_rules = R"([ {"sample_rate": 0.0} ])"; - auto tracer = std::make_shared(tracer_options, writer, sampler); + const auto tracer = std::make_shared(tracer_options, writer, sampler); - auto span = tracer->StartSpanWithOptions("operation name", span_options); + const auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); + REQUIRE(mwriter->traces.size() == 1); + REQUIRE(mwriter->traces[0].size() == 1); const auto& metrics = mwriter->traces[0][0]->metrics; REQUIRE(metrics.count("_sampling_priority_v1")); REQUIRE(metrics.at("_sampling_priority_v1") == @@ -205,11 +207,13 @@ TEST_CASE("rules sampler") { tracer_options.sampling_rules = R"([ {"sample_rate": 1.0} ])"; - auto tracer = std::make_shared(tracer_options, writer, sampler); + const auto tracer = std::make_shared(tracer_options, writer, sampler); - auto span = tracer->StartSpanWithOptions("operation name", span_options); + const auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); + REQUIRE(mwriter->traces.size() == 1); + REQUIRE(mwriter->traces[0].size() == 1); const auto& metrics = mwriter->traces[0][0]->metrics; REQUIRE(metrics.count("_sampling_priority_v1")); REQUIRE(metrics.at("_sampling_priority_v1") == @@ -222,25 +226,32 @@ TEST_CASE("rules sampler") { tracer_options.sampling_rules = R"([ {"sample_rate": 1.0} ])"; - auto tracer = std::make_shared(tracer_options, writer, sampler); + const auto tracer = std::make_shared(tracer_options, writer, sampler); auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); + // The first trace will be allowed by the limiter. + { + REQUIRE(mwriter->traces.size() == 1); + REQUIRE(mwriter->traces[0].size() == 1); + const auto& metrics = mwriter->traces[0][0]->metrics; + REQUIRE(metrics.count("_sampling_priority_v1")); + REQUIRE(metrics.at("_sampling_priority_v1") == + static_cast(SamplingPriority::UserKeep)); + } - const auto& metrics = mwriter->traces[0][0]->metrics; - REQUIRE(metrics.count("_sampling_priority_v1")); - // TODO: We fail here. The sampling priority is actually `UserKeep` (as - // in the test case above). What I don't get is that, with the `Limiter` - // parameters (1, 1.0, 1), it should allow the first and then not any - // subsequent until the clock progresses, i.e. in the "limits requests" - // case in `limiter_test.cpp`. All of the `Tracer`s in this file use the - // same sampler, which uses the same limiter. If my claim about the - // effect of (1, 1.0, 1) were true, though, then we'd be getting - // `UserDrop` even before this point. What am I missing? Either I'm not - // understanding the `Limiter`'s role here, or some sort of reset is - // happening that I missed. - REQUIRE(metrics.at("_sampling_priority_v1") == - static_cast(SamplingPriority::UserDrop)); + span = tracer->StartSpanWithOptions("operation name", span_options); + span->FinishWithOptions(finish_options); + // The second trace will be dropped by the limiter, and the priority will + // be `UserDrop`. + { + REQUIRE(mwriter->traces.size() == 2); + REQUIRE(mwriter->traces[1].size() == 1); + const auto& metrics = mwriter->traces[1][0]->metrics; + REQUIRE(metrics.count("_sampling_priority_v1")); + REQUIRE(metrics.at("_sampling_priority_v1") == + static_cast(SamplingPriority::UserDrop)); + } } } } From 9033b81d76c6fecd024c571a4e0515197598d253 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Fri, 22 Oct 2021 21:22:24 +0000 Subject: [PATCH 4/5] update expected sampling priorities in nginx integration test --- test/integration/nginx/expected_tc1.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/nginx/expected_tc1.json b/test/integration/nginx/expected_tc1.json index f33447be..aeb28674 100644 --- a/test/integration/nginx/expected_tc1.json +++ b/test/integration/nginx/expected_tc1.json @@ -14,7 +14,7 @@ "metrics": { "_dd.limit_psr": 1, "_dd.rule_psr": 1, - "_sampling_priority_v1": 1 + "_sampling_priority_v1": 2 }, "name": "nginx.handle", "resource": "/", @@ -37,7 +37,7 @@ "metrics": { "_dd.limit_psr": 1, "_dd.rule_psr": 1, - "_sampling_priority_v1": 1 + "_sampling_priority_v1": 2 }, "name": "nginx.handle", "resource": "/", @@ -60,7 +60,7 @@ "metrics": { "_dd.limit_psr": 1, "_dd.rule_psr": 1, - "_sampling_priority_v1": 1 + "_sampling_priority_v1": 2 }, "name": "nginx.handle", "resource": "/", From 1367df21d5031de7664301018d8f4209fe2d8764 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Tue, 26 Oct 2021 18:03:00 +0000 Subject: [PATCH 5/5] test/sample_test.cpp: address review comment --- test/sample_test.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/test/sample_test.cpp b/test/sample_test.cpp index 9b91e19b..e720137b 100644 --- a/test/sample_test.cpp +++ b/test/sample_test.cpp @@ -228,22 +228,14 @@ TEST_CASE("rules sampler") { ])"; const auto tracer = std::make_shared(tracer_options, writer, sampler); + // The first span will be allowed by the limiter (tested in the previous section). auto span = tracer->StartSpanWithOptions("operation name", span_options); span->FinishWithOptions(finish_options); - // The first trace will be allowed by the limiter. - { - REQUIRE(mwriter->traces.size() == 1); - REQUIRE(mwriter->traces[0].size() == 1); - const auto& metrics = mwriter->traces[0][0]->metrics; - REQUIRE(metrics.count("_sampling_priority_v1")); - REQUIRE(metrics.at("_sampling_priority_v1") == - static_cast(SamplingPriority::UserKeep)); - } - span = tracer->StartSpanWithOptions("operation name", span_options); - span->FinishWithOptions(finish_options); // The second trace will be dropped by the limiter, and the priority will // be `UserDrop`. + span = tracer->StartSpanWithOptions("operation name", span_options); + span->FinishWithOptions(finish_options); { REQUIRE(mwriter->traces.size() == 2); REQUIRE(mwriter->traces[1].size() == 1);