DataDog · dgoffredo · Oct 27, 2021 · Oct 20, 2021 · Oct 20, 2021 · Oct 22, 2021
diff --git a/scripts/run_unit_tests.sh b/scripts/run_unit_tests.sh
@@ -15,4 +15,4 @@ mkdir -p "$BUILD_DIR"
 cd "$BUILD_DIR"
 cmake -DBUILD_TESTING=ON ..
 make --jobs=$(nproc)
-ctest --output-on-failure
+ctest --output-on-failure "$@"
diff --git a/src/sample.cpp b/src/sample.cpp
@@ -81,21 +81,36 @@ SampleResult RulesSampler::sample(const std::string& environment, const std::str
     return priority_sampler_.sample(environment, service, trace_id);
   }
 
+  // A sampling rule applies to (matches) the current span.
+  //
+  // Whatever sampling decision we make here (keep or drop) will be of "user"
+  // type, i.e. `SamplingPriority::UserKeep` or `SamplingPriority::UserDrop`.
+  //
+  // The matching rule's rate was configured by a user, and so we want to make
+  // sure that after sending the span to the agent, that the agent does not
+  // override our sampling decision as it might for "automated" sampling
+  // decisions, i.e. `SamplingPriority::SamplerKeep` or
+  // `SamplingPriority::SamplerDrop`.
+
   SampleResult result;
   result.rule_rate = rule_result.rate;
   auto max_hash = maxIdFromSampleRate(rule_result.rate);
   uint64_t hashed_id = trace_id * constant_rate_hash_factor;
   if (hashed_id >= max_hash) {
-    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::SamplerDrop);
+    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
     return result;
   }
 
+  // Even though both the priority sampler and the matching sampling rule,
+  // above, did not drop this span, we still might drop the span in order to
+  // satify the configured maximum sampling rate for spans selected by rule
+  // based sampling overall.
   auto limit_result = sampling_limiter_.allow();
   result.limiter_rate = limit_result.effective_rate;
   if (limit_result.allowed) {
-    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::SamplerKeep);
+    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::UserKeep);
   } else {
-    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::SamplerDrop);
+    result.sampling_priority = std::make_unique<SamplingPriority>(SamplingPriority::UserDrop);
   }
   return result;
 }

diff --git a/test/integration/nginx/expected_tc1.json b/test/integration/nginx/expected_tc1.json
@@ -14,7 +14,7 @@
       "metrics": {
         "_dd.limit_psr": 1,
         "_dd.rule_psr": 1,
-        "_sampling_priority_v1": 1
+        "_sampling_priority_v1": 2
       },
       "name": "nginx.handle",
       "resource": "/",
@@ -37,7 +37,7 @@
       "metrics": {
         "_dd.limit_psr": 1,
         "_dd.rule_psr": 1,
-        "_sampling_priority_v1": 1
+        "_sampling_priority_v1": 2
       },
       "name": "nginx.handle",
       "resource": "/",
@@ -60,7 +60,7 @@
       "metrics": {
         "_dd.limit_psr": 1,
         "_dd.rule_psr": 1,
-        "_sampling_priority_v1": 1
+        "_sampling_priority_v1": 2
       },
       "name": "nginx.handle",
       "resource": "/",

diff --git a/test/sample_test.cpp b/test/sample_test.cpp
@@ -62,20 +62,28 @@ TEST_CASE("priority sampler unit test") {
 }
 
 TEST_CASE("rules sampler") {
+  // `RulesSampler`'s constructor parameters are used to configure the
+  // sampler's `Limiter`. Here we prepare those arguments.
   std::tm start{};
   start.tm_mday = 12;
   start.tm_mon = 2;
   start.tm_year = 107;
-  TimePoint time{std::chrono::system_clock::from_time_t(timegm(&start)),
-                 std::chrono::steady_clock::time_point{}};
-  TimeProvider get_time = [&time]() { return time; };  // Mock clock.
+  const TimePoint time{std::chrono::system_clock::from_time_t(timegm(&start)),
+                       std::chrono::steady_clock::time_point{}};
+  const TimeProvider get_time = [&time]() { return time; };  // Mock clock.
+  // A `Limiter` configured with these parameters will allow the first, but
+  // none afterward.
+  const long max_tokens = 1;
+  const double refresh_rate = 1.0;
+  const long tokens_per_refresh = 1;
+  const auto sampler =
+      std::make_shared<RulesSampler>(get_time, max_tokens, refresh_rate, tokens_per_refresh);
 
-  auto sampler = std::make_shared<RulesSampler>(get_time, 1, 1.0, 1);
   const ot::StartSpanOptions span_options;
   const ot::FinishSpanOptions finish_options;
 
-  auto mwriter = std::make_shared<MockWriter>(sampler);
-  auto writer = std::shared_ptr<Writer>(mwriter);
+  const auto mwriter = std::make_shared<MockWriter>(sampler);
+  const auto writer = std::shared_ptr<Writer>(mwriter);
 
   SECTION("rule matching applied") {
     TracerOptions tracer_options;
@@ -144,7 +152,7 @@ TEST_CASE("rules sampler") {
     REQUIRE(metrics["_dd.rule_psr"] == 0.4);
   }
 
-  SECTION("applies limiter to sampled spans") {
+  SECTION("applies limiter to sampled spans only") {
     TracerOptions tracer_options;
     tracer_options.service = "test.service";
     tracer_options.sampling_rules = R"([
@@ -161,4 +169,89 @@ TEST_CASE("rules sampler") {
     REQUIRE(metrics.find("_dd.limit_psr") == metrics.end());
     REQUIRE(metrics.find("_dd.agent_psr") == metrics.end());
   }
+
+  SECTION("sampling based on rule yields a \"user\" sampling priority") {
+    // See the comments in `RulesSampler::sample` for an explanation of this
+    // section.
+
+    // There are three cases:
+    // 1. Create a rule that matches the trace, and has rate `0.0`. Expect
+    //     priority `UserDrop`.
+    // 2. Create a rule that matches the trace, and has rate `1.0`. Expect
+    //     priority `UserKeep`.
+    // 3. Create a rule that matches the trace, and has rate `1.0`, but the
+    //     limiter drops it. Expect `UserDrop`.
+
+    SECTION("when the matching rule drops a trace") {
+      TracerOptions tracer_options;
+      tracer_options.service = "test.service";
+      tracer_options.sampling_rules = R"([
+    {"sample_rate": 0.0}
+])";
+      const auto tracer = std::make_shared<Tracer>(tracer_options, writer, sampler);
+
+      const auto span = tracer->StartSpanWithOptions("operation name", span_options);
+      span->FinishWithOptions(finish_options);
+
+      REQUIRE(mwriter->traces.size() == 1);
+      REQUIRE(mwriter->traces[0].size() == 1);
+      const auto& metrics = mwriter->traces[0][0]->metrics;
+      REQUIRE(metrics.count("_sampling_priority_v1"));
+      REQUIRE(metrics.at("_sampling_priority_v1") ==
+              static_cast<double>(SamplingPriority::UserDrop));
+    }
+
+    SECTION("when the matching rule keeps a trace") {
+      TracerOptions tracer_options;
+      tracer_options.service = "test.service";
+      tracer_options.sampling_rules = R"([
+    {"sample_rate": 1.0}
+])";
+      const auto tracer = std::make_shared<Tracer>(tracer_options, writer, sampler);
+
+      const auto span = tracer->StartSpanWithOptions("operation name", span_options);
+      span->FinishWithOptions(finish_options);
+
+      REQUIRE(mwriter->traces.size() == 1);
+      REQUIRE(mwriter->traces[0].size() == 1);
+      const auto& metrics = mwriter->traces[0][0]->metrics;
+      REQUIRE(metrics.count("_sampling_priority_v1"));
+      REQUIRE(metrics.at("_sampling_priority_v1") ==
+              static_cast<double>(SamplingPriority::UserKeep));
+    }
+
+    SECTION("when the limiter drops a trace") {
+      TracerOptions tracer_options;
+      tracer_options.service = "test.service";
+      tracer_options.sampling_rules = R"([
+    {"sample_rate": 1.0}
+])";
+      const auto tracer = std::make_shared<Tracer>(tracer_options, writer, sampler);
+
+      auto span = tracer->StartSpanWithOptions("operation name", span_options);
+      span->FinishWithOptions(finish_options);
+      // The first trace will be allowed by the limiter.
+      {
+        REQUIRE(mwriter->traces.size() == 1);
+        REQUIRE(mwriter->traces[0].size() == 1);
+        const auto& metrics = mwriter->traces[0][0]->metrics;
+        REQUIRE(metrics.count("_sampling_priority_v1"));
+        REQUIRE(metrics.at("_sampling_priority_v1") ==
+                static_cast<double>(SamplingPriority::UserKeep));
+      }
+
+      span = tracer->StartSpanWithOptions("operation name", span_options);
+      span->FinishWithOptions(finish_options);
+      // The second trace will be dropped by the limiter, and the priority will
+      // be `UserDrop`.
+      {
+        REQUIRE(mwriter->traces.size() == 2);
+        REQUIRE(mwriter->traces[1].size() == 1);
+        const auto& metrics = mwriter->traces[1][0]->metrics;
+        REQUIRE(metrics.count("_sampling_priority_v1"));
+        REQUIRE(metrics.at("_sampling_priority_v1") ==
+                static_cast<double>(SamplingPriority::UserDrop));
+      }
+    }
+  }
 }