From 2b017b7e0c1ca24fcae7adc9d69505649b469e18 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Thu, 10 Mar 2022 12:00:38 -0500 Subject: [PATCH] make the rules sample limiter and default sampling rate configurable (#215) * add limiter test for less-than-one-per-second * add limiter constructor that takes just a rate * configure rules sampler limiter with DD_TRACE_RATE_LIMIT * configure rules sampler limiter with factory (JSON), and include in config banner * test that DD_TRACE_RATE_LIMIT affects TracerOptions::sampling_limit_per_second * remove priority sampling from nginx integration tests * repurpose TracerOptions::sample_rate to be the default fallback rule sampling rate, and allow configuration using DD_TRACE_SAMPLE_RATE * review: change error reporting in parseDouble * review: ready for my close-up, Mr. DeMille * review: test parsing failure of new env variables * NaN nan NaN nan * fix: fallback rule exists only if sample_rate is specified; otherwise, fall back to priority sampling * Revert "remove priority sampling from nginx integration tests" This reverts commit 80d11a75173857239daf4b96089a11e6b710cbda. * fix: wrote the new catch-all rule test incorrectly * remove redundant code, improve readability of integration test output * fix the nginx integration tests per sampling changes * remove debugging artifacts * remove reference to defunct initializer in config test * remove debugging artifact --- include/datadog/opentracing.h | 38 +++-- src/limiter.cpp | 4 + src/limiter.h | 1 + src/opentracing_agent.cpp | 2 +- src/opentracing_external.cpp | 2 +- src/sample.cpp | 3 + src/sample.h | 1 + src/tracer.cpp | 141 ++++++++++-------- src/tracer_factory.cpp | 5 +- src/tracer_factory.h | 25 +--- src/tracer_options.cpp | 82 +++++++--- src/tracer_options.h | 2 +- test/integration/nginx/Dockerfile | 2 +- test/integration/nginx/expected_tc1.json | 15 +- .../nginx/nginx_integration_test.sh | 18 +-- test/limiter_test.cpp | 44 +++++- test/sample_test.cpp | 22 +++ test/tracer_options_test.cpp | 117 +++++++++++---- test/tracer_test.cpp | 2 +- 19 files changed, 357 insertions(+), 169 deletions(-) diff --git a/include/datadog/opentracing.h b/include/datadog/opentracing.h index 34e1d592..2c986cbb 100644 --- a/include/datadog/opentracing.h +++ b/include/datadog/opentracing.h @@ -66,21 +66,30 @@ struct TracerOptions { // The environment this trace belongs to. eg. "" (env:none), "staging", "prod". Can also be set // by the environment variable DD_ENV std::string environment = ""; - // This option is deprecated and may be removed in future releases. - // It is equivalent to setting a sampling rule with only a "sample_rate". - // Values must be between 0.0 and 1.0 (inclusive) + // `sample_rate` is the default sampling rate for any trace unmatched by a + // sampling rule. Setting `sample_rate` is equivalent to appending to + // `sampling_rules` a rule whose "sample_rate" is `sample_rate`. If + // `sample_rate` is NaN, then no default rule is added, and traces not + // matching any sampling rule are subject to "priority sampling," where the + // sampling rate is determined by the Datadog trace agent. This option is + // also configurable as the environment variable DD_TRACE_SAMPLE_RATE. double sample_rate = std::nan(""); // This option is deprecated, and may be removed in future releases. bool priority_sampling = true; - // Rules sampling is applied when initiating traces to determine the sampling rate. - // Traces that do not match any rules fall back to using priority sampling, where the rate is - // determined by a combination of user-assigned priorities and configuration from the agent. - // Configuration is specified as a JSON array of objects. Each object must have a "sample_rate", - // and the "name" and "service" fields are optional. The "sample_rate" value must be between 0.0 - // and 1.0 (inclusive). Rules are applied in configured order, so a specific match should be - // specified before a wider match. If any rules are invalid, they are ignored. Can also be set by - // the environment variable DD_TRACE_SAMPLING_RULES. - std::string sampling_rules = R"([{"sample_rate": 1.0}])"; + // Rules sampling is applied when initiating traces to determine the sampling + // rate. Configuration is specified as a JSON array of objects. Each object + // must have a "sample_rate", while the "name" and "service" fields are + // optional. The "sample_rate" value must be between 0.0 and 1.0 (inclusive). + // Rules are checked in order, so a more specific rule should be specified + // before a less specific rule. Note that if the `sample_rate` field of this + // `TracerOptions` has a non-NaN value, then there is an implicit rule at the + // end of the list that matches any trace unmatched by other rules, and + // applies a sampling rate of `sample_rate`. If no rule matches a trace, + // then "priority sampling" is applied instead, where the sample rate is + // determined by the Datadog trace agent. If any rules are invalid, they are + // ignored. This option is also configurable as the environment variable + // DD_TRACE_SAMPLING_RULES. + std::string sampling_rules = "[]"; // Max amount of time to wait between sending traces to agent, in ms. Agent discards traces older // than 10s, so that is the upper bound. int64_t write_period_ms = 1000; @@ -134,6 +143,11 @@ struct TracerOptions { } std::cerr << level_str + ": " + message.data() + "\n"; }; + // `sampling_limit_per_second` is the limit on the number of rule-controlled + // traces that may be sampled per second. This includes traces that match + // the implicit "catch-all" rule appended to `sampling_rules`. This option + // is also configurable as the environment variable DD_TRACE_RATE_LIMIT. + double sampling_limit_per_second = 100; }; // TraceEncoder exposes the data required to encode and submit traces to the diff --git a/src/limiter.cpp b/src/limiter.cpp index 064c9c46..7667becf 100644 --- a/src/limiter.cpp +++ b/src/limiter.cpp @@ -1,6 +1,7 @@ #include "limiter.h" #include +#include #include #include @@ -27,6 +28,9 @@ Limiter::Limiter(TimeProvider now_func, long max_tokens, double refresh_rate, previous_rates_sum_ = std::accumulate(previous_rates_.begin(), previous_rates_.end(), 0.0); } +Limiter::Limiter(TimeProvider now_func, double allowed_per_second) + : Limiter(now_func, long(std::ceil(allowed_per_second)), allowed_per_second, 1) {} + LimitResult Limiter::allow() { return allow(1); } LimitResult Limiter::allow(long tokens_requested) { diff --git a/src/limiter.h b/src/limiter.h index a2382e68..3cc423a9 100644 --- a/src/limiter.h +++ b/src/limiter.h @@ -17,6 +17,7 @@ struct LimitResult { class Limiter { public: Limiter(TimeProvider clock, long max_tokens, double refresh_rate, long tokens_per_refresh); + Limiter(TimeProvider clock, double allowed_per_second); LimitResult allow(); LimitResult allow(long tokens); diff --git a/src/opentracing_agent.cpp b/src/opentracing_agent.cpp index 2371a88c..ba793f7a 100644 --- a/src/opentracing_agent.cpp +++ b/src/opentracing_agent.cpp @@ -30,7 +30,7 @@ std::shared_ptr makeTracer(const TracerOptions &options) { TracerOptions opts = maybe_options.value(); auto logger = makeLogger(opts); - auto sampler = std::make_shared(); + auto sampler = std::make_shared(opts.sampling_limit_per_second); auto writer = std::shared_ptr{ new AgentWriter(opts.agent_host, opts.agent_port, opts.agent_url, std::chrono::milliseconds(llabs(opts.write_period_ms)), sampler, logger)}; diff --git a/src/opentracing_external.cpp b/src/opentracing_external.cpp index a785be5b..d164cb0c 100644 --- a/src/opentracing_external.cpp +++ b/src/opentracing_external.cpp @@ -39,7 +39,7 @@ std::tuple, std::shared_ptr> makeTrace } TracerOptions opts = maybe_options.value(); - auto sampler = std::make_shared(); + auto sampler = std::make_shared(opts.sampling_limit_per_second); auto writer = std::make_shared(sampler, logger); auto encoder = writer->encoder(); return std::tuple, std::shared_ptr>{ diff --git a/src/sample.cpp b/src/sample.cpp index fd3e1f78..caee98dc 100644 --- a/src/sample.cpp +++ b/src/sample.cpp @@ -72,6 +72,9 @@ void PrioritySampler::configure(json config) { RulesSampler::RulesSampler() : sampling_limiter_(getRealTime, 100, 100.0, 1) {} +RulesSampler::RulesSampler(double limit_per_second) + : sampling_limiter_(getRealTime, limit_per_second) {} + RulesSampler::RulesSampler(TimeProvider clock, long max_tokens, double refresh_rate, long tokens_per_refresh) : sampling_limiter_(clock, max_tokens, refresh_rate, tokens_per_refresh) {} diff --git a/src/sample.h b/src/sample.h index 5d2e512f..5bfeed35 100644 --- a/src/sample.h +++ b/src/sample.h @@ -69,6 +69,7 @@ using RuleFunc = std::function #endif +#include #include #include #include @@ -123,7 +124,11 @@ void startupLog(TracerOptions &options) { } j["analytics_enabled"] = options.analytics_enabled; j["analytics_sample_rate"] = options.analytics_rate; + if (!std::isnan(options.sample_rate)) { + j["sample_rate"] = options.sample_rate; + } j["sampling_rules"] = options.sampling_rules; + j["sampling_limit_per_second"] = options.sampling_limit_per_second; if (!options.tags.empty()) { j["tags"] = options.tags; } @@ -167,71 +172,83 @@ uint64_t traceTagsPropagationMaxLength(const TracerOptions & /*options*/, const } // namespace -void Tracer::configureRulesSampler(std::shared_ptr sampler) noexcept try { - auto log_invalid_json = [&](const std::string &description, json &object) { - logger_->Log(LogLevel::info, description + ": " + object.get()); - }; - json config = json::parse(opts_.sampling_rules); - for (auto &item : config.items()) { - auto rule = item.value(); - if (!rule.is_object()) { - log_invalid_json("rules sampler: unexpected item in sampling rules", rule); - continue; - } - // "sample_rate" is mandatory - if (!rule.contains("sample_rate")) { - log_invalid_json("rules sampler: rule is missing 'sample_rate'", rule); - continue; - } - if (!rule.at("sample_rate").is_number()) { - log_invalid_json("rules sampler: invalid type for 'sample_rate' (expected number)", rule); - continue; - } - auto sample_rate = rule.at("sample_rate").get(); - if (!(sample_rate >= 0.0 && sample_rate <= 1.0)) { - log_invalid_json( - "rules sampler: invalid value for sample rate (expected value between 0.0 and 1.0)", - rule); - } - // "service" and "name" are optional - bool has_service = rule.contains("service") && rule.at("service").is_string(); - bool has_name = rule.contains("name") && rule.at("name").is_string(); - auto nan = std::nan(""); - if (has_service && has_name) { - auto svc = rule.at("service").get(); - auto nm = rule.at("name").get(); - sampler->addRule([=](const std::string &service, const std::string &name) -> RuleResult { - if (service == svc && name == nm) { - return {true, sample_rate}; - } - return {false, nan}; - }); - } else if (has_service) { - auto svc = rule.at("service").get(); - sampler->addRule([=](const std::string &service, const std::string &) -> RuleResult { - if (service == svc) { - return {true, sample_rate}; - } - return {false, nan}; - }); - } else if (has_name) { - auto nm = rule.at("name").get(); - sampler->addRule([=](const std::string &, const std::string &name) -> RuleResult { - if (name == nm) { +void Tracer::configureRulesSampler(std::shared_ptr sampler) noexcept { + try { + auto log_invalid_json = [&](const std::string &description, json &object) { + logger_->Log(LogLevel::info, description + ": " + object.get()); + }; + json config = json::parse(opts_.sampling_rules); + for (auto &item : config.items()) { + auto rule = item.value(); + if (!rule.is_object()) { + log_invalid_json("rules sampler: unexpected item in sampling rules", rule); + continue; + } + // "sample_rate" is mandatory + if (!rule.contains("sample_rate")) { + log_invalid_json("rules sampler: rule is missing 'sample_rate'", rule); + continue; + } + if (!rule.at("sample_rate").is_number()) { + log_invalid_json("rules sampler: invalid type for 'sample_rate' (expected number)", rule); + continue; + } + auto sample_rate = rule.at("sample_rate").get(); + if (!(sample_rate >= 0.0 && sample_rate <= 1.0)) { + log_invalid_json( + "rules sampler: invalid value for sample rate (expected value between 0.0 and 1.0)", + rule); + } + // "service" and "name" are optional + bool has_service = rule.contains("service") && rule.at("service").is_string(); + bool has_name = rule.contains("name") && rule.at("name").is_string(); + auto nan = std::nan(""); + if (has_service && has_name) { + auto svc = rule.at("service").get(); + auto nm = rule.at("name").get(); + sampler->addRule([=](const std::string &service, const std::string &name) -> RuleResult { + if (service == svc && name == nm) { + return {true, sample_rate}; + } + return {false, nan}; + }); + } else if (has_service) { + auto svc = rule.at("service").get(); + sampler->addRule([=](const std::string &service, const std::string &) -> RuleResult { + if (service == svc) { + return {true, sample_rate}; + } + return {false, nan}; + }); + } else if (has_name) { + auto nm = rule.at("name").get(); + sampler->addRule([=](const std::string &, const std::string &name) -> RuleResult { + if (name == nm) { + return {true, sample_rate}; + } + return {false, nan}; + }); + } else { + sampler->addRule([=](const std::string &, const std::string &) -> RuleResult { return {true, sample_rate}; - } - return {false, nan}; - }); - } else { - sampler->addRule([=](const std::string &, const std::string &) -> RuleResult { - return {true, sample_rate}; - }); + }); + } } + } catch (const json::parse_error &error) { + std::ostringstream message; + message << "rules sampler: unable to parse JSON config for rules sampler: " << error.what(); + logger_->Log(LogLevel::error, message.str()); + } + + // If there is a configured overall sample rate, add an automatic "catch all" + // rule to the end that samples at that rate. Otherwise, don't (unmatched + // traces will be subject to priority sampling). + const double sample_rate = opts_.sample_rate; + if (!std::isnan(sample_rate)) { + sampler->addRule([=](const std::string &, const std::string &) -> RuleResult { + return {true, sample_rate}; + }); } -} catch (const json::parse_error &error) { - std::ostringstream message; - message << "rules sampler: unable to parse JSON config for rules sampler: " << error.what(); - logger_->Log(LogLevel::error, message.str()); } Tracer::Tracer(TracerOptions options, std::shared_ptr buffer, TimeProvider get_time, diff --git a/src/tracer_factory.cpp b/src/tracer_factory.cpp index a8610e96..cf490ac5 100644 --- a/src/tracer_factory.cpp +++ b/src/tracer_factory.cpp @@ -10,7 +10,7 @@ namespace opentracing { ot::expected optionsFromConfig(const char *configuration, std::string &error_message) { - TracerOptions options{"localhost", 8126, "", "web", "", 1.0}; + TracerOptions options; json config; try { config = json::parse(configuration); @@ -85,6 +85,9 @@ ot::expected optionsFromConfig(const char *configuration, if (config.find("dd.trace.analytics-sample-rate") != config.end()) { config.at("dd.trace.analytics-sample-rate").get_to(options.analytics_rate); } + if (config.find("sampling_limit_per_second") != config.end()) { + config.at("sampling_limit_per_second").get_to(options.sampling_limit_per_second); + } } catch (const nlohmann::detail::type_error &) { error_message = "configuration has an argument with an incorrect type"; return ot::make_unexpected(std::make_error_code(std::errc::invalid_argument)); diff --git a/src/tracer_factory.h b/src/tracer_factory.h index d355940b..6816a007 100644 --- a/src/tracer_factory.h +++ b/src/tracer_factory.h @@ -19,28 +19,9 @@ ot::expected optionsFromConfig(const char *configuration, template class TracerFactory : public ot::TracerFactory { public: - // Accepts configuration in JSON format, with the following keys: - // "service": Required. A string, the name of the service. - // "agent_host": A string, defaults to localhost. Can also be set by the environment variable - // DD_AGENT_HOST - // "agent_port": A number, defaults to 8126. "type": A string, defaults to web. Can also be set - // by the environment variable DD_TRACE_AGENT_PORT - // "type": A string, defaults to web. - // "environment": A string, defaults to "". The environment this trace belongs to. - // eg. "" (env:none), "staging", "prod". Can also be set by the environment variable - // DD_ENV - // "sample_rate": A double, defaults to 1.0. - // "operation_name_override": A string, if not empty it overrides the operation name (and the - // overridden operation name is recorded in the tag "operation"). - // "propagation_style_extract": A list of strings, each string is one of "Datadog", "B3". - // Defaults to ["Datadog"]. The type of headers to use to propagate - // distributed traces. Can also be set by the environment variable - // DD_PROPAGATION_STYLE_EXTRACT. - // "propagation_style_inject": A list of strings, each string is one of "Datadog", "B3". Defaults - // to ["Datadog"]. The type of headers to use to receive distributed traces. Can also be set - // by the environment variable DD_PROPAGATION_STYLE_INJECT. - // - // Extra keys will be ignored. + // Accepts configuration as a JSON object. See `optionsFromConfig` in + // tracer_factory.cpp for a list of supported attributes. Unsupported + // attributes are ignored. ot::expected> MakeTracer(const char *configuration, std::string &error_message) const noexcept override; diff --git a/src/tracer_options.cpp b/src/tracer_options.cpp index 65964e45..8c25cd57 100644 --- a/src/tracer_options.cpp +++ b/src/tracer_options.cpp @@ -3,7 +3,9 @@ #include #include +#include #include +#include #include "bool.h" @@ -85,6 +87,30 @@ std::vector tokenize_propagation_style(const std::string &input) { return result; } +ot::expected parseDouble(const std::string &text, double minimum, + double maximum) try { + std::size_t end_index; + const double value = std::stod(text, &end_index); + // If any of the remaining characters are not whitespace, then `text` + // contains something other than a floating point number. + if (std::any_of(text.begin() + end_index, text.end(), + [](unsigned char ch) { return !std::isspace(ch); })) { + return ot::make_unexpected("contains trailing non-floating-point characters: " + text); + } + // Note: Check "not inside" instead of "outside" so that the error is + // triggered for NaN `value`. + if (!(value >= minimum && value <= maximum)) { + std::ostringstream error; + error << "not within the expected bounds [" << minimum << ", " << maximum << "]: " << value; + return ot::make_unexpected(error.str()); + } + return value; +} catch (const std::invalid_argument &) { + return ot::make_unexpected("does not look like a double: " + text); +} catch (const std::out_of_range &) { + return ot::make_unexpected("not within the range of a double: " + text); +} + } // namespace ot::expected> asPropagationStyle( @@ -105,8 +131,10 @@ ot::expected> asPropagationStyle( return propagation_styles; } -ot::expected applyTracerOptionsFromEnvironment( +ot::expected applyTracerOptionsFromEnvironment( const TracerOptions &input) { + using namespace std::string_literals; + TracerOptions opts = input; auto environment = std::getenv("DD_ENV"); @@ -151,9 +179,9 @@ ot::expected applyTracerOptionsFromEnvironment( try { opts.agent_port = std::stoi(trace_agent_port); } catch (const std::invalid_argument &ia) { - return ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is invalid"); + return ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is invalid"s); } catch (const std::out_of_range &oor) { - return ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is out of range"); + return ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is out of range"s); } } @@ -171,7 +199,7 @@ ot::expected applyTracerOptionsFromEnvironment( if (extract != nullptr && std::strlen(extract) > 0) { auto style_maybe = asPropagationStyle(tokenize_propagation_style(extract)); if (!style_maybe) { - return ot::make_unexpected("Value for DD_PROPAGATION_STYLE_EXTRACT is invalid"); + return ot::make_unexpected("Value for DD_PROPAGATION_STYLE_EXTRACT is invalid"s); } opts.extract = style_maybe.value(); } @@ -180,7 +208,7 @@ ot::expected applyTracerOptionsFromEnvironment( if (inject != nullptr && std::strlen(inject) > 0) { auto style_maybe = asPropagationStyle(tokenize_propagation_style(inject)); if (!style_maybe) { - return ot::make_unexpected("Value for DD_PROPAGATION_STYLE_INJECT is invalid"); + return ot::make_unexpected("Value for DD_PROPAGATION_STYLE_INJECT is invalid"s); } opts.inject = style_maybe.value(); } @@ -191,7 +219,7 @@ ot::expected applyTracerOptionsFromEnvironment( if (value.empty() || isbool(value)) { opts.report_hostname = stob(value, false); } else { - return ot::make_unexpected("Value for DD_TRACE_REPORT_HOSTNAME is invalid"); + return ot::make_unexpected("Value for DD_TRACE_REPORT_HOSTNAME is invalid"s); } } @@ -206,26 +234,42 @@ ot::expected applyTracerOptionsFromEnvironment( opts.analytics_rate = std::nan(""); } } else { - return ot::make_unexpected("Value for DD_TRACE_ANALYTICS_ENABLED is invalid"); + return ot::make_unexpected("Value for DD_TRACE_ANALYTICS_ENABLED is invalid"s); } } auto analytics_rate = std::getenv("DD_TRACE_ANALYTICS_SAMPLE_RATE"); if (analytics_rate != nullptr) { - try { - double value = std::stod(analytics_rate); - if (value >= 0.0 && value <= 1.0) { - opts.analytics_enabled = true; - opts.analytics_rate = value; - } else { - return ot::make_unexpected("Value for DD_TRACE_ANALYTICS_SAMPLE_RATE is invalid"); - } - } catch (const std::invalid_argument &ia) { - return ot::make_unexpected("Value for DD_TRACE_ANALYTICS_SAMPLE_RATE is invalid"); - } catch (const std::out_of_range &oor) { - return ot::make_unexpected("Value for DD_TRACE_ANALYTICS_SAMPLE_RATE is invalid"); + auto maybe_value = parseDouble(analytics_rate, 0.0, 1.0); + if (!maybe_value) { + maybe_value.error().insert(0, "while parsing DD_TRACE_ANALYTICS_SAMPLE_RATE: "); + return ot::make_unexpected(std::move(maybe_value.error())); + } + opts.analytics_enabled = true; + opts.analytics_rate = maybe_value.value(); + } + + auto sampling_limit_per_second = std::getenv("DD_TRACE_RATE_LIMIT"); + if (sampling_limit_per_second != nullptr) { + auto maybe_value = + parseDouble(sampling_limit_per_second, 0.0, std::numeric_limits::infinity()); + if (!maybe_value) { + maybe_value.error().insert(0, "while parsing DD_TRACE_RATE_LIMIT: "); + return ot::make_unexpected(std::move(maybe_value.error())); } + opts.sampling_limit_per_second = maybe_value.value(); } + + auto sample_rate = std::getenv("DD_TRACE_SAMPLE_RATE"); + if (sample_rate != nullptr) { + auto maybe_value = parseDouble(sample_rate, 0.0, 1.0); + if (!maybe_value) { + maybe_value.error().insert(0, "while parsing DD_TRACE_SAMPLE_RATE: "); + return ot::make_unexpected(std::move(maybe_value.error())); + } + opts.sample_rate = maybe_value.value(); + } + return opts; } diff --git a/src/tracer_options.h b/src/tracer_options.h index cdc73e6f..88c9d21a 100644 --- a/src/tracer_options.h +++ b/src/tracer_options.h @@ -20,7 +20,7 @@ ot::expected> asPropagationStyle( const std::vector& styles); // TODO(cgilmour): refactor this so it returns a "finalized options" type. -ot::expected applyTracerOptionsFromEnvironment( +ot::expected applyTracerOptionsFromEnvironment( const TracerOptions& input); } // namespace opentracing diff --git a/test/integration/nginx/Dockerfile b/test/integration/nginx/Dockerfile index 27e6d2fe..6e73f598 100644 --- a/test/integration/nginx/Dockerfile +++ b/test/integration/nginx/Dockerfile @@ -61,7 +61,7 @@ RUN rm -rf dd-opentracing-cpp/.build RUN mkdir -p dd-opentracing-cpp/.build WORKDIR dd-opentracing-cpp/.build RUN cmake -DBUILD_PLUGIN=ON -DBUILD_STATIC=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -RUN make +RUN make "-j${MAKE_JOB_COUNT:-$(nproc)}" RUN make install WORKDIR .. diff --git a/test/integration/nginx/expected_tc1.json b/test/integration/nginx/expected_tc1.json index aeb28674..dc28d7df 100644 --- a/test/integration/nginx/expected_tc1.json +++ b/test/integration/nginx/expected_tc1.json @@ -12,9 +12,8 @@ "upstream.address": "" }, "metrics": { - "_dd.limit_psr": 1, - "_dd.rule_psr": 1, - "_sampling_priority_v1": 2 + "_dd.agent_psr": 1, + "_sampling_priority_v1": 1 }, "name": "nginx.handle", "resource": "/", @@ -35,9 +34,8 @@ "upstream.address": "" }, "metrics": { - "_dd.limit_psr": 1, - "_dd.rule_psr": 1, - "_sampling_priority_v1": 2 + "_dd.agent_psr": 1, + "_sampling_priority_v1": 1 }, "name": "nginx.handle", "resource": "/", @@ -58,9 +56,8 @@ "upstream.address": "" }, "metrics": { - "_dd.limit_psr": 1, - "_dd.rule_psr": 1, - "_sampling_priority_v1": 2 + "_dd.agent_psr": 1, + "_sampling_priority_v1": 1 }, "name": "nginx.handle", "resource": "/", diff --git a/test/integration/nginx/nginx_integration_test.sh b/test/integration/nginx/nginx_integration_test.sh index 4ba1d7d1..b98e025e 100755 --- a/test/integration/nginx/nginx_integration_test.sh +++ b/test/integration/nginx/nginx_integration_test.sh @@ -5,7 +5,7 @@ # * Java, Golang # Run this test from the Docker container or CircleCI. -# Disable tracer startup logs for test purposes +# Disable tracer startup logs (these tests consider any nginx output an error). export DD_TRACE_STARTUP_LOGS=false NGINX_CONF_PATH=$(nginx -V 2>&1 | grep "configure arguments" | sed -n 's/.*--conf-path=\([^ ]*\).*/\1/p') @@ -73,7 +73,7 @@ function run_nginx() { wait_for_port 80 } -# TEST 1: Ensure the right traces sent to the agent. +echo "Test 1: Ensure the right traces sent to the agent." # Start wiremock in background wiremock --port 8126 >/dev/null 2>&1 & # Wait for wiremock to start @@ -105,7 +105,7 @@ then fi reset_test -# TEST 2: Check that libcurl isn't writing to stdout +echo "Test 2: Check that libcurl isn't writing to stdout" run_nginx curl -s localhost?[1-10000] 1> /tmp/curl_log.txt @@ -118,7 +118,7 @@ then fi reset_test -# TEST 3: Check that creating a root span doesn't produce an error +echo "Test 3: Check that creating a root span doesn't produce an error" run_nginx curl -s localhost?[1-5] 1> /tmp/curl_log.txt @@ -137,7 +137,7 @@ then fi reset_test -# Test 4: Check that priority sampling works. +echo "Test 4: Check that priority sampling works." # Start the mock agent wiremock --port 8126 >/dev/null 2>&1 & wait_for_port 8126 curl -s -X POST --data '{ "priority":10, "request": { "method": "ANY", "urlPattern": ".*" }, "response": { "status": 200, "body": "{\"rate_by_service\":{\"service:nginx,env:prod\":0.5, \"service:nginx,env:\":0.2, \"service:wrong,env:\":0.1, \"service:nginx,env:wrong\":0.9}}" }}' http://localhost:8126/__admin/mappings/new @@ -166,7 +166,7 @@ curl -s localhost/proxy/?[1-1000] 1> /tmp/curl_log.txt # Check the traces the agent got. GOT=$(get_n_traces 1000) -RATE=$(echo $GOT | jq '[.[] | .[] | .metrics._sampling_priority_v1] | add/length') +RATE=$(echo "$GOT" | jq '[.[] | .[] | .metrics._sampling_priority_v1] | add/length') if [ $(echo $RATE | jq '(. > 0.45) and (. < 0.55)') != "true" ] then echo "Test 4 failed: Sample rate should be ~0.5 but was $RATE" @@ -189,7 +189,7 @@ then fi reset_test -# Test 5: Ensure that NGINX errors are reported to Datadog +echo "Test 5: Ensure that NGINX errors are reported to Datadog" wiremock --port 8126 >/dev/null 2>&1 & # Wait for wiremock to start wait_for_port 8126 @@ -203,7 +203,7 @@ run_nginx curl -s localhost/get_error/ 1> /tmp/curl_log.txt GOT=$(get_n_traces 1) -ERROR=$(echo $GOT | jq '.[] | .[] | .error') +ERROR=$(echo "$GOT" | jq '.[] | .[] | .error') if ! [ "$ERROR" = "1" ] then @@ -213,7 +213,7 @@ fi reset_test -# Test 6: Origin header is propagated and adds a tag +echo "Test 6: Origin header is propagated and adds a tag" wiremock --port 8126 >/dev/null 2>&1 & wait_for_port 8126 curl -s -X POST --data '{ "priority":10, "request": { "method": "ANY", "urlPattern": ".*" }, "response": { "status": 200, "body": "OK" }}' http://localhost:8126/__admin/mappings/new wiremock --port 8080 >/dev/null 2>&1 & wait_for_port 8080 diff --git a/test/limiter_test.cpp b/test/limiter_test.cpp index 2c7f8fb7..837e2404 100644 --- a/test/limiter_test.cpp +++ b/test/limiter_test.cpp @@ -63,7 +63,7 @@ TEST_CASE("limiter") { } SECTION("updates tokens at sub-second intervals") { - Limiter lim(get_time, 5, 5.0, 1); // replace tokens @ 5.0 per second (ie: every 0.2s) + Limiter lim(get_time, 5, 5.0, 1); // replace tokens @ 5.0 per second (i.e. every 0.2 seconds) // consume all the tokens first for (auto i = 0; i < 5; i++) { auto result = lim.allow(); @@ -87,4 +87,46 @@ TEST_CASE("limiter") { all_consumed = lim.allow(); REQUIRE(!all_consumed.allowed); } + + SECTION("updates tokens at multi-second intervals") { + Limiter lim(get_time, 1, 0.25, 1); // replace tokens @ 0.25 per second (i.e. every 4 seconds) + + // 0 seconds (0s) + auto result = lim.allow(); + REQUIRE(result.allowed); + + for (int i = 0; i < 3; ++i) { + // 1s, 2s, 3s... still haven't released a token + advanceTime(time, std::chrono::seconds(1)); + result = lim.allow(); + REQUIRE(!result.allowed); + } + + // 4s... one token was just released + advanceTime(time, std::chrono::seconds(1)); + result = lim.allow(); + REQUIRE(result.allowed); + + // still 4s... and we used that token already + result = lim.allow(); + REQUIRE(!result.allowed); + } + + SECTION("dedicated constructor configures based on desired allowed-per-second") { + const double per_second = 23.97; + Limiter lim(get_time, per_second); + for (int i = 0; i < 24; ++i) { + auto result = lim.allow(); + REQUIRE(result.allowed); + } + + auto result = lim.allow(); + REQUIRE(!result.allowed); + + advanceTime(time, std::chrono::milliseconds(int(1 / per_second * 1000) + 1)); + result = lim.allow(); + REQUIRE(result.allowed); + result = lim.allow(); + REQUIRE(!result.allowed); + } } diff --git a/test/sample_test.cpp b/test/sample_test.cpp index 675a0e6e..ae64283f 100644 --- a/test/sample_test.cpp +++ b/test/sample_test.cpp @@ -121,6 +121,9 @@ TEST_CASE("rules sampler") { SECTION("falls back to priority sampling when no matching rule") { TracerOptions tracer_options; tracer_options.service = "test.service"; + // In addition to `tracer_options.sampling_rules`, there would be an + // implicit rule added if `tracer_options.sample_rate` were not NaN. That + // case is handled in the next section (not this one). tracer_options.sampling_rules = R"([ {"name": "unmatched.name", "service": "unmatched.service", "sample_rate": 0.1} ])"; @@ -136,6 +139,25 @@ TEST_CASE("rules sampler") { REQUIRE(metrics.find("_dd.agent_psr") != metrics.end()); } + SECTION("falls back to catch-all rule if sample_rate is configured and no other rule matches") { + TracerOptions tracer_options; + tracer_options.service = "test.service"; + tracer_options.sample_rate = 0.5; + tracer_options.sampling_rules = R"([ + {"name": "unmatched.name", "service": "unmatched.service", "sample_rate": 0.1} +])"; + auto tracer = + std::make_shared(tracer_options, writer, sampler, std::make_shared()); + + auto span = tracer->StartSpanWithOptions("operation.name", span_options); + span->FinishWithOptions(finish_options); + + auto& metrics = mwriter->traces[0][0]->metrics; + REQUIRE(metrics.find("_dd.rule_psr") != metrics.end()); + REQUIRE(metrics["_dd.rule_psr"] == 0.5); + REQUIRE(metrics.find("_dd.agent_psr") == metrics.end()); + } + SECTION("rule matching applied to overridden name") { TracerOptions tracer_options; tracer_options.service = "test.service"; diff --git a/test/tracer_options_test.cpp b/test/tracer_options_test.cpp index 06db15eb..36528628 100644 --- a/test/tracer_options_test.cpp +++ b/test/tracer_options_test.cpp @@ -3,15 +3,31 @@ #include #include +#include using namespace datadog::opentracing; +using namespace std::string_literals; -void requireTracerOptionsResultsMatch(const ot::expected &lhs, - const ot::expected &rhs) { +namespace datadog { +namespace opentracing { + +std::ostream &operator<<(std::ostream &stream, + const ot::expected &result) { + if (result.has_value()) { + return stream << "TracerOptions{...}"; + } + return stream << result.error(); +} + +} // namespace opentracing +} // namespace datadog + +void requireTracerOptionsResultsMatch(const ot::expected &lhs, + const ot::expected &rhs) { // One is an error, the other not. REQUIRE((!!lhs) == (!!rhs)); // They're both errors. if (!lhs) { - REQUIRE(std::string(lhs.error()) == std::string(rhs.error())); + REQUIRE(lhs.error() == rhs.error()); return; } // Compare TracerOptions. @@ -39,13 +55,14 @@ void requireTracerOptionsResultsMatch(const ot::expectedanalytics_rate == rhs->analytics_rate); } REQUIRE(lhs->tags == rhs->tags); + REQUIRE(lhs->sampling_limit_per_second == rhs->sampling_limit_per_second); } TEST_CASE("tracer options from environment variables") { TracerOptions input{}; struct TestCase { std::map environment_variables; - ot::expected expected; + ot::expected expected; }; auto test_case = GENERATE(values({ @@ -61,45 +78,87 @@ TEST_CASE("tracer options from environment variables") { {"DD_TRACE_REPORT_HOSTNAME", "true"}, {"DD_TRACE_ANALYTICS_ENABLED", "true"}, {"DD_TRACE_ANALYTICS_SAMPLE_RATE", "0.5"}, - {"DD_TAGS", "host:my-host-name,region:us-east-1,datacenter:us,partition:5"}}, + {"DD_TAGS", "host:my-host-name,region:us-east-1,datacenter:us,partition:5"}, + {"DD_TRACE_RATE_LIMIT", "200"}, + {"DD_TRACE_SAMPLE_RATE", "0.7"}}, TracerOptions{ - "host", - 420, - "service", - "web", - "test-env", - std::nan(""), - true, - "rules", - 1000, - "", - {PropagationStyle::Datadog, PropagationStyle::B3}, - {PropagationStyle::Datadog, PropagationStyle::B3}, - true, - true, - 0.5, + "host", // agent_host + 420, // agent_port + "service", // service + "web", // type + "test-env", // environment + 0.7, // sample_rate + true, // priority_sampling + "rules", // sampling_rules + 1000, // write_period_ms + "", // operation_name_override + {PropagationStyle::Datadog, PropagationStyle::B3}, // extract + {PropagationStyle::Datadog, PropagationStyle::B3}, // inject + true, // report_hostname + true, // analytics_enabled + 0.5, // analytics_rate { {"host", "my-host-name"}, {"region", "us-east-1"}, {"datacenter", "us"}, {"partition", "5"}, - }, - "test-version v0.0.1", + }, // tags + "test-version v0.0.1", // version + "", // agent_url + [](LogLevel, ot::string_view) {}, // log_func (dummy) + 200 // sampling_limit_per_second }}, {{{"DD_PROPAGATION_STYLE_EXTRACT", "Not even a real style"}}, - ot::make_unexpected("Value for DD_PROPAGATION_STYLE_EXTRACT is invalid")}, + ot::make_unexpected("Value for DD_PROPAGATION_STYLE_EXTRACT is invalid"s)}, {{{"DD_PROPAGATION_STYLE_INJECT", "Not even a real style"}}, - ot::make_unexpected("Value for DD_PROPAGATION_STYLE_INJECT is invalid")}, + ot::make_unexpected("Value for DD_PROPAGATION_STYLE_INJECT is invalid"s)}, {{{"DD_TRACE_AGENT_PORT", "sixty nine"}}, - ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is invalid")}, + ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is invalid"s)}, {{{"DD_TRACE_AGENT_PORT", "9223372036854775807"}}, - ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is out of range")}, + ot::make_unexpected("Value for DD_TRACE_AGENT_PORT is out of range"s)}, {{{"DD_TRACE_REPORT_HOSTNAME", "yes please"}}, - ot::make_unexpected("Value for DD_TRACE_REPORT_HOSTNAME is invalid")}, + ot::make_unexpected("Value for DD_TRACE_REPORT_HOSTNAME is invalid"s)}, {{{"DD_TRACE_ANALYTICS_ENABLED", "yes please"}}, - ot::make_unexpected("Value for DD_TRACE_ANALYTICS_ENABLED is invalid")}, + ot::make_unexpected("Value for DD_TRACE_ANALYTICS_ENABLED is invalid"s)}, {{{"DD_TRACE_ANALYTICS_SAMPLE_RATE", "1.1"}}, - ot::make_unexpected("Value for DD_TRACE_ANALYTICS_SAMPLE_RATE is invalid")}, + ot::make_unexpected( + "while parsing DD_TRACE_ANALYTICS_SAMPLE_RATE: not within the expected bounds [0, 1]: 1.1"s)}, + // Each of DD_TRACE_SAMPLE_RATE and DD_TRACE_RATE_LIMIT can fail to parse + // for any of the following reasons: + // - not a floating point number + // - can't fit in a double + // - outside of the expected range (e.g. [0, 1] for DD_TRACE_SAMPLE_RATE) + // - has non-whitespace trailing characters + {{{"DD_TRACE_SAMPLE_RATE", "total nonsense"}}, + ot::make_unexpected( + "while parsing DD_TRACE_SAMPLE_RATE: does not look like a double: total nonsense"s)}, + {{{"DD_TRACE_SAMPLE_RATE", "3.14e99999"}}, + ot::make_unexpected( + "while parsing DD_TRACE_SAMPLE_RATE: not within the range of a double: 3.14e99999"s)}, + {{{"DD_TRACE_SAMPLE_RATE", "1.6"}}, + ot::make_unexpected( + "while parsing DD_TRACE_SAMPLE_RATE: not within the expected bounds [0, 1]: 1.6"s)}, + {{{"DD_TRACE_SAMPLE_RATE", "NaN"}}, + ot::make_unexpected( + "while parsing DD_TRACE_SAMPLE_RATE: not within the expected bounds [0, 1]: nan"s)}, + {{{"DD_TRACE_SAMPLE_RATE", "0.5 BOOM"}}, + ot::make_unexpected( + "while parsing DD_TRACE_SAMPLE_RATE: contains trailing non-floating-point characters: 0.5 BOOM"s)}, + {{{"DD_TRACE_RATE_LIMIT", "total nonsense"}}, + ot::make_unexpected( + "while parsing DD_TRACE_RATE_LIMIT: does not look like a double: total nonsense"s)}, + {{{"DD_TRACE_RATE_LIMIT", "3.14e99999"}}, + ot::make_unexpected( + "while parsing DD_TRACE_RATE_LIMIT: not within the range of a double: 3.14e99999"s)}, + {{{"DD_TRACE_RATE_LIMIT", "-8"}}, + ot::make_unexpected( + "while parsing DD_TRACE_RATE_LIMIT: not within the expected bounds [0, inf]: -8"s)}, + {{{"DD_TRACE_RATE_LIMIT", "NaN"}}, + ot::make_unexpected( + "while parsing DD_TRACE_RATE_LIMIT: not within the expected bounds [0, inf]: nan"s)}, + {{{"DD_TRACE_RATE_LIMIT", "0.5 BOOM"}}, + ot::make_unexpected( + "while parsing DD_TRACE_RATE_LIMIT: contains trailing non-floating-point characters: 0.5 BOOM"s)}, })); // Setup diff --git a/test/tracer_test.cpp b/test/tracer_test.cpp index 483aad1e..08669f6e 100644 --- a/test/tracer_test.cpp +++ b/test/tracer_test.cpp @@ -199,7 +199,7 @@ TEST_CASE("env overrides") { ::setenv(env_test.env.c_str(), env_test.val.c_str(), 0); auto maybe_options = applyTracerOptionsFromEnvironment(tracer_options); if (env_test.error) { - REQUIRE(maybe_options.error()); + REQUIRE(!maybe_options); return; } REQUIRE(maybe_options);