diff --git a/include/envoy/stats/stats.h b/include/envoy/stats/stats.h index 3e7997052e6d..4cf6b12cd175 100644 --- a/include/envoy/stats/stats.h +++ b/include/envoy/stats/stats.h @@ -10,6 +10,8 @@ #include "envoy/common/interval_set.h" #include "envoy/common/pure.h" +#include "absl/strings/string_view.h" + namespace Envoy { namespace Event { class Dispatcher; @@ -58,6 +60,20 @@ class TagExtractor { */ virtual bool extractTag(const std::string& stat_name, std::vector& tags, IntervalSet& remove_characters) const PURE; + + /** + * Finds a prefix string associated with the matching criteria owned by the + * extractor. This is used to reduce the number of extractors required for + * processing each stat, by pulling the first "."-separated token on the tag. + * + * If a prefix cannot be extracted, an empty string_view is returned, and the + * matcher must be applied on all inputs. + * + * The storage for the prefix is owned by the TagExtractor. + * + * @return absl::string_view the prefix, or an empty string_view if none was found. 
+ */ + virtual absl::string_view prefixToken() const PURE; }; typedef std::unique_ptr TagExtractorPtr; diff --git a/source/common/common/BUILD b/source/common/common/BUILD index 7fc5586f92fd..d67b215548ec 100644 --- a/source/common/common/BUILD +++ b/source/common/common/BUILD @@ -132,9 +132,9 @@ envoy_cc_library( hdrs = ["utility.h"], deps = [ ":assert_lib", + ":hash_lib", "//include/envoy/common:interval_set_interface", "//include/envoy/common:time_interface", - "//source/common/common:hash_lib", ], ) diff --git a/source/common/common/utility.h b/source/common/common/utility.h index 0d4221bb1baf..a423bf183098 100644 --- a/source/common/common/utility.h +++ b/source/common/common/utility.h @@ -15,6 +15,7 @@ #include "envoy/common/time.h" #include "common/common/assert.h" +#include "common/common/hash.h" #include "absl/strings/string_view.h" @@ -424,4 +425,11 @@ template class IntervalSetImpl : public IntervalSet { std::set intervals_; // Intervals do not overlap or abut. }; +/** + * Hashing functor for use with unordered_map and unordered_set with string_view as a key. + */ +struct StringViewHash { + std::size_t operator()(const absl::string_view& k) const { return HashUtil::xxHash64(k); } +}; + } // namespace Envoy diff --git a/source/common/config/well_known_names.cc b/source/common/config/well_known_names.cc index efde72e21f23..fc6ee0f41edb 100644 --- a/source/common/config/well_known_names.cc +++ b/source/common/config/well_known_names.cc @@ -3,9 +3,7 @@ namespace Envoy { namespace Config { -std::vector> TagNameValues::getRegexMapping() { - std::vector> name_regex_pairs; - +TagNameValues::TagNameValues() { // Note: the default regexes are defined below in the order that they will typically be matched // (see the TagExtractor class definition for an explanation of the iterative matching process). // This ordering is roughly from most specific to least specific. 
Despite the fact that these @@ -26,88 +24,84 @@ std::vector> TagNameValues::getRegexMapping( // - Typical * notation will be used to denote an arbitrary set of characters. // *_rq(_) - name_regex_pairs.push_back({RESPONSE_CODE, "_rq(_(\\d{3}))$"}); + addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$"); // *_rq_()xx - name_regex_pairs.push_back({RESPONSE_CODE_CLASS, "_rq_(\\d)xx$"}); + addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$"); // http.[.]dynamodb.table.[.]capacity.[.](__partition_id=) - name_regex_pairs.push_back({DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." - "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))" - "$"}); + addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." + "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))" + "$"); // http.[.]dynamodb.operation.(.) or // http.[.]dynamodb.table.[.]capacity.(.)[] - name_regex_pairs.push_back({DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" - "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)"}); + addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" + "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)"); // mongo.[.]collection.[.]callsite.(.)query. - name_regex_pairs.push_back( - {MONGO_CALLSITE, - "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$"}); + addRegex(MONGO_CALLSITE, + "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$"); // http.[.]dynamodb.table.(.) or // http.[.]dynamodb.error.(.)* - name_regex_pairs.push_back( - {DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)"}); + addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)"); // mongo.[.]collection.(.)query. - name_regex_pairs.push_back( - {MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$"}); + addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$"); // mongo.[.]cmd.(.) 
- name_regex_pairs.push_back({MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$"}); + addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$"); // cluster.[.]grpc.[.](.) - name_regex_pairs.push_back( - {GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$"}); + addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$"); // http.[.]user_agent.(.) - name_regex_pairs.push_back({HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$"}); + addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$"); // vhost.[.]vcluster.(.) - name_regex_pairs.push_back({VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$"}); + addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$"); // http.[.]fault.(.) - name_regex_pairs.push_back( - {FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$"}); + addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$"); // listener.[
.]ssl.cipher.() - name_regex_pairs.push_back({SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$"}); + addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$"); // cluster.[.]ssl.ciphers.() - name_regex_pairs.push_back({SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$"}); + addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$"); // cluster.[.]grpc.(.)* - name_regex_pairs.push_back({GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)"}); + addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)"); // tcp.(.) - name_regex_pairs.push_back({TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$"}); + addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$"); // auth.clientssl.(.) - name_regex_pairs.push_back({CLIENTSSL_PREFIX, "^auth\\.clientssl\\.((.*?)\\.)\\w+?$"}); + addRegex(CLIENTSSL_PREFIX, "^auth\\.clientssl\\.((.*?)\\.)\\w+?$"); // ratelimit.(.) - name_regex_pairs.push_back({RATELIMIT_PREFIX, "^ratelimit\\.((.*?)\\.)\\w+?$"}); + addRegex(RATELIMIT_PREFIX, "^ratelimit\\.((.*?)\\.)\\w+?$"); // cluster.(.)* - name_regex_pairs.push_back({CLUSTER_NAME, "^cluster\\.((.*?)\\.)"}); + addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)"); // http.(.)* or listener.[
.]http.(.)* - name_regex_pairs.push_back( - {HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)"}); + addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)"); // listener.(
.)* - name_regex_pairs.push_back( - {LISTENER_ADDRESS, "^listener\\.(((?:[_.[:digit:]]*|[_\\[\\]aAbBcCdDeEfF[:digit:]]*))\\.)"}); + addRegex(LISTENER_ADDRESS, + "^listener\\.(((?:[_.[:digit:]]*|[_\\[\\]aAbBcCdDeEfF[:digit:]]*))\\.)"); // vhost.(.)* - name_regex_pairs.push_back({VIRTUAL_HOST, "^vhost\\.((.*?)\\.)"}); + addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)"); // mongo.(.)* - name_regex_pairs.push_back({MONGO_PREFIX, "^mongo\\.((.*?)\\.)"}); + addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)"); +} - return name_regex_pairs; +void TagNameValues::addRegex(const std::string& name, const std::string& regex) { + descriptor_vec_.emplace_back(Descriptor(name, regex)); } } // namespace Config diff --git a/source/common/config/well_known_names.h b/source/common/config/well_known_names.h index c5a5ea3b4174..43757ad4666b 100644 --- a/source/common/config/well_known_names.h +++ b/source/common/config/well_known_names.h @@ -219,6 +219,20 @@ typedef ConstSingleton MetadataEnvoyLbKeys; */ class TagNameValues { public: + TagNameValues(); + + /** + * Represents a tag extraction. This structure may be extended to + * allow for a faster pattern-matching engine to be used as an + * alternative to regexes, on an individual tag basis. Some of the + * tags, such as "_rq_(\\d)xx$", will probably stay as regexes. + */ + struct Descriptor { + Descriptor(const std::string& name, const std::string& regex) : name_(name), regex_(regex) {} + const std::string name_; + const std::string regex_; + }; + // Cluster name tag const std::string CLUSTER_NAME = "envoy.cluster_name"; // Listener port tag @@ -269,12 +283,14 @@ class TagNameValues { // Mapping from the names above to their respective regex strings. const std::vector> name_regex_pairs_; - // Constructor to fill map. - TagNameValues() : name_regex_pairs_(getRegexMapping()) {} + // Returns the list of descriptors. + const std::vector& descriptorVec() const { return descriptor_vec_; } private: - // Creates a regex mapping for all tag names. 
- std::vector> getRegexMapping(); + void addRegex(const std::string& name, const std::string& regex); + + // Collection of tag descriptors. + std::vector descriptor_vec_; }; typedef ConstSingleton TagNames; diff --git a/source/common/stats/stats_impl.cc b/source/common/stats/stats_impl.cc index 2cfb6b8efffb..cb7055a5631c 100644 --- a/source/common/stats/stats_impl.cc +++ b/source/common/stats/stats_impl.cc @@ -11,6 +11,9 @@ #include "common/common/utility.h" #include "common/config/well_known_names.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" + namespace Envoy { namespace Stats { @@ -62,7 +65,27 @@ std::string Utility::sanitizeStatsName(const std::string& name) { } TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex) - : name_(name), regex_(RegexUtil::parseRegex(regex)) {} + : name_(name), prefix_(std::string(extractRegexPrefix(regex))), + regex_(RegexUtil::parseRegex(regex)) {} + +std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) { + std::string prefix; + if (absl::StartsWith(regex, "^")) { + for (absl::string_view::size_type i = 1; i < regex.size(); ++i) { + if (!absl::ascii_isalnum(regex[i]) && (regex[i] != '_')) { + if (i > 1) { + const bool last_char = i == regex.size() - 1; + if ((!last_char && (regex[i] == '\\') && (regex[i + 1] == '.')) || + (last_char && (regex[i] == '$'))) { + prefix.append(regex.data() + 1, i - 1); + } + } + break; + } + } + } + return prefix; +} TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, const std::string& regex) { @@ -71,22 +94,11 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, throw EnvoyException("tag_name cannot be empty"); } - if (!regex.empty()) { - return TagExtractorPtr{new TagExtractorImpl(name, regex)}; - } else { - // Look up the default for that name. 
- const auto& name_regex_pairs = Config::TagNames::get().name_regex_pairs_; - auto it = std::find_if(name_regex_pairs.begin(), name_regex_pairs.end(), - [&name](const std::pair& name_regex_pair) { - return name == name_regex_pair.first; - }); - if (it != name_regex_pairs.end()) { - return TagExtractorPtr{new TagExtractorImpl(name, it->second)}; - } else { - throw EnvoyException(fmt::format( - "No regex specified for tag specifier and no default regex for name: '{}'", name)); - } + if (regex.empty()) { + throw EnvoyException(fmt::format( + "No regex specified for tag specifier and no default regex for name: '{}'", name)); } + return TagExtractorPtr{new TagExtractorImpl(name, regex)}; } bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector& tags, @@ -128,62 +140,103 @@ RawStatData* HeapRawStatDataAllocator::alloc(const std::string& name) { TagProducerImpl::TagProducerImpl(const envoy::config::metrics::v2::StatsConfig& config) : TagProducerImpl() { // To check name conflict. - std::unordered_set names; reserveResources(config); - addDefaultExtractors(config, names); + std::unordered_set names = addDefaultExtractors(config); for (const auto& tag_specifier : config.stats_tags()) { - if (!names.emplace(tag_specifier.tag_name()).second) { - throw EnvoyException(fmt::format("Tag name '{}' specified twice.", tag_specifier.tag_name())); + const std::string& name = tag_specifier.tag_name(); + if (!names.emplace(name).second) { + throw EnvoyException(fmt::format("Tag name '{}' specified twice.", name)); } // If no tag value is found, fallback to default regex to keep backward compatibility. 
if (tag_specifier.tag_value_case() == envoy::config::metrics::v2::TagSpecifier::TAG_VALUE_NOT_SET || tag_specifier.tag_value_case() == envoy::config::metrics::v2::TagSpecifier::kRegex) { - tag_extractors_.emplace_back(Stats::TagExtractorImpl::createTagExtractor( - tag_specifier.tag_name(), tag_specifier.regex())); + if (tag_specifier.regex().empty()) { + if (addExtractorsMatching(name) == 0) { + throw EnvoyException(fmt::format( + "No regex specified for tag specifier and no default regex for name: '{}'", name)); + } + } else { + addExtractor(Stats::TagExtractorImpl::createTagExtractor(name, tag_specifier.regex())); + } } else if (tag_specifier.tag_value_case() == envoy::config::metrics::v2::TagSpecifier::kFixedValue) { - default_tags_.emplace_back( - Stats::Tag{.name_ = tag_specifier.tag_name(), .value_ = tag_specifier.fixed_value()}); + default_tags_.emplace_back(Stats::Tag{.name_ = name, .value_ = tag_specifier.fixed_value()}); } } } -std::string TagProducerImpl::produceTags(const std::string& stat_name, - std::vector& tags) const { - tags.insert(tags.end(), default_tags_.begin(), default_tags_.end()); +int TagProducerImpl::addExtractorsMatching(absl::string_view name) { + int num_found = 0; + for (const auto& desc : Config::TagNames::get().descriptorVec()) { + if (desc.name_ == name) { + addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); + ++num_found; + } + } + // TODO(jmarantz): Changing the default tag regexes so that more than one regex can + // yield the same tag, on the theory that this will reduce regex backtracking. At the + // moment, this doesn't happen, so this flow isn't well tested. When we start exploiting + // this, and it's tested, we can simply remove this assert. 
+ ASSERT(num_found <= 1); + return num_found; +} + +void TagProducerImpl::addExtractor(TagExtractorPtr extractor) { + const absl::string_view prefix = extractor->prefixToken(); + if (prefix.empty()) { + tag_extractors_without_prefix_.emplace_back(std::move(extractor)); + } else { + tag_extractor_prefix_map_[prefix].emplace_back(std::move(extractor)); + } +} +void TagProducerImpl::forEachExtractorMatching( + const std::string& stat_name, std::function f) const { IntervalSetImpl remove_characters; - for (const TagExtractorPtr& tag_extractor : tag_extractors_) { - tag_extractor->extractTag(stat_name, tags, remove_characters); + for (const TagExtractorPtr& tag_extractor : tag_extractors_without_prefix_) { + f(tag_extractor); + } + const std::string::size_type dot = stat_name.find('.'); + if (dot != std::string::npos) { + const absl::string_view token = absl::string_view(stat_name.data(), dot); + const auto iter = tag_extractor_prefix_map_.find(token); + if (iter != tag_extractor_prefix_map_.end()) { + for (const TagExtractorPtr& tag_extractor : iter->second) { + f(tag_extractor); + } + } } - return StringUtil::removeCharacters(stat_name, remove_characters); } -// Roughly estimate the size of the vectors. 
+std::string TagProducerImpl::produceTags(const std::string& metric_name, + std::vector& tags) const { + tags.insert(tags.end(), default_tags_.begin(), default_tags_.end()); + IntervalSetImpl remove_characters; + forEachExtractorMatching( + metric_name, [&remove_characters, &tags, &metric_name](const TagExtractorPtr& tag_extractor) { + tag_extractor->extractTag(metric_name, tags, remove_characters); + }); + return StringUtil::removeCharacters(metric_name, remove_characters); +} + void TagProducerImpl::reserveResources(const envoy::config::metrics::v2::StatsConfig& config) { default_tags_.reserve(config.stats_tags().size()); - - if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) { - tag_extractors_.reserve(Config::TagNames::get().name_regex_pairs_.size() + - config.stats_tags().size()); - } else { - tag_extractors_.reserve(config.stats_tags().size()); - } } -void TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsConfig& config, - std::unordered_set& names) { +std::unordered_set +TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsConfig& config) { + std::unordered_set names; if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) { - for (const auto& extractor : Config::TagNames::get().name_regex_pairs_) { - names.emplace(extractor.first); - tag_extractors_.emplace_back( - Stats::TagExtractorImpl::createTagExtractor(extractor.first, extractor.second)); + for (const auto& desc : Config::TagNames::get().descriptorVec()) { + names.emplace(desc.name_); + addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); } } + return names; } void HeapRawStatDataAllocator::free(RawStatData& data) { diff --git a/source/common/stats/stats_impl.h b/source/common/stats/stats_impl.h index 5fbbe1f4a699..3b825473d9d8 100644 --- a/source/common/stats/stats_impl.h +++ b/source/common/stats/stats_impl.h @@ -29,10 +29,9 @@ namespace Stats { class 
TagExtractorImpl : public TagExtractor { public: /** - * Creates a tag extractor from the regex provided or looks up a default regex. - * @param name name for tag extractor. Used to look up a default tag extractor if regex is empty. - * @param regex optional regex expression. Can be specified as an empty string to trigger a - * default regex lookup. + * Creates a tag extractor from the regex provided. name and regex must be non-empty. + * @param name name for tag extractor. + * @param regex regex expression. * @return TagExtractorPtr newly constructed TagExtractor. */ static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex); @@ -41,12 +40,26 @@ class TagExtractorImpl : public TagExtractor { std::string name() const override { return name_; } bool extractTag(const std::string& tag_extracted_name, std::vector& tags, IntervalSet& remove_characters) const override; + absl::string_view prefixToken() const override { return prefix_; } private: + /** + * Examines a regex string for a leading ^alphanumerics_with_underscores\. (or a whole + * regex of ^alphanumerics_with_underscores$). Returns the token if found, else empty-string. + * @param regex absl::string_view the regex to scan for prefixes. + * @return std::string the prefix, or "" if no prefix found. + */ + static std::string extractRegexPrefix(absl::string_view regex); + + const std::string name_; + const std::string prefix_; const std::regex regex_; }; +/** + * Organizes a collection of TagExtractors so that stat-names can be processed without + * iterating through all extractors. 
+ */ class TagProducerImpl : public TagProducer { public: TagProducerImpl(const envoy::config::metrics::v2::StatsConfig& config); @@ -61,11 +74,65 @@ class TagProducerImpl : public TagProducer { std::string produceTags(const std::string& metric_name, std::vector& tags) const override; private: + friend class DefaultTagRegexTester; + + /** + * Adds a TagExtractor to the collection of extractors, tracking prefixes to help make + * produceTags run efficiently by trying only extractors that have a chance to match. + * @param extractor TagExtractorPtr the extractor to add. + */ + void addExtractor(TagExtractorPtr extractor); + + /** + * Adds all default extractors matching the specified tag name. In this model, + * more than one TagExtractor can be used to generate a given tag. The default + * extractors are specified in common/config/well_known_names.cc. + * @param name absl::string_view the tag name whose default extractors are added. + * @return int the number of matching extractors. + */ + int addExtractorsMatching(absl::string_view name); + + /** + * Reserves capacity in default_tags_ for the number of stats_tags in the config. + * @param config const envoy::config::metrics::v2::StatsConfig& the config. + */ void reserveResources(const envoy::config::metrics::v2::StatsConfig& config); - void addDefaultExtractors(const envoy::config::metrics::v2::StatsConfig& config, - std::unordered_set& names); - std::vector tag_extractors_; + /** + * Adds all default extractors from well_known_names.cc into the + * collection, unless config sets use_all_default_tags to false. + * Returns the names of the added extractors as a string-set, for + * dup-detection against the tag names specified in the configuration. + * @param config const envoy::config::metrics::v2::StatsConfig& the config. + * @return std::unordered_set the names of the default extractors that were added. + */ + std::unordered_set + addDefaultExtractors(const envoy::config::metrics::v2::StatsConfig& config); + + /** + * Iterates over every tag extractor that might possibly match stat_name, calling + * callback f for each one. 
This is broken out this way to reduce code redundancy + * during testing, where we want to verify that extraction is order-independent. + * The possibly-matching-extractors list is computed by: + * 1. Finding the first '.' separated token in stat_name (none if it has no '.'). + * 2. Collecting the TagExtractors whose regexes have that same prefix "^prefix\\." + * 3. Collecting also the TagExtractors whose regexes don't start with any prefix. + * In the future, we may also do substring searches in some cases. + * See DefaultTagRegexTester::produceTagsReverse in test/common/stats/stats_impl_test.cc. + * + * @param stat_name const std::string& the stat name. + * @param f std::function function to call for each extractor. + */ + void forEachExtractorMatching(const std::string& stat_name, + std::function f) const; + + std::vector tag_extractors_without_prefix_; + + // Maps a prefix word extracted out of a regex to a vector of TagExtractors. Note that + // the storage for the prefix string is owned by the TagExtractor, which, depending on + // implementation, may need to make a copy of the prefix. 
+ std::unordered_map, StringViewHash> + tag_extractor_prefix_map_; std::vector default_tags_; }; diff --git a/test/common/stats/stats_impl_test.cc b/test/common/stats/stats_impl_test.cc index 94d317247bb6..0f784dad7e16 100644 --- a/test/common/stats/stats_impl_test.cc +++ b/test/common/stats/stats_impl_test.cc @@ -1,5 +1,6 @@ #include #include +#include #include "envoy/config/metrics/v2/stats.pb.h" #include "envoy/stats/stats_macros.h" @@ -122,23 +123,14 @@ TEST(TagExtractorTest, BadRegex) { class DefaultTagRegexTester { public: - DefaultTagRegexTester() { - const auto& tag_names = Config::TagNames::get(); + DefaultTagRegexTester() : tag_extractors_(envoy::config::metrics::v2::StatsConfig()) {} - for (const std::pair& name_and_regex : tag_names.name_regex_pairs_) { - tag_extractors_.emplace_back(TagExtractorImpl::createTagExtractor(name_and_regex.first, "")); - } - } void testRegex(const std::string& stat_name, const std::string& expected_tag_extracted_name, const std::vector& expected_tags) { // Test forward iteration through the regexes std::vector tags; - IntervalSetImpl remove_characters; - for (const TagExtractorPtr& tag_extractor : tag_extractors_) { - tag_extractor->extractTag(stat_name, tags, remove_characters); - } - std::string tag_extracted_name = StringUtil::removeCharacters(stat_name, remove_characters); + const std::string tag_extracted_name = tag_extractors_.produceTags(stat_name, tags); auto cmp = [](const Tag& lhs, const Tag& rhs) { return lhs.name_ == rhs.name_ && lhs.value_ == rhs.value_; @@ -152,11 +144,7 @@ class DefaultTagRegexTester { // Reverse iteration through regexes to ensure ordering invariance std::vector rev_tags; - IntervalSetImpl rev_remove_characters; - for (auto it = tag_extractors_.rbegin(); it != tag_extractors_.rend(); ++it) { - (*it)->extractTag(stat_name, rev_tags, rev_remove_characters); - } - std::string rev_tag_extracted_name = StringUtil::removeCharacters(stat_name, remove_characters); + const std::string 
rev_tag_extracted_name = produceTagsReverse(stat_name, rev_tags); EXPECT_EQ(expected_tag_extracted_name, rev_tag_extracted_name); ASSERT_EQ(expected_tags.size(), rev_tags.size()) @@ -170,7 +158,33 @@ class DefaultTagRegexTester { stat_name); } - std::vector tag_extractors_; + /** + * Reimplements TagProducerImpl::produceTags, but extracts the tags in reverse order. + * This helps demonstrate that the order of extractors does not matter to the end result, + * assuming we don't care about tag-order. This is in large part correct by design because + * stat_name is not mutated until all the extraction is done. + * @param metric_name std::string a name of Stats::Metric (Counter, Gauge, Histogram). + * @param tags std::vector& a set of Stats::Tag. + * @return std::string the metric_name with tags removed. + */ + std::string produceTagsReverse(const std::string& metric_name, std::vector& tags) const { + // Note: one discrepancy between this and TagProducerImpl::produceTags is that this + // version does not add tag_extractors_.default_tags_ into tags. That doesn't matter + // for this test, however. + std::list extractors; // Note push-front is used to reverse order. + tag_extractors_.forEachExtractorMatching(metric_name, + [&extractors](const TagExtractorPtr& tag_extractor) { + extractors.push_front(tag_extractor.get()); + }); + + IntervalSetImpl remove_characters; + for (const TagExtractor* tag_extractor : extractors) { + tag_extractor->extractTag(metric_name, tags, remove_characters); + } + return StringUtil::removeCharacters(metric_name, remove_characters); + } + + TagProducerImpl tag_extractors_; }; TEST(TagExtractorTest, DefaultTagExtractors) { @@ -373,6 +387,27 @@ TEST(TagExtractorTest, DefaultTagExtractors) { {fault_connection_manager, fault_downstream_cluster}); } +TEST(TagExtractorTest, ExtractRegexPrefix) { + TagExtractorPtr tag_extractor; // Keep tag_extractor in this scope to prolong prefix lifetime. 
+ auto extractRegexPrefix = [&tag_extractor](const std::string& regex) -> absl::string_view { + tag_extractor = TagExtractorImpl::createTagExtractor("foo", regex); + return tag_extractor->prefixToken(); + }; + + EXPECT_EQ("", extractRegexPrefix("^prefix(foo).")); + EXPECT_EQ("prefix", extractRegexPrefix("^prefix\\.foo")); + EXPECT_EQ("", extractRegexPrefix("^notACompleteToken")); // + EXPECT_EQ("onlyToken", extractRegexPrefix("^onlyToken$")); // + EXPECT_EQ("", extractRegexPrefix("(prefix)")); + EXPECT_EQ("", extractRegexPrefix("^(prefix)")); + EXPECT_EQ("", extractRegexPrefix("prefix(foo)")); +} + +TEST(TagExtractorTest, CreateTagExtractorNoRegex) { + EXPECT_THROW_WITH_REGEX(TagExtractorImpl::createTagExtractor("no such default tag", ""), + EnvoyException, "^No regex specified for tag specifier and no default"); +} + TEST(TagProducerTest, CheckConstructor) { envoy::config::metrics::v2::StatsConfig stats_config;