Skip to content

Commit

Permalink
stats: Reorganize tags into a map<prefix, std::vector<tag>> to avoid …
Browse files Browse the repository at this point in the history
…initiating multiple regexes that don't match. (#2582)

Description:
Improves startup speed from about 18 seconds to 7 seconds on the 10k example from #2063.

This is step 5 in the plan to improve startup performance, and the first one (in time-order) to actually improve speed. This also removes a previously existing restriction that a tag can only be computed from one regex; now a tag could be computed by multiple different regexes, although no such change has occurred in this PR.

Note that the code which collects the regexes has been refactored to be more functional, and thus easier to change in an upcoming PR, but the regexes have not been changed at all.

Risk Level: Medium -- this is startup critical but I think it's functionally bounded and well tested.

Testing:
//test/...
manually hacked main_common to exit(0) before loop and timed server startup with 10k clusters.

Release Notes:
None yet, will add release notes when the chain of PRs is done.
  • Loading branch information
jmarantz authored and alyssawilk committed Feb 15, 2018
1 parent d0aea18 commit 9c728a0
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 111 deletions.
16 changes: 16 additions & 0 deletions include/envoy/stats/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "envoy/common/interval_set.h"
#include "envoy/common/pure.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Event {
class Dispatcher;
Expand Down Expand Up @@ -58,6 +60,20 @@ class TagExtractor {
*/
virtual bool extractTag(const std::string& stat_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const PURE;

/**
* Finds a prefix string associated with the matching criteria owned by the
* extractor. This is used to reduce the number of extractors required for
* processing each stat, by pulling the first "."-separated token on the tag.
*
* If a prefix cannot be extracted, an empty string_view is returned, and the
* matcher must be applied on all inputs.
*
* The storage for the prefix is owned by the TagExtractor.
*
* @return absl::string_view the prefix, or an empty string_view if none was found.
*/
virtual absl::string_view prefixToken() const PURE;
};

typedef std::unique_ptr<const TagExtractor> TagExtractorPtr;
Expand Down
2 changes: 1 addition & 1 deletion source/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ envoy_cc_library(
hdrs = ["utility.h"],
deps = [
":assert_lib",
":hash_lib",
"//include/envoy/common:interval_set_interface",
"//include/envoy/common:time_interface",
"//source/common/common:hash_lib",
],
)

Expand Down
8 changes: 8 additions & 0 deletions source/common/common/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "envoy/common/time.h"

#include "common/common/assert.h"
#include "common/common/hash.h"

#include "absl/strings/string_view.h"

Expand Down Expand Up @@ -424,4 +425,11 @@ template <typename Value> class IntervalSetImpl : public IntervalSet<Value> {
std::set<Interval, Compare> intervals_; // Intervals do not overlap or abut.
};

/**
* Hashing functor for use with unordered_map and unordered_set with string_view as a key.
*/
struct StringViewHash {
std::size_t operator()(const absl::string_view& k) const { return HashUtil::xxHash64(k); }
};

} // namespace Envoy
70 changes: 32 additions & 38 deletions source/common/config/well_known_names.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
namespace Envoy {
namespace Config {

std::vector<std::pair<std::string, std::string>> TagNameValues::getRegexMapping() {
std::vector<std::pair<std::string, std::string>> name_regex_pairs;

TagNameValues::TagNameValues() {
// Note: the default regexes are defined below in the order that they will typically be matched
// (see the TagExtractor class definition for an explanation of the iterative matching process).
// This ordering is roughly from most specific to least specific. Despite the fact that these
Expand All @@ -26,88 +24,84 @@ std::vector<std::pair<std::string, std::string>> TagNameValues::getRegexMapping(
// - Typical * notation will be used to denote an arbitrary set of characters.

// *_rq(_<response_code>)
name_regex_pairs.push_back({RESPONSE_CODE, "_rq(_(\\d{3}))$"});
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$");

// *_rq_(<response_code_class>)xx
name_regex_pairs.push_back({RESPONSE_CODE_CLASS, "_rq_(\\d)xx$"});
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$");

// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
name_regex_pairs.push_back({DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))"
"$"});
addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))"
"$");

// http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
name_regex_pairs.push_back({DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)"});
addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)");

// mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
name_regex_pairs.push_back(
{MONGO_CALLSITE,
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$"});
addRegex(MONGO_CALLSITE,
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$");

// http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or
// http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
name_regex_pairs.push_back(
{DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)"});
addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)");

// mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
name_regex_pairs.push_back(
{MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$"});
addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$");

// mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
name_regex_pairs.push_back({MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$"});
addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$");

// cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
name_regex_pairs.push_back(
{GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$"});
addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$");

// http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
name_regex_pairs.push_back({HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$"});
addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$");

// vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
name_regex_pairs.push_back({VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$"});
addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$");

// http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
name_regex_pairs.push_back(
{FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$"});
addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$");

// listener.[<address>.]ssl.cipher.(<cipher>)
name_regex_pairs.push_back({SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$"});
addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$");

// cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
name_regex_pairs.push_back({SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$"});
addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$");

// cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
name_regex_pairs.push_back({GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)"});
addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)");

// tcp.(<stat_prefix>.)<base_stat>
name_regex_pairs.push_back({TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$"});
addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$");

// auth.clientssl.(<stat_prefix>.)<base_stat>
name_regex_pairs.push_back({CLIENTSSL_PREFIX, "^auth\\.clientssl\\.((.*?)\\.)\\w+?$"});
addRegex(CLIENTSSL_PREFIX, "^auth\\.clientssl\\.((.*?)\\.)\\w+?$");

// ratelimit.(<stat_prefix>.)<base_stat>
name_regex_pairs.push_back({RATELIMIT_PREFIX, "^ratelimit\\.((.*?)\\.)\\w+?$"});
addRegex(RATELIMIT_PREFIX, "^ratelimit\\.((.*?)\\.)\\w+?$");

// cluster.(<cluster_name>.)*
name_regex_pairs.push_back({CLUSTER_NAME, "^cluster\\.((.*?)\\.)"});
addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)");

// http.(<stat_prefix>.)* or listener.[<address>.]http.(<stat_prefix>.)*
name_regex_pairs.push_back(
{HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)"});
addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)");

// listener.(<address>.)*
name_regex_pairs.push_back(
{LISTENER_ADDRESS, "^listener\\.(((?:[_.[:digit:]]*|[_\\[\\]aAbBcCdDeEfF[:digit:]]*))\\.)"});
addRegex(LISTENER_ADDRESS,
"^listener\\.(((?:[_.[:digit:]]*|[_\\[\\]aAbBcCdDeEfF[:digit:]]*))\\.)");

// vhost.(<virtual host name>.)*
name_regex_pairs.push_back({VIRTUAL_HOST, "^vhost\\.((.*?)\\.)"});
addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)");

// mongo.(<stat_prefix>.)*
name_regex_pairs.push_back({MONGO_PREFIX, "^mongo\\.((.*?)\\.)"});
addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
}

return name_regex_pairs;
// Appends a descriptor pairing the tag `name` with its `regex` string to
// descriptor_vec_.
//
// The Descriptor is constructed in place from the two arguments. Its members
// are const strings, so a temporary Descriptor could not be moved into the
// vector and would cost an additional copy of both strings.
//
// @param name the tag name.
// @param regex the regex string associated with the tag.
void TagNameValues::addRegex(const std::string& name, const std::string& regex) {
  descriptor_vec_.emplace_back(name, regex);
}

} // namespace Config
Expand Down
24 changes: 20 additions & 4 deletions source/common/config/well_known_names.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,20 @@ typedef ConstSingleton<MetadataEnvoyLbKeyValues> MetadataEnvoyLbKeys;
*/
class TagNameValues {
public:
TagNameValues();

/**
 * Describes a single tag extraction as a tag name paired with a regex
 * string. This structure may be extended to allow a faster
 * pattern-matching engine to be used as an alternative to regexes, on an
 * individual tag basis. Some of the tags, such as "_rq_(\\d)xx$", will
 * probably stay as regexes.
 */
struct Descriptor {
  // Stores copies of both strings; the members are immutable afterwards.
  Descriptor(const std::string& tag_name, const std::string& pattern)
      : name_(tag_name), regex_(pattern) {}

  // Name of the tag.
  const std::string name_;
  // Regex string associated with the tag.
  const std::string regex_;
};

// Cluster name tag
const std::string CLUSTER_NAME = "envoy.cluster_name";
// Listener port tag
Expand Down Expand Up @@ -269,12 +283,14 @@ class TagNameValues {
// Mapping from the names above to their respective regex strings.
const std::vector<std::pair<std::string, std::string>> name_regex_pairs_;

// Constructor to fill map.
TagNameValues() : name_regex_pairs_(getRegexMapping()) {}
// Read-only access to the collection of tag descriptors.
const std::vector<Descriptor>& descriptorVec() const {
  return descriptor_vec_;
}

private:
// Creates a regex mapping for all tag names.
std::vector<std::pair<std::string, std::string>> getRegexMapping();
void addRegex(const std::string& name, const std::string& regex);

// Collection of tag descriptors.
std::vector<Descriptor> descriptor_vec_;
};

typedef ConstSingleton<TagNameValues> TagNames;
Expand Down
Loading

0 comments on commit 9c728a0

Please sign in to comment.