Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: symbolize strings in HeapStatData and ThreadLocalStore #4281

Closed
wants to merge 9 commits into from
10 changes: 10 additions & 0 deletions include/envoy/stats/symbol_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,20 @@ class StatName {
public:
virtual ~StatName(){};
virtual std::string toString() const PURE;
virtual uint64_t hash() const PURE;
virtual bool operator==(const StatName& rhs) const PURE;
};

using StatNamePtr = std::unique_ptr<StatName>;

struct StatNameHash_ {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the trailing underscore here, and on StatNameCompare_?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must have thought it was idiomatic for some reason. Will change.

size_t operator()(const StatName* a) const { return a->hash(); }
};

struct StatNameCompare_ {
bool operator()(const StatName* a, const StatName* b) const { return (*a == *b); }
};

/**
* Interface for shortening and retrieving stat names.
*
Expand Down
15 changes: 15 additions & 0 deletions source/common/common/hash.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <string>
#include <vector>

#include "absl/strings/ascii.h"
#include "absl/strings/string_view.h"
Expand Down Expand Up @@ -35,6 +36,20 @@ class HashUtil {
};
return hash;
}

/**
 * Return 64-bit hash from a vector of uint32s.
 * Adapted from boost::hash_combine. See details here: https://stackoverflow.com/a/4948967
 * @param input supplies the vector to be hashed.
 * @return 64-bit hash of the supplied vector. An empty vector hashes to 0.
 */
static uint64_t hashVector(std::vector<uint32_t> const& input) {
  // Accumulate in uint64_t rather than std::size_t so the result is a full 64-bit hash even on
  // targets where std::size_t is only 32 bits wide. The vector length seeds the accumulator.
  uint64_t seed = input.size();
  for (const uint32_t element : input) {
    // `element + 0x9e3779b9` is evaluated first, in the (unsigned) type of its operands, before
    // being widened and mixed with the shifted accumulator; the operand order is kept as-is so
    // the produced hash values stay the same.
    seed ^= element + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  }
  return seed;
}
};

} // namespace Envoy
5 changes: 5 additions & 0 deletions source/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ envoy_cc_library(
hdrs = ["heap_stat_data.h"],
deps = [
":stat_data_allocator_lib",
":symbol_table_lib",
"//source/common/common:assert_lib",
"//source/common/common:hash_lib",
"//source/common/common:thread_annotations",
Expand Down Expand Up @@ -64,6 +65,7 @@ envoy_cc_library(
hdrs = ["raw_stat_data.h"],
deps = [
":stat_data_allocator_lib",
":symbol_table_lib",
"//include/envoy/stats:stats_interface",
"//source/common/common:assert_lib",
"//source/common/common:hash_lib",
Expand Down Expand Up @@ -126,6 +128,9 @@ envoy_cc_library(
deps = [
"//include/envoy/stats:symbol_table_interface",
"//source/common/common:assert_lib",
"//source/common/common:hash_lib",
"//source/common/common:lock_guard_lib",
"//source/common/common:thread_lib",
"//source/common/common:utility_lib",
],
)
Expand Down
4 changes: 2 additions & 2 deletions source/common/stats/heap_stat_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace Envoy {
namespace Stats {

HeapStatData::HeapStatData(absl::string_view key) : name_(key.data(), key.size()) {}
HeapStatData::HeapStatData(StatNamePtr name_ptr) : name_ptr_(std::move(name_ptr)) {}

HeapStatDataAllocator::HeapStatDataAllocator() {}

Expand All @@ -15,8 +15,8 @@ HeapStatDataAllocator::~HeapStatDataAllocator() { ASSERT(stats_.empty()); }
HeapStatData* HeapStatDataAllocator::alloc(absl::string_view name) {
// Any expected truncation of name is done at the callsite. No truncation is
// required to use this allocator.
auto data = std::make_unique<HeapStatData>(name);
Thread::ReleasableLockGuard lock(mutex_);
auto data = std::make_unique<HeapStatData>(table_.encode(name));
auto ret = stats_.insert(data.get());
HeapStatData* existing_data = *ret.first;
lock.release();
Expand Down
44 changes: 27 additions & 17 deletions source/common/stats/heap_stat_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
#include <unordered_set>

#include "common/common/hash.h"
#include "common/common/lock_guard.h"
#include "common/common/thread.h"
#include "common/common/thread_annotations.h"
#include "common/stats/stat_data_allocator_impl.h"
#include "common/stats/symbol_table_impl.h"

namespace Envoy {
namespace Stats {
Expand All @@ -17,23 +19,25 @@ namespace Stats {
* so that it can be allocated efficiently from the heap on demand.
*/
struct HeapStatData {
explicit HeapStatData(absl::string_view key);
explicit HeapStatData(StatNamePtr name_ptr);

/**
* @returns absl::string_view the name as a string_view.
* @returns std::string the name as a std::string with no truncation.
*/
absl::string_view key() const { return name_; }
std::string name() const { return name_ptr_->toString(); }

/**
* @returns std::string the name as a std::string.
* Alias for name(), because BlockMemoryHashSet<Value> expects Value::key().
mrice32 marked this conversation as resolved.
Show resolved Hide resolved
*/
std::string name() const { return name_; }
std::string key() const { return name(); }

bool operator==(const HeapStatData& rhs) const { return *name_ptr_ == *(rhs.name_ptr_); }
mrice32 marked this conversation as resolved.
Show resolved Hide resolved

std::atomic<uint64_t> value_{0};
std::atomic<uint64_t> pending_increment_{0};
std::atomic<uint16_t> flags_{0};
std::atomic<uint16_t> ref_count_{1};
std::string name_;
StatNamePtr name_ptr_;
};

/**
Expand All @@ -52,27 +56,33 @@ class HeapStatDataAllocator : public StatDataAllocatorImpl<HeapStatData> {
// StatDataAllocator
bool requiresBoundedStatNameSize() const override { return false; }

// SymbolTableImpl
/**
 * Encodes a stat name through this allocator's symbol table.
 * mutex_ is held for the duration of the table_.encode() call.
 * @param sv the stat name to encode.
 * @return StatNamePtr the value produced by table_.encode(sv).
 */
StatNamePtr encode(absl::string_view sv) {
  Thread::LockGuard guard(mutex_);
  StatNamePtr encoded = table_.encode(sv);
  return encoded;
}

private:
struct HeapStatHash_ {
size_t operator()(const HeapStatData* a) const { return HashUtil::xxHash64(a->key()); }
size_t operator()(const HeapStatData* a) const { return a->name_ptr_->hash(); }
};
struct HeapStatCompare_ {
bool operator()(const HeapStatData* a, const HeapStatData* b) const {
return (a->key() == b->key());
}
bool operator()(const HeapStatData* a, const HeapStatData* b) const { return (*a == *b); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: don't need parens around *a == *b

};

// TODO(jmarantz): See https://github.com/envoyproxy/envoy/pull/3927 and
// https://github.com/envoyproxy/envoy/issues/3585, which can help reorganize
// the heap stats using a ref-counted symbol table to compress the stat strings.
typedef std::unordered_set<HeapStatData*, HeapStatHash_, HeapStatCompare_> StatSet;

// An unordered set of HeapStatData pointers which keys off the key()
// field in each object. This necessitates a custom comparator and hasher.
// field in each object. This necessitates a custom comparator and hasher, which key off of the
// StatNamePtr's own StatNamePtrHash_ and StatNamePtrCompare_ operators.
StatSet stats_ GUARDED_BY(mutex_);
// A mutex is needed here to protect the stats_ object from both alloc() and free() operations.
// Although alloc() operations are called under existing locking, free() operations are made from
// the destructors of the individual stat objects, which are not protected by locks.
// A locally held symbol table which encodes stat names as StatNamePtrs and decodes StatNamePtrs
// back into strings.
SymbolTableImpl table_ GUARDED_BY(mutex_);
// A mutex is needed here to protect both the stats_ object and the table_ object from both
// alloc() and free() operations. Although alloc() operations are called under existing locking,
// free() operations are made from the destructors of the individual stat objects, which are not
// protected by locks.
Thread::MutexBasicLockable mutex_;
};

Expand Down
33 changes: 20 additions & 13 deletions source/common/stats/isolated_store_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,26 @@ namespace Envoy {
namespace Stats {

IsolatedStoreImpl::IsolatedStoreImpl()
: counters_([this](const std::string& name) -> CounterSharedPtr {
std::string tag_extracted_name = name;
std::vector<Tag> tags;
return alloc_.makeCounter(name, std::move(tag_extracted_name), std::move(tags));
}),
gauges_([this](const std::string& name) -> GaugeSharedPtr {
std::string tag_extracted_name = name;
std::vector<Tag> tags;
return alloc_.makeGauge(name, std::move(tag_extracted_name), std::move(tags));
}),
histograms_([this](const std::string& name) -> HistogramSharedPtr {
return std::make_shared<HistogramImpl>(name, *this, std::string(name), std::vector<Tag>());
}) {}
: counters_(
[this](const std::string& name) -> CounterSharedPtr {
std::string tag_extracted_name = name;
std::vector<Tag> tags;
return alloc_.makeCounter(name, std::move(tag_extracted_name), std::move(tags));
},
alloc_),
gauges_(
[this](const std::string& name) -> GaugeSharedPtr {
std::string tag_extracted_name = name;
std::vector<Tag> tags;
return alloc_.makeGauge(name, std::move(tag_extracted_name), std::move(tags));
},
alloc_),
histograms_(
[this](const std::string& name) -> HistogramSharedPtr {
return std::make_shared<HistogramImpl>(name, *this, std::string(name),
std::vector<Tag>());
},
alloc_) {}

struct IsolatedScopeImpl : public Scope {
IsolatedScopeImpl(IsolatedStoreImpl& parent, const std::string& prefix)
Expand Down
14 changes: 10 additions & 4 deletions source/common/stats/isolated_store_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
#include "envoy/stats/stats.h"
#include "envoy/stats/stats_options.h"
#include "envoy/stats/store.h"
#include "envoy/stats/symbol_table.h"

#include "common/common/utility.h"
#include "common/stats/heap_stat_data.h"
#include "common/stats/stats_options_impl.h"
#include "common/stats/symbol_table_impl.h"
#include "common/stats/utility.h"

namespace Envoy {
Expand All @@ -24,16 +26,18 @@ template <class Base> class IsolatedStatsCache {
public:
typedef std::function<std::shared_ptr<Base>(const std::string& name)> Allocator;

IsolatedStatsCache(Allocator alloc) : alloc_(alloc) {}
IsolatedStatsCache(Allocator alloc, HeapStatDataAllocator& heap_alloc)
: alloc_(alloc), heap_alloc_(heap_alloc) {}

Base& get(const std::string& name) {
auto stat = stats_.find(name);
StatNamePtr ptr = heap_alloc_.encode(name);
auto stat = stats_.find(ptr);
if (stat != stats_.end()) {
return *stat->second;
}

std::shared_ptr<Base> new_stat = alloc_(name);
stats_.emplace(name, new_stat);
stats_.emplace(std::move(ptr), new_stat);
return *new_stat;
}

Expand All @@ -48,8 +52,10 @@ template <class Base> class IsolatedStatsCache {
}

private:
std::unordered_map<std::string, std::shared_ptr<Base>> stats_;
std::unordered_map<StatNamePtr, std::shared_ptr<Base>, StatNamePtrHash_, StatNamePtrCompare_>
stats_;
Allocator alloc_;
HeapStatDataAllocator& heap_alloc_;
};

class IsolatedStoreImpl : public Store {
Expand Down
9 changes: 6 additions & 3 deletions source/common/stats/symbol_table_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
namespace Envoy {
namespace Stats {

// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs,
// if they appear in stat names. We don't want to waste time symbolizing an integer as an integer,
// if we can help it.
// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs /
// numbers if they appear in stat names. We don't want to waste time symbolizing an integer as an
// integer, if we can help it.
StatNamePtr SymbolTableImpl::encode(const absl::string_view name) {
Thread::LockGuard lock(lock_);
SymbolVec symbol_vec;
std::vector<absl::string_view> name_vec = absl::StrSplit(name, '.');
symbol_vec.reserve(name_vec.size());
Expand All @@ -23,6 +24,7 @@ StatNamePtr SymbolTableImpl::encode(const absl::string_view name) {
}

std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const {
Thread::LockGuard lock(lock_);
std::vector<absl::string_view> name;
name.reserve(symbol_vec.size());
std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name),
Expand All @@ -31,6 +33,7 @@ std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const {
}

void SymbolTableImpl::free(const SymbolVec& symbol_vec) {
Thread::LockGuard lock(lock_);
for (const Symbol symbol : symbol_vec) {
auto decode_search = decode_map_.find(symbol);
ASSERT(decode_search != decode_map_.end());
Expand Down
27 changes: 27 additions & 0 deletions source/common/stats/symbol_table_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "envoy/stats/symbol_table.h"

#include "common/common/assert.h"
#include "common/common/hash.h"
#include "common/common/lock_guard.h"
#include "common/common/thread.h"
#include "common/common/utility.h"

#include "absl/strings/str_join.h"
Expand Down Expand Up @@ -115,6 +118,9 @@ class SymbolTableImpl : public SymbolTable {
// TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
// using an Envoy::IntervalSet.
std::stack<Symbol> pool_;
ambuc marked this conversation as resolved.
Show resolved Hide resolved

// This must be held during encode(), decode() and free().
mutable Thread::MutexBasicLockable lock_;
ambuc marked this conversation as resolved.
Show resolved Hide resolved
};

/**
Expand All @@ -128,12 +134,33 @@ class StatNameImpl : public StatName {
~StatNameImpl() override { symbol_table_.free(symbol_vec_); }
std::string toString() const override { return symbol_table_.decode(symbol_vec_); }

// Returns a hash of the underlying symbol vector, since StatNames are uniquely defined by their
// symbol vectors.
uint64_t hash() const override { return HashUtil::hashVector(symbol_vec_); }
// Compares on the underlying symbol vectors.
// NB: operator==(std::vector) checks size first, then compares equality for each element.
// A rhs that is not a StatNameImpl compares unequal: the pointer form of dynamic_cast is used so
// that a type mismatch yields nullptr instead of throwing std::bad_cast from inside a comparison.
// NOTE(review): symbol vectors are compared without checking that both names refer to the same
// symbol table -- confirm callers never mix names from different tables.
bool operator==(const StatName& rhs) const override {
  const auto* other = dynamic_cast<const StatNameImpl*>(&rhs);
  return other != nullptr && symbol_vec_ == other->symbol_vec_;
}

private:
friend class StatNameTest;
SymbolVec symbolVec() { return symbol_vec_; }
SymbolVec symbol_vec_;
SymbolTableImpl& symbol_table_;
};

struct StatNamePtrHash_ {
size_t operator()(const StatNamePtr& a) const { return a->hash(); }
};

struct StatNamePtrCompare_ {
bool operator()(const StatNamePtr& a, const StatNamePtr& b) const {
// This extracts the underlying statnames.
return (*a.get() == *b.get());
}
};

} // namespace Stats
} // namespace Envoy
Loading