Skip to content

Commit

Permalink
Provide instrumentation points called "hooks".
Browse files Browse the repository at this point in the history
Change-Id: I854b2ba264d8c7239efb8a80bc440b1f5e0955f5
Reviewed-on: https://code-review.googlesource.com/c/re2/+/51834
Reviewed-by: Randall Bosetti <[email protected]>
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Feb 14, 2020
1 parent 42d10ac commit 4f5e6c2
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 10 deletions.
12 changes: 11 additions & 1 deletion re2/dfa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "util/strutil.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/sparse_set.h"
#include "re2/stringpiece.h"

Expand Down Expand Up @@ -1184,6 +1185,11 @@ void DFA::ResetCache(RWLocker* cache_lock) {
// Re-acquire the cache_mutex_ for writing (exclusive use).
cache_lock->LockForWriting();

hooks::GetDFAStateCacheResetHook()({
.state_budget = state_budget_,
.state_cache_size = state_cache_.size(),
});

// Clear the cache, reset the memory budget.
for (int i = 0; i < kMaxStart; i++) {
start_[i].start = NULL;
Expand Down Expand Up @@ -1917,8 +1923,12 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
bool matched = dfa->Search(text, context, anchored,
want_earliest_match, !reversed_,
failed, &ep, matches);
if (*failed)
if (*failed) {
hooks::GetDFASearchFailureHook()({
// Nothing yet...
});
return false;
}
if (!matched)
return false;
if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
Expand Down
55 changes: 46 additions & 9 deletions re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <iterator>
#include <mutex>
#include <string>
Expand Down Expand Up @@ -645,7 +646,6 @@ bool RE2::Match(const StringPiece& text,
Prog::MatchKind kind = Prog::kFirstMatch;
if (options_.longest_match())
kind = Prog::kLongestMatch;
bool skipped_test = false;

bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);

Expand All @@ -657,7 +657,9 @@ bool RE2::Match(const StringPiece& text,
bool can_bit_state = prog_->CanBitState();
size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();

hooks::context = this;
bool dfa_failed = false;
bool skipped_test = false;
switch (re_anchor) {
default:
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
Expand Down Expand Up @@ -1252,15 +1254,50 @@ bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
return parse_##name##_radix(str, n, dest, 0); \
}

DEFINE_INTEGER_PARSER(short);
DEFINE_INTEGER_PARSER(ushort);
DEFINE_INTEGER_PARSER(int);
DEFINE_INTEGER_PARSER(uint);
DEFINE_INTEGER_PARSER(long);
DEFINE_INTEGER_PARSER(ulong);
DEFINE_INTEGER_PARSER(longlong);
DEFINE_INTEGER_PARSER(ulonglong);
DEFINE_INTEGER_PARSER(short)
DEFINE_INTEGER_PARSER(ushort)
DEFINE_INTEGER_PARSER(int)
DEFINE_INTEGER_PARSER(uint)
DEFINE_INTEGER_PARSER(long)
DEFINE_INTEGER_PARSER(ulong)
DEFINE_INTEGER_PARSER(longlong)
DEFINE_INTEGER_PARSER(ulonglong)

#undef DEFINE_INTEGER_PARSER

namespace hooks {

// Definition of the thread-local context pointer declared in re2.h.
// It starts out null; RE2::Match() stores `this` into it.
thread_local const RE2* context = NULL;

// Storage for a single hook callback. The pointer lives in a std::atomic so
// that it can be replaced and read without additional locking.
template <typename T>
union Hook {
#if !defined(__clang__) && defined(_MSC_VER)
  // Gross hack, with https://github.com/protocolbuffers/protobuf/pull/4777
  // cited as precedent, that makes std::atomic<T*> constant-initialized on
  // MSVC. The plain pointer is declared ahead of the atomic so that brace
  // initialization of the union targets it.
  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
                "std::atomic<T*> must be always lock-free");
  T* cb_for_constinit_;
#endif

  std::atomic<T*> cb_;

  // Replaces the stored callback (release store).
  void Store(T* callback) {
    cb_.store(callback, std::memory_order_release);
  }

  // Fetches the stored callback (acquire load).
  T* Load() const {
    return cb_.load(std::memory_order_acquire);
  }
};

// No-op callback: accepts an event of any type and ignores it.
template <typename Event>
static void DoNothing(const Event& /*event*/) {
}

// Defines everything needed for one hook whose event struct is `type`:
//   * a file-local Hook<type##Callback> object called name##_hook, which is
//     initialized with DoNothing<type> so that a callable is stored from
//     the start;
//   * Set##type##Hook(), which stores the given callback pointer;
//   * Get##type##Hook(), which loads the currently stored callback pointer.
#define DEFINE_HOOK(type, name) \
static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
type##Callback* Get##type##Hook() { return name##_hook.Load(); }

// Instantiate the definitions for the two DFA hooks.
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)

#undef DEFINE_HOOK

} // namespace hooks

} // namespace re2
34 changes: 34 additions & 0 deletions re2/re2.h
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,40 @@ class LazyRE2 {
};
#endif

namespace hooks {

// Thread-local pointer that RE2::Match() sets to the RE2 object it was called
// on, presumably so that a hook can tell which RE2 it is being invoked for.
//
// A hook must not make any assumptions regarding the lifetime of the context
// pointee beyond the current invocation of the hook. Pointers and references
// obtained via the context pointee should be considered invalidated when the
// hook returns. Hence, any data about the context pointee (e.g. its pattern)
// would have to be copied in order for it to be kept for an indefinite time.
//
// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
// could result in infinite mutual recursion. To discourage that possibility,
// RE2 will not maintain the context pointer correctly when used in that way.
extern thread_local const RE2* context;

// Event handed to the DFAStateCacheReset hook when a DFA resets its state
// cache. Both values are captured before the cache is cleared.
struct DFAStateCacheReset {
// The DFA's state budget at the moment of the reset.
int64_t state_budget;
// Number of entries held in the DFA's state cache at the moment of the reset.
size_t state_cache_size;
};

// Event handed to the DFASearchFailure hook when Prog::SearchDFA() sees that
// the DFA search reported failure. It carries no data at present.
struct DFASearchFailure {
// Nothing yet...
};

// Declares the public interface of one hook whose event struct is `type`:
//   * type##Callback, an alias for the function type void(const type&);
//   * Set##type##Hook(), which takes a pointer to such a function;
//   * Get##type##Hook(), which returns a pointer to such a function.
#define DECLARE_HOOK(type) \
using type##Callback = void(const type&); \
void Set##type##Hook(type##Callback* cb); \
type##Callback* Get##type##Hook();

// One declaration set per event struct above.
DECLARE_HOOK(DFAStateCacheReset)
DECLARE_HOOK(DFASearchFailure)

#undef DECLARE_HOOK

} // namespace hooks

} // namespace re2

using re2::RE2;
Expand Down
22 changes: 22 additions & 0 deletions re2/testing/dfa_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ DEFINE_FLAG(int, threads, 4, "number of threads");

namespace re2 {

// Counters incremented by the hooks that SetHooks installs. The SearchDFA
// tests zero them before running and check them once the searches are done.
static int state_cache_resets = 0;
static int search_failures = 0;

// Installs counting callbacks for both DFA hooks. The constructor runs when
// the global instance declared at the end of this definition is initialized.
struct SetHooks {
  SetHooks() {
    // Unary plus converts each captureless lambda to a plain function pointer.
    auto count_reset = +[](const hooks::DFAStateCacheReset&) {
      ++state_cache_resets;
    };
    auto count_failure = +[](const hooks::DFASearchFailure&) {
      ++search_failures;
    };

    hooks::SetDFAStateCacheResetHook(count_reset);
    hooks::SetDFASearchFailureHook(count_failure);
  }
} set_hooks;

// Check that multithreaded access to DFA class works.

// Helper function: builds entire DFA for prog.
Expand Down Expand Up @@ -168,6 +182,8 @@ TEST(SingleThreaded, SearchDFA) {
// if it can't get a good cache hit rate.)
// Tell the DFA to trudge along instead.
Prog::TEST_dfa_should_bail_when_slow(false);
state_cache_resets = 0;
search_failures = 0;

// Choice of n is mostly arbitrary, except that:
// * making n too big makes the test run for too long.
Expand Down Expand Up @@ -217,6 +233,8 @@ TEST(SingleThreaded, SearchDFA) {

// Reset to original behaviour.
Prog::TEST_dfa_should_bail_when_slow(true);
ASSERT_GT(state_cache_resets, 0);
ASSERT_EQ(search_failures, 0);
}

// Helper function: searches for match, which should match,
Expand All @@ -239,6 +257,8 @@ static void DoSearch(Prog* prog, const StringPiece& match,

TEST(Multithreaded, SearchDFA) {
Prog::TEST_dfa_should_bail_when_slow(false);
state_cache_resets = 0;
search_failures = 0;

// Same as single-threaded test above.
const int n = 18;
Expand Down Expand Up @@ -278,6 +298,8 @@ TEST(Multithreaded, SearchDFA) {

// Reset to original behaviour.
Prog::TEST_dfa_should_bail_when_slow(true);
ASSERT_GT(state_cache_resets, 0);
ASSERT_EQ(search_failures, 0);
}

struct ReverseTest {
Expand Down

0 comments on commit 4f5e6c2

Please sign in to comment.