Skip to content

Commit

Permalink
[Utility] Reimplement RegularExpression on top of llvm::Regex
Browse files Browse the repository at this point in the history
Originally I wanted to remove the RegularExpression class in Utility and
replace it with llvm::Regex. However, during that transition I noticed
that there are several places where we need the regular expression string.
So instead I propose to keep the RegularExpression class and make it a
thin wrapper around llvm::Regex.

This patch also removes the workaround for empty regular expressions.
The result is that we are now (more or less) POSIX conformant.

Differential revision: https://reviews.llvm.org/D66174

llvm-svn: 369153
  • Loading branch information
JDevlieghere committed Aug 16, 2019
1 parent 250aafa commit 3af3f1e
Show file tree
Hide file tree
Showing 28 changed files with 275 additions and 468 deletions.
2 changes: 1 addition & 1 deletion lldb/include/lldb/Breakpoint/BreakpointResolverFileRegex.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace lldb_private {
class BreakpointResolverFileRegex : public BreakpointResolver {
public:
BreakpointResolverFileRegex(
Breakpoint *bkpt, RegularExpression &regex,
Breakpoint *bkpt, RegularExpression regex,
const std::unordered_set<std::string> &func_name_set, bool exact_match);

static BreakpointResolver *
Expand Down
2 changes: 1 addition & 1 deletion lldb/include/lldb/Breakpoint/BreakpointResolverName.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class BreakpointResolverName : public BreakpointResolver {

// Creates a function breakpoint by regular expression. Takes over control
// of the lifespan of func_regex.
BreakpointResolverName(Breakpoint *bkpt, RegularExpression &func_regex,
BreakpointResolverName(Breakpoint *bkpt, RegularExpression func_regex,
lldb::LanguageType language, lldb::addr_t offset,
bool skip_prologue);

Expand Down
2 changes: 1 addition & 1 deletion lldb/include/lldb/Core/AddressResolverName.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class AddressResolverName : public AddressResolver {

// Creates a function breakpoint by regular expression. Takes over control
// of the lifespan of func_regex.
AddressResolverName(RegularExpression &func_regex);
AddressResolverName(RegularExpression func_regex);

AddressResolverName(const char *class_name, const char *method,
AddressResolver::MatchType type);
Expand Down
4 changes: 2 additions & 2 deletions lldb/include/lldb/Interpreter/OptionValueRegex.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class OptionValueRegex : public OptionValue {
VarSetOperationType = eVarSetOperationAssign) = delete;

bool Clear() override {
m_regex.Clear();
m_regex = RegularExpression();
m_value_was_set = false;
return true;
}
Expand All @@ -52,7 +52,7 @@ class OptionValueRegex : public OptionValue {
if (value && value[0])
m_regex.Compile(llvm::StringRef(value));
else
m_regex.Clear();
m_regex = RegularExpression();
}

bool IsValid() const { return m_regex.IsValid(); }
Expand Down
139 changes: 23 additions & 116 deletions lldb/include/lldb/Utility/RegularExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,102 +9,28 @@
#ifndef liblldb_RegularExpression_h_
#define liblldb_RegularExpression_h_

#ifdef _WIN32
#include "../lib/Support/regex_impl.h"

typedef llvm_regmatch_t regmatch_t;
typedef llvm_regex_t regex_t;

inline int regcomp(llvm_regex_t *a, const char *b, int c) {
return llvm_regcomp(a, b, c);
}

inline size_t regerror(int a, const llvm_regex_t *b, char *c, size_t d) {
return llvm_regerror(a, b, c, d);
}

inline int regexec(const llvm_regex_t *a, const char *b, size_t c,
llvm_regmatch_t d[], int e) {
return llvm_regexec(a, b, c, d, e);
}

inline void regfree(llvm_regex_t *a) { llvm_regfree(a); }
#else
#ifdef __ANDROID__
#include <regex>
#endif
#include <regex.h>
#endif

#include <string>
#include <vector>

#include <stddef.h>
#include <stdint.h>

namespace llvm {
class StringRef;
} // namespace llvm
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"

namespace lldb_private {

/// \class RegularExpression RegularExpression.h
/// "lldb/Utility/RegularExpression.h"
/// A C++ wrapper class for regex.
///
/// This regular expression class wraps the posix regex functions \c
/// regcomp(), \c regerror(), \c regexec(), and \c regfree() from the header
/// file in \c /usr/include/regex\.h.
class RegularExpression {
class RegularExpression : public llvm::Regex {
public:
class Match {
public:
Match(uint32_t max_matches) : m_matches() {
if (max_matches > 0)
m_matches.resize(max_matches + 1);
}

void Clear() {
const size_t num_matches = m_matches.size();
regmatch_t invalid_match = {-1, -1};
for (size_t i = 0; i < num_matches; ++i)
m_matches[i] = invalid_match;
}

size_t GetSize() const { return m_matches.size(); }

regmatch_t *GetData() {
return (m_matches.empty() ? nullptr : m_matches.data());
}

bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
std::string &match_str) const;

bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
llvm::StringRef &match_str) const;

protected:
std::vector<regmatch_t>
m_matches; ///< Where parenthesized subexpressions results are stored
};

/// Default constructor.
///
/// The default constructor that initializes the object state such that it
/// contains no compiled regular expression.
RegularExpression();
RegularExpression() = default;

explicit RegularExpression(llvm::StringRef string);

/// Destructor.
///
/// Any previously compiled regular expression contained in this object will
/// be freed.
~RegularExpression();
~RegularExpression() = default;

RegularExpression(const RegularExpression &rhs);
RegularExpression(RegularExpression &&rhs) = default;

const RegularExpression &operator=(const RegularExpression &rhs);
RegularExpression &operator=(RegularExpression &&rhs) = default;
RegularExpression &operator=(const RegularExpression &rhs) = default;

/// Compile a regular expression.
///
Expand All @@ -118,11 +44,9 @@ class RegularExpression {
/// A NULL terminated C string that represents the regular
/// expression to compile.
///
/// \return
/// \b true if the regular expression compiles successfully,
/// \b false otherwise.
/// \return \b true if the regular expression compiles successfully, \b false
/// otherwise.
bool Compile(llvm::StringRef string);
bool Compile(const char *) = delete;

/// Executes a regular expression.
///
Expand All @@ -140,19 +64,10 @@ class RegularExpression {
/// properly initialized with the desired number of maximum
/// matches, or nullptr if no parenthesized matching is needed.
///
/// \return
/// \b true if \a string matches the compiled regular
/// expression, \b false otherwise.
bool Execute(llvm::StringRef string, Match *match = nullptr) const;
bool Execute(const char *, Match * = nullptr) = delete;

size_t GetErrorAsCString(char *err_str, size_t err_str_max_len) const;

/// Free the compiled regular expression.
///
/// If this object contains a valid compiled regular expression, this
/// function will free any resources it was consuming.
void Free();
/// \return \b true if \a string matches the compiled regular expression, \b
/// false otherwise.
bool Execute(llvm::StringRef string,
llvm::SmallVectorImpl<llvm::StringRef> *matches = nullptr) const;

/// Access the regular expression text.
///
Expand All @@ -168,26 +83,18 @@ class RegularExpression {
///
/// Test if this object contains a valid regular expression.
///
/// \return
/// \b true if the regular expression compiled and is ready
/// for execution, \b false otherwise.
/// \return \b true if the regular expression compiled and is ready for
/// execution, \b false otherwise.
bool IsValid() const;

void Clear() {
Free();
m_re.clear();
m_comp_err = 1;
}

int GetErrorCode() const { return m_comp_err; }

bool operator<(const RegularExpression &rhs) const;
/// Return an error if the regular expression failed to compile.
llvm::Error GetError() const;

private:
// Member variables
std::string m_re; ///< A copy of the original regular expression text
int m_comp_err; ///< Status code for the regular expression compilation
regex_t m_preg; ///< The compiled regular expression
/// A copy of the original regular expression text.
std::string m_regex_text;
/// The compiled regular expression.
mutable llvm::Regex m_regex;
};

} // namespace lldb_private
Expand Down
9 changes: 5 additions & 4 deletions lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ using namespace lldb_private;

// BreakpointResolverFileRegex:
BreakpointResolverFileRegex::BreakpointResolverFileRegex(
Breakpoint *bkpt, RegularExpression &regex,
Breakpoint *bkpt, RegularExpression regex,
const std::unordered_set<std::string> &func_names, bool exact_match)
: BreakpointResolver(bkpt, BreakpointResolver::FileRegexResolver),
m_regex(regex), m_exact_match(exact_match), m_function_names(func_names) {
}
m_regex(std::move(regex)), m_exact_match(exact_match),
m_function_names(func_names) {}

BreakpointResolverFileRegex::~BreakpointResolverFileRegex() {}

Expand Down Expand Up @@ -69,7 +69,8 @@ BreakpointResolver *BreakpointResolverFileRegex::CreateFromStructuredData(
}
}

return new BreakpointResolverFileRegex(bkpt, regex, names_set, exact_match);
return new BreakpointResolverFileRegex(bkpt, std::move(regex), names_set,
exact_match);
}

StructuredData::ObjectSP
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Breakpoint/BreakpointResolverName.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ BreakpointResolverName::BreakpointResolverName(Breakpoint *bkpt,
}

BreakpointResolverName::BreakpointResolverName(Breakpoint *bkpt,
RegularExpression &func_regex,
RegularExpression func_regex,
lldb::LanguageType language,
lldb::addr_t offset,
bool skip_prologue)
: BreakpointResolver(bkpt, BreakpointResolver::NameResolver, offset),
m_class_name(nullptr), m_regex(func_regex),
m_class_name(nullptr), m_regex(std::move(func_regex)),
m_match_type(Breakpoint::Regexp), m_language(language),
m_skip_prologue(skip_prologue) {}

Expand Down
12 changes: 4 additions & 8 deletions lldb/source/Commands/CommandObjectBreakpoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,12 +682,10 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
// name
{
RegularExpression regexp(m_options.m_func_regexp);
if (!regexp.IsValid()) {
char err_str[1024];
regexp.GetErrorAsCString(err_str, sizeof(err_str));
if (llvm::Error err = regexp.GetError()) {
result.AppendErrorWithFormat(
"Function name regular expression could not be compiled: \"%s\"",
err_str);
llvm::toString(std::move(err)).c_str());
result.SetStatus(eReturnStatusFailed);
return false;
}
Expand Down Expand Up @@ -718,12 +716,10 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
}

RegularExpression regexp(m_options.m_source_text_regexp);
if (!regexp.IsValid()) {
char err_str[1024];
regexp.GetErrorAsCString(err_str, sizeof(err_str));
if (llvm::Error err = regexp.GetError()) {
result.AppendErrorWithFormat(
"Source text regular expression could not be compiled: \"%s\"",
err_str);
llvm::toString(std::move(err)).c_str());
result.SetStatus(eReturnStatusFailed);
return false;
}
Expand Down
6 changes: 3 additions & 3 deletions lldb/source/Commands/CommandObjectFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,9 @@ class CommandObjectFrameVariable : public CommandObjectParsed {
entry.c_str());
}
} else {
char regex_error[1024];
if (regex.GetErrorAsCString(regex_error, sizeof(regex_error)))
result.GetErrorStream().Printf("error: %s\n", regex_error);
if (llvm::Error err = regex.GetError())
result.GetErrorStream().Printf(
"error: %s\n", llvm::toString(std::move(err)).c_str());
else
result.GetErrorStream().Printf(
"error: unknown regex error when compiling '%s'\n",
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Core/AddressResolverName.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ AddressResolverName::AddressResolverName(const char *func_name,
}
}

AddressResolverName::AddressResolverName(RegularExpression &func_regex)
AddressResolverName::AddressResolverName(RegularExpression func_regex)
: AddressResolver(), m_func_name(nullptr), m_class_name(nullptr),
m_regex(func_regex), m_match_type(AddressResolver::Regexp) {}
m_regex(std::move(func_regex)), m_match_type(AddressResolver::Regexp) {}

AddressResolverName::AddressResolverName(const char *class_name,
const char *method,
Expand Down
25 changes: 11 additions & 14 deletions lldb/source/Core/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,9 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine(
const char *function_name =
sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
.GetCString();
if (function_name) {
RegularExpression::Match regex_match(1);
if (avoid_regex->Execute(function_name, &regex_match)) {
// skip this source line
return true;
}
if (function_name && avoid_regex->Execute(function_name)) {
// skip this source line
return true;
}
}
// don't skip this source line
Expand Down Expand Up @@ -793,10 +790,9 @@ OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
std::string value;
static RegularExpression g_reg_exp(
llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
RegularExpression::Match regex_match(1);
bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
if (reg_exp_success)
regex_match.GetMatchAtIndex(line.c_str(), 1, value);
llvm::SmallVector<llvm::StringRef, 2> matches;
if (g_reg_exp.Execute(line, &matches))
value = matches[1].str();
else
value = line;

Expand Down Expand Up @@ -856,14 +852,15 @@ OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
if (!line.empty()) {
static RegularExpression g_reg_exp(llvm::StringRef(
"^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
RegularExpression::Match regex_match(2);

bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
llvm::SmallVector<llvm::StringRef, 3> matches;

bool reg_exp_success = g_reg_exp.Execute(line, &matches);
std::string key;
std::string value;
if (reg_exp_success) {
regex_match.GetMatchAtIndex(line.c_str(), 1, key);
regex_match.GetMatchAtIndex(line.c_str(), 2, value);
key = matches[1].str();
value = matches[2].str();
} else {
out_stream->Printf("Instruction::ReadDictionary: Failure executing "
"regular expression.\n");
Expand Down
Loading

0 comments on commit 3af3f1e

Please sign in to comment.