Skip to content

Commit

Permalink
[regexp] Remove BufferedZoneList
Browse files Browse the repository at this point in the history
.. as a custom data structure with questionable value.

Also: a few drive-by refactors.

Change-Id: I74957b70c4357795dc46ef5520d58b6a78be31b2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3240823
Commit-Queue: Jakob Gruber <[email protected]>
Reviewed-by: Leszek Swirski <[email protected]>
Cr-Commit-Position: refs/heads/main@{#77674}
  • Loading branch information
schuay authored and V8 LUCI CQ committed Nov 3, 2021
1 parent bfa681f commit a7e9b8f
Showing 1 changed file with 34 additions and 112 deletions.
146 changes: 34 additions & 112 deletions src/regexp/regexp-parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,90 +30,15 @@ enum class InClassEscapeState {
kNotInClass,
};

// A BufferedZoneList is an automatically growing list, just like (and backed
// by) a ZoneList, that is optimized for the case of adding and removing
// a single element. The most recently added element is kept in |last_|,
// outside the backing list; the backing ZoneList is only allocated once a
// second element is added or GetList() is called.
//
// Elements must not be nullptr: a null |last_| is the sentinel meaning
// "no element is currently buffered".
template <typename T, int initial_size>
class BufferedZoneList {
 public:
  BufferedZoneList() : list_(nullptr), last_(nullptr) {}

  // Adds |value| at the end of the list. The element is buffered and can be
  // read using last() or removed using RemoveLast() until a new Add() or
  // until RemoveLast() or GetList() has been called. Any previously buffered
  // element is moved into the backing list, which is allocated in |zone| on
  // first use.
  void Add(T* value, Zone* zone) {
    // Enforce the class invariant: buffering a nullptr would make the element
    // invisible to length() and Get().
    DCHECK(value != nullptr);
    if (last_ != nullptr) {
      if (list_ == nullptr) {
        list_ = zone->New<ZoneList<T*>>(initial_size, zone);
      }
      list_->Add(last_, zone);
    }
    last_ = value;
  }

  // Returns the buffered (most recently added) element. There must be one.
  T* last() const {
    DCHECK(last_ != nullptr);
    return last_;
  }

  // Removes and returns the buffered element. If the backing list is
  // non-empty, its final element becomes the new buffered element; otherwise
  // nothing is buffered afterwards.
  T* RemoveLast() {
    DCHECK(last_ != nullptr);
    T* result = last_;
    if ((list_ != nullptr) && (list_->length() > 0)) {
      last_ = list_->RemoveLast();
    } else {
      last_ = nullptr;
    }
    return result;
  }

  // Returns the element at index |i|, where 0 <= i < length(). Indices below
  // the backing list's length refer to the backing list; the index equal to
  // the backing list's length refers to the buffered element.
  T* Get(int i) const {
    DCHECK((0 <= i) && (i < length()));
    if (list_ == nullptr) {
      // Without a backing list the only possible element is the buffered one.
      DCHECK_EQ(0, i);
      return last_;
    }
    if (i == list_->length()) {
      DCHECK(last_ != nullptr);
      return last_;
    }
    return list_->at(i);
  }

  // Forgets all elements. Only the pointers held here are reset; a backing
  // list previously handed out by GetList() is not modified.
  void Clear() {
    list_ = nullptr;
    last_ = nullptr;
  }

  // Number of elements: those in the backing list (if any) plus the buffered
  // element (if any).
  int length() const {
    int length = (list_ == nullptr) ? 0 : list_->length();
    return length + ((last_ == nullptr) ? 0 : 1);
  }

  // Flushes the buffered element (if any) into the backing list and returns
  // that list, allocating it in |zone| if it does not exist yet. Never
  // returns nullptr; afterwards no element is buffered.
  ZoneList<T*>* GetList(Zone* zone) {
    if (list_ == nullptr) {
      list_ = zone->New<ZoneList<T*>>(initial_size, zone);
    }
    if (last_ != nullptr) {
      list_->Add(last_, zone);
      last_ = nullptr;
    }
    return list_;
  }

 private:
  // Backing storage for every element except the buffered one; nullptr until
  // first needed.
  ZoneList<T*>* list_;
  // The buffered, most recently added element; nullptr when none is buffered.
  T* last_;
};

// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject {
class RegExpBuilder {
public:
RegExpBuilder(Zone* zone, RegExpFlags flags);
RegExpBuilder(Zone* zone, RegExpFlags flags)
: zone_(zone),
flags_(flags),
terms_(2, zone),
text_(2, zone),
alternatives_(2, zone) {}
void AddCharacter(base::uc16 character);
void AddUnicodeCharacter(base::uc32 character);
void AddEscapedUnicodeCharacter(base::uc32 character);
Expand Down Expand Up @@ -149,15 +74,21 @@ class RegExpBuilder : public ZoneObject {
bool unicode() const { return IsUnicode(flags_); }

Zone* const zone_;
bool pending_empty_;
bool pending_empty_ = false;
const RegExpFlags flags_;
ZoneList<base::uc16>* characters_;
base::uc16 pending_surrogate_;
BufferedZoneList<RegExpTree, 2> terms_;
BufferedZoneList<RegExpTree, 2> text_;
BufferedZoneList<RegExpTree, 2> alternatives_;
ZoneList<base::uc16>* characters_ = nullptr;
base::uc16 pending_surrogate_ = kNoPendingSurrogate;
ZoneList<RegExpTree*> terms_;
ZoneList<RegExpTree*> text_;
ZoneList<RegExpTree*> alternatives_;
#ifdef DEBUG
enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
enum {
ADD_NONE,
ADD_CHAR,
ADD_TERM,
ADD_ASSERT,
ADD_ATOM
} last_added_ = ADD_NONE;
#define LAST(x) last_added_ = x;
#else
#define LAST(x)
Expand All @@ -182,7 +113,7 @@ class RegExpParserState : public ZoneObject {
const ZoneVector<base::uc16>* capture_name,
RegExpFlags flags, Zone* zone)
: previous_state_(previous_state),
builder_(zone->New<RegExpBuilder>(zone, flags)),
builder_(zone, flags),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index),
Expand All @@ -191,7 +122,7 @@ class RegExpParserState : public ZoneObject {
RegExpParserState* previous_state() const { return previous_state_; }
bool IsSubexpression() { return previous_state_ != nullptr; }
// RegExpBuilder building this regexp's AST.
RegExpBuilder* builder() const { return builder_; }
RegExpBuilder* builder() { return &builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() const { return group_type_; }
// Lookahead or Lookbehind.
Expand Down Expand Up @@ -234,7 +165,7 @@ class RegExpParserState : public ZoneObject {
// Linked list implementation of stack of states.
RegExpParserState* const previous_state_;
// Builder for the stored disjunction.
RegExpBuilder* const builder_;
RegExpBuilder builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
const SubexpressionType group_type_;
// Stored read direction.
Expand Down Expand Up @@ -2073,21 +2004,6 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) {
return true;
}

// Constructs an empty builder that allocates in |zone| and parses under
// |flags|. Nothing is pending initially: no empty term, no character buffer
// and no lead surrogate; the term, text and alternative lists start out
// default-constructed.
RegExpBuilder::RegExpBuilder(Zone* zone, RegExpFlags flags)
    : zone_(zone),
      pending_empty_(false),
      flags_(flags),
      characters_(nullptr),
      pending_surrogate_(kNoPendingSurrogate),
      terms_(),
      text_(),
      alternatives_()
#ifdef DEBUG
      // Debug-only record of the most recent kind of addition (see LAST()).
      , last_added_(ADD_NONE)
#endif
{
}

void RegExpBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) {
DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
FlushPendingSurrogate();
Expand Down Expand Up @@ -2150,10 +2066,12 @@ void RegExpBuilder::FlushText() {
terms_.Add(text_.last(), zone());
} else {
RegExpText* text = zone()->New<RegExpText>(zone());
for (int i = 0; i < num_text; i++) text_.Get(i)->AppendToText(text, zone());
for (int i = 0; i < num_text; i++) {
text_[i]->AppendToText(text, zone());
}
terms_.Add(text, zone());
}
text_.Clear();
text_.Rewind(0);
}

void RegExpBuilder::AddCharacter(base::uc16 c) {
Expand Down Expand Up @@ -2252,10 +2170,11 @@ void RegExpBuilder::FlushTerms() {
} else if (num_terms == 1) {
alternative = terms_.last();
} else {
alternative = zone()->New<RegExpAlternative>(terms_.GetList(zone()));
alternative = zone()->New<RegExpAlternative>(
zone()->New<ZoneList<RegExpTree*>>(terms_, zone()));
}
alternatives_.Add(alternative, zone());
terms_.Clear();
terms_.Rewind(0);
LAST(ADD_NONE);
}

Expand Down Expand Up @@ -2298,7 +2217,8 @@ RegExpTree* RegExpBuilder::ToRegExp() {
int num_alternatives = alternatives_.length();
if (num_alternatives == 0) return zone()->New<RegExpEmpty>();
if (num_alternatives == 1) return alternatives_.last();
return zone()->New<RegExpDisjunction>(alternatives_.GetList(zone()));
return zone()->New<RegExpDisjunction>(
zone()->New<ZoneList<RegExpTree*>>(alternatives_, zone()));
}

bool RegExpBuilder::AddQuantifierToAtom(
Expand Down Expand Up @@ -2410,5 +2330,7 @@ bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
return ParseRegExpFromHeapString(isolate, zone, input, flags, result);
}

#undef LAST

} // namespace internal
} // namespace v8

0 comments on commit a7e9b8f

Please sign in to comment.