-
Notifications
You must be signed in to change notification settings - Fork 715
/
regex.hh
247 lines (200 loc) · 8.55 KB
/
regex.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#ifndef regex_hh_INCLUDED
#define regex_hh_INCLUDED
#include "string.hh"
#include "regex_impl.hh"
#include "ref_ptr.hh"
namespace Kakoune
{
// Regex that keeps track of its string representation
class Regex
{
public:
Regex() = default;
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None);
bool empty() const { return m_str.empty(); }
bool operator==(const Regex& other) const { return m_str == other.m_str; }
const String& str() const { return m_str; }
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
int named_capture_index(StringView name) const;
static constexpr const char* option_type_name = "regex";
const CompiledRegex* impl() const { return m_impl.get(); }
private:
struct Impl : RefCountable, CompiledRegex {};
RefPtr<Impl> m_impl;
String m_str;
};
// Capture results of a regex execution.
//
// The bounds are stored as a flat sequence of iterators in which sub-match i
// occupies indices 2*i (begin) and 2*i+1 (end).
template<typename Iterator>
struct MatchResults
{
    // Bounds of a single capture; `first` and `second` come from std::pair.
    struct SubMatch : std::pair<Iterator, Iterator>
    {
        SubMatch() = default;
        SubMatch(Iterator begin, Iterator end)
            : std::pair<Iterator, Iterator>{begin, end}, matched{static_cast<bool>(begin)}
        {}

        // Set from the boolean conversion of the begin iterator; false for a
        // default constructed SubMatch.
        bool matched = false;
    };

    // Iterates over the stored bounds two at a time, producing a SubMatch by
    // value on dereference.
    struct iterator
    {
        using difference_type = size_t;
        using value_type = SubMatch;
        using pointer = SubMatch*;
        using reference = SubMatch;
        using iterator_category = std::bidirectional_iterator_tag;
        using It = typename Vector<Iterator, MemoryDomain::Regex>::const_iterator;

        iterator() = default;
        iterator(It it) : m_it{std::move(it)} {}

        // Each sub-match spans two stored iterators, so stepping moves by two
        // in the underlying sequence: backwards for --, forwards for ++.
        iterator& operator--() { m_it -= 2; return *this; }
        iterator& operator++() { m_it += 2; return *this; }

        SubMatch operator*() const { return {*m_it, *(m_it+1)}; }

        friend bool operator==(const iterator& lhs, const iterator& rhs) = default;
    private:
        It m_it;
    };

    MatchResults() = default;
    MatchResults(Vector<Iterator, MemoryDomain::Regex> values) : m_values{std::move(values)} {}

    iterator begin() const { return iterator{m_values.begin()}; }
    iterator cbegin() const { return iterator{m_values.cbegin()}; }
    iterator end() const { return iterator{m_values.end()}; }
    iterator cend() const { return iterator{m_values.cend()}; }

    // Number of sub-matches, i.e. half the number of stored iterators.
    size_t size() const { return m_values.size() / 2; }
    bool empty() const { return m_values.empty(); }

    // Sub-match `i`, or a default constructed (unmatched) SubMatch when `i`
    // is out of range.
    SubMatch operator[](size_t i) const
    {
        return i * 2 < m_values.size() ?
            SubMatch{m_values[i*2], m_values[i*2+1]} : SubMatch{};
    }

    friend bool operator==(const MatchResults& lhs, const MatchResults& rhs) = default;

    void swap(MatchResults& other)
    {
        m_values.swap(other.m_values);
    }

    // Direct access to the flat storage, used by the search functions to fill
    // in the results.
    Vector<Iterator, MemoryDomain::Regex>& values() { return m_values; }

private:
    Vector<Iterator, MemoryDomain::Regex> m_values;
};
// Builds the execution flags for a match: each boolean that is false adds the
// corresponding "Not..." flag (begin/end of line, begin/end of word), each
// boolean that is true adds nothing.
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
{
    RegexExecFlags result = RegexExecFlags::None;
    if (not bol)
        result |= RegexExecFlags::NotBeginOfLine;
    if (not eol)
        result |= RegexExecFlags::NotEndOfLine;
    if (not bow)
        result |= RegexExecFlags::NotBeginOfWord;
    if (not eow)
        result |= RegexExecFlags::NotEndOfWord;
    return result;
}
// Default idle callback for the regex functions below: calling it does nothing.
struct NoopIdle
{
    void operator()()
    {
    }
};
// Executes `re` on [begin, end), using that same range as the subject, and
// returns the result of the execution. No captures are reported: the VM runs
// in Forward | AnyMatch | NoSaves mode, without the Search mode.
// `idle_func` is forwarded to the VM's exec.
template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, const Regex& re, IdleFunc&& idle_func = {})
{
    constexpr auto vm_mode = RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves;
    ThreadedRegexVM<It, vm_mode> vm{*re.impl()};

    const bool matched = vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func);
    return matched;
}
// Executes `re` on [begin, end), using that same range as the subject, in
// Forward mode without the Search mode. `res` is always cleared first; on
// success it receives the VM's captures and true is returned, otherwise `res`
// is left empty and false is returned.
template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re, IdleFunc&& idle_func = {})
{
    auto& captured = res.values();
    captured.clear();

    ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()};
    if (not vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func))
        return false;

    std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captured));
    return true;
}
// Executes `re` in Search mode on [begin, end) within the subject
// [subject_begin, subject_end) and returns the result of the execution.
// No captures are reported: the VM runs with AnyMatch | NoSaves.
// `flags` and `idle_func` are forwarded to the VM's exec.
template<typename It, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
                  RegexExecFlags flags = RegexExecFlags::None, IdleFunc&& idle_func = {})
{
    constexpr auto vm_mode = RegexMode::Forward | RegexMode::Search |
                             RegexMode::AnyMatch | RegexMode::NoSaves;
    ThreadedRegexVM<It, vm_mode> vm{*re.impl()};

    const bool found = vm.exec(begin, end, subject_begin, subject_end, flags, idle_func);
    return found;
}
// Executes `re` in Search mode, combined with the direction given by `mode`,
// on [begin, end) within the subject [subject_begin, subject_end).
// `res` is always cleared first; on success it receives the VM's captures and
// true is returned, otherwise `res` is left empty and false is returned.
template<typename It, RegexMode mode = RegexMode::Forward, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end,
                  MatchResults<It>& res, const Regex& re,
                  RegexExecFlags flags = RegexExecFlags::None,
                  IdleFunc&& idle_func = {})
{
    auto& captured = res.values();
    captured.clear();

    ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()};
    if (not vm.exec(begin, end, subject_begin, subject_end, flags, idle_func))
        return false;

    std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(captured));
    return true;
}
// Same as the capturing regex_search, with the mode fixed to
// RegexMode::Backward; every argument is passed through unchanged.
template<typename It, typename IdleFunc = NoopIdle>
bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
                           MatchResults<It>& res, const Regex& re,
                           RegexExecFlags flags = RegexExecFlags::None,
                           IdleFunc&& idle_func = {})
{
    constexpr auto direction = RegexMode::Backward;
    return regex_search<It, direction>(begin, end, subject_begin, subject_end,
                                       res, re, flags, idle_func);
}
// Forward declaration only; the enumerators are not visible in this header.
enum class Quoting;
// Conversions between a Regex and its option string form, declared here and
// defined out of line.
// NOTE(review): presumably the hooks used by the option system together with
// Regex::option_type_name — confirm against the option handling code.
String option_to_string(const Regex& re, Quoting quoting);
Regex option_from_string(Meta::Type<Regex>, StringView str);
// Range over the successive matches found in [begin, end), in the direction
// selected by `mode` (Search mode is always added).
//
// VmArg is either `const Regex`, in which case a VM is built from the regex's
// compiled program and owned by the RegexIterator, or an existing RegexVM,
// which is then used by reference.
//
// The match state lives in the RegexIterator itself: an It only refers back
// to it, and constructing an It (which begin() does) immediately runs a
// search for the next match.
template<typename Iterator, RegexMode mode = RegexMode::Forward,
         typename VmArg = const Regex, typename IdleFunc = NoopIdle>
struct RegexIterator
{
    static_assert(has_direction(mode));
    static constexpr bool forward = mode & RegexMode::Forward;

    using ValueType = MatchResults<Iterator>;

    // End marker: an It compares equal to it once a search has failed.
    struct Sentinel{};

    struct It
    {
        // Looks for a match right away; m_valid records whether one was found.
        It(RegexIterator& base) : m_base(base), m_valid{m_base.next()} {}

        const ValueType& operator*() const { kak_assert(m_valid); return m_base.m_results; }
        const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }

        It& operator++() { m_valid = m_base.next(); return *this; }
        bool operator==(Sentinel) const { return not m_valid; }

        RegexIterator& m_base;
        bool m_valid;
    };

    // Searches [begin, end) within the subject [subject_begin, subject_end).
    // The first search starts from `begin` when going forward and from `end`
    // otherwise.
    RegexIterator(Iterator begin, Iterator end,
                  Iterator subject_begin, Iterator subject_end,
                  VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
                  IdleFunc idle_func = {})
        : m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end},
          m_begin{std::move(begin)}, m_end{std::move(end)},
          m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
          m_flags{flags}, m_idle_func{idle_func} {}

    // Same as above, with the subject being [begin, end) itself.
    RegexIterator(const Iterator& begin, const Iterator& end,
                  VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
                  IdleFunc idle_func = {})
        : RegexIterator{begin, end, begin, end, vm_arg, flags, idle_func} {}

    It begin() { return {*this}; }
    Sentinel end() const { return {}; }

private:
    // Runs the VM from the current position. On success the captures are
    // stored in m_results, the position is moved to the far side of the match
    // (its end when going forward, its start otherwise) and true is returned.
    // On failure m_results keeps its previous content and false is returned.
    bool next()
    {
        // When the previous match was empty, NotInitialNull is added to the
        // flags for this run.
        const bool previous_was_empty =
            m_results.size() and m_results[0].first == m_results[0].second;
        const auto extra_flags = previous_was_empty ? RegexExecFlags::NotInitialNull
                                                    : RegexExecFlags::None;

        const Iterator& search_begin = forward ? m_next_pos : m_begin;
        const Iterator& search_end = forward ? m_end : m_next_pos;

        if (not m_vm.exec(search_begin, search_end,
                          m_subject_begin, m_subject_end,
                          m_flags | extra_flags, m_idle_func))
            return false;

        auto& stored = m_results.values();
        stored.clear();
        std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(stored));

        m_next_pos = forward ? m_results[0].second : m_results[0].first;
        kak_assert(forward ? (m_next_pos <= m_end) : (m_next_pos >= m_begin));
        return true;
    }

    using RegexVM = ThreadedRegexVM<Iterator, mode | RegexMode::Search>;

    // An existing VM is used by reference; a Regex gets its own VM.
    static RegexVM& make_vm(RegexVM& vm) { return vm; }
    static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }

    decltype(make_vm(std::declval<VmArg&>())) m_vm;
    MatchResults<Iterator> m_results;
    Iterator m_next_pos{};
    const Iterator m_begin{};
    const Iterator m_end{};
    const Iterator m_subject_begin{};
    const Iterator m_subject_end{};
    const RegexExecFlags m_flags = RegexExecFlags::None;
    IdleFunc m_idle_func;
};
}
#endif // regex_hh_INCLUDED