-
Notifications
You must be signed in to change notification settings - Fork 75
/
pcre.hxx
70 lines (54 loc) · 1.33 KB
/
pcre.hxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#pragma once
#include <optional>
#include <memory>
#include <vector>
#include <cstring>
#include <cctype>
#include <string>
#include <stdexcept>
namespace pcre {
struct node_t;
typedef std::unique_ptr<node_t> node_ptr_t;
// Selector for the character predicate attached to a kind_meta node.
// Apart from `none`, every enumerator carries the name of a character
// classification function: the <cctype> ones plus pcre::isword.
enum class metachar_func_t {
  none,      // no predicate selected
  iscntrl,
  isprint,
  isspace,
  isblank,
  isgraph,
  ispunct,
  isalnum,
  isalpha,
  isupper,
  islower,
  isdigit,
  isxdigit,
  isword,
};
// One node of the regular-expression syntax tree.
// `kind` says what the node is; the payload union holds the per-kind data
// (the trailing comment on each member names the kind that uses it), and
// `children` owns the node's sub-nodes.
struct node_t {
  enum kind_t {
    // terminals
    kind_char,
    kind_range,
    kind_any,
    kind_meta,
    kind_boundary,
    kind_cclass,
    // unary operators
    kind_opt,
    kind_star,
    kind_plus,
    kind_quant,
    kind_capture,
    // binary operators
    kind_seq,
    kind_alt,
  } kind;

  // kind_cclass, kind_meta, kind_boundary
  bool negate = false;

  // Per-kind payload. The members overlap in storage.
  union {
    char32_t c;                         // kind_char
    struct { char32_t c_min, c_max; };  // kind_range
    metachar_func_t metachar_func;      // kind_meta
    struct { int r_min, r_max; };       // kind_quant
    int capture_index;                  // kind_capture
  };

  // Creates a node of the given kind with negate == false, no children and
  // a zeroed payload.
  node_t(kind_t kind) : kind(kind) {
    // The union has no default initializer, so without this every payload
    // field would start out indeterminate. Zeroing the two-field range
    // alternative clears the storage the other members overlap on common
    // ABIs (where int and char32_t have the same size).
    c_min = 0;
    c_max = 0;
  }

  // Owned sub-nodes.
  std::vector<node_ptr_t> children;
};
// Declaration only; the definition is not in this header.
// Takes the pattern as a C string and returns an owning node pointer paired
// with an int.
// NOTE(review): the pointer is presumably the root of the parsed tree and the
// int presumably the number of capture groups — confirm against the definition.
std::pair<node_ptr_t, int> parse_regex(const char* pattern);
// Declaration only; the definition is not in this header.
// Takes a non-owning, read-only node pointer; `indent` defaults to 0.
// NOTE(review): presumably dumps the tree rooted at `node`, with `indent` as
// the nesting depth of the current node — confirm against the definition.
void print_ast(const node_t* node, int indent = 0);
// regex [:word:] is alnum or _.
// Returns 1 when `c` is '_' or is classified as alphanumeric by
// std::isalnum, and 0 otherwise. Any value that does not fit in an
// unsigned char (negative values including EOF, or larger code points)
// yields 0.
inline int isword(int c) {
  if ('_' == c) {
    return 1;
  }
  // std::isalnum is undefined for arguments that are neither EOF nor
  // representable as unsigned char, so reject those before classifying.
  if (c != static_cast<unsigned char>(c)) {
    return 0;
  }
  return std::isalnum(c) != 0 ? 1 : 0;
}
} // namespace pcre