-
Notifications
You must be signed in to change notification settings - Fork 10
/
make_space_pattern.moon
141 lines (111 loc) · 2.4 KB
/
make_space_pattern.moon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
-- This script generates the source code of an optimal LPeg pattern for
-- matching whitespace.
-- Unicode whitespace code points, taken from:
-- https://en.wikipedia.org/wiki/Whitespace_character#Unicode
space_codepoints = {
  0x0009 -- CHARACTER TABULATION
  0x000A -- LINE FEED
  0x000B -- LINE TABULATION
  0x000C -- FORM FEED
  0x000D -- CARRIAGE RETURN
  0x0020 -- SPACE
  0x0085 -- NEXT LINE
  0x00A0 -- NO-BREAK SPACE
  0x1680 -- OGHAM SPACE MARK
  0x2000 -- EN QUAD
  0x2001 -- EM QUAD
  0x2002 -- EN SPACE
  0x2003 -- EM SPACE
  0x2004 -- THREE-PER-EM SPACE
  0x2005 -- FOUR-PER-EM SPACE
  0x2006 -- SIX-PER-EM SPACE
  0x2007 -- FIGURE SPACE
  0x2008 -- PUNCTUATION SPACE
  0x2009 -- THIN SPACE
  0x200A -- HAIR SPACE
  0x2028 -- LINE SEPARATOR
  0x2029 -- PARAGRAPH SEPARATOR
  0x202F -- NARROW NO-BREAK SPACE
  0x205F -- MEDIUM MATHEMATICAL SPACE
  0x3000 -- IDEOGRAPHIC SPACE

  -- related: invisible characters that are not classified as whitespace
  0x180E -- MONGOLIAN VOWEL SEPARATOR
  0x200B -- ZERO WIDTH SPACE
  0x200C -- ZERO WIDTH NON-JOINER
  0x200D -- ZERO WIDTH JOINER
  0x2060 -- WORD JOINER
  0xFEFF -- ZERO WIDTH NO-BREAK SPACE
}
import utf8_encode from require "web_sanitize.unicode"
-- Split a string into an array of its byte values, one integer per byte,
-- in string order. An empty string gives an empty table.
bytes = (str) ->
  codes = {}
  for char in str\gmatch "."
    codes[#codes + 1] = string.byte char
  codes
-- One entry per code point: the array of bytes of its utf8_encode result,
-- kept in the same order as space_codepoints.
byte_list = for codepoint in *space_codepoints
  encoded = utf8_encode codepoint
  bytes encoded
-- Fold the byte sequences into a trie keyed by byte value. A byte that ends a
-- sequence is stored as `true`; a byte with more bytes after it maps to a
-- nested table. A sequence that is a strict prefix of another one cannot be
-- represented and aborts with "invalid nesting".
tree = {}
for sequence in *byte_list
  node = tree
  last = #sequence
  for pos = 1, last
    byte = sequence[pos]
    if pos < last
      node[byte] or= {}
      -- `true` here means a shorter sequence already ends at this byte
      error "invalid nesting" unless type(node[byte]) == "table"
      node = node[byte]
    else
      -- a table here means a longer sequence already passes through this byte
      error "invalid nesting" if type(node[byte]) == "table"
      node[byte] = true
-- compile the byte trie into a pattern tree,
-- flattening where necessary
import types from require "tableshape"
-- Decides whether a compiled sub-pattern can be collapsed into its parent.
-- compile_pattern calls this on each child node; on a match it reads the
-- tagged `bytes` table and prepends the parent byte to it.
-- NOTE(review): presumably matches exactly {"P", <table>} per tableshape's
-- shape/tag semantics -- confirm against the tableshape documentation.
literal_node = types.shape { "P", types.table\tag "bytes" }
simple_sequence = types.one_of { literal_node }
-- Compile one trie level into a pattern node.
--
-- `level` maps a byte value either to `true` (a sequence ends at this byte)
-- or to a nested table (the sequence continues). The returned node is one of:
--   {"P", bytes}       literal byte sequence
--   {"S", bytes}       set of alternative single bytes
--   {"*", a, b}        a followed by b
--   {"+", a, b, ...}   choice between the alternatives
--
-- Keys are visited in ascending byte order so the generated pattern text is
-- identical on every run; `pairs` alone gives no ordering guarantee. The
-- alternatives at one level always start with distinct bytes, so reordering
-- them does not change what the pattern matches.
compile_pattern = (level) ->
  -- split the keys into terminal bytes and bytes that lead to a sub-level
  term_bytes, branch_bytes = {}, {}
  for byte, v in pairs level
    if v == true
      table.insert term_bytes, byte
    elseif type(v) == "table"
      table.insert branch_bytes, byte
  table.sort term_bytes
  table.sort branch_bytes

  out = {}

  -- all terminal bytes of this level collapse into one literal or one set
  if next term_bytes
    if #term_bytes == 1
      table.insert out, {"P", term_bytes}
    else
      table.insert out, {"S", term_bytes}

  for byte in *branch_bytes
    patt = compile_pattern level[byte]
    if match = simple_sequence patt
      -- the child is a plain literal: prepend this byte instead of nesting
      table.insert out, {"P", {byte, unpack match.bytes}}
    else
      table.insert out, {"*", {"P", {byte}}, patt}

  -- a single alternative needs no "+" wrapper
  if #out == 1
    out[1]
  else
    {"+", unpack out}
-- binding strength of the operators that can appear in the generated source;
-- a higher number binds tighter ("*" before "+", as in Lua)
precedences = { "+": 1, "*": 2 }

-- pattern tree for the complete byte trie
out = compile_pattern tree
-- Render a pattern node as LPeg source text.
--
-- Leaf nodes ("S" and "P") become a call with a string of decimal byte
-- escapes, e.g. P("\226\128\168"), and return only the text.
-- Operator nodes ("+" and "*") join their rendered children with the operator
-- and additionally return the operator's precedence, so that the caller can
-- parenthesize a child that binds looser than itself.
-- Any other node type raises an error that includes a dump of the node.
node_to_lpeg = (node) ->
  op = node[1]
  switch op
    when "S", "P"
      escaped = table.concat node[2], "\\"
      "#{op}(\"\\#{escaped}\")"
    when "+", "*"
      own = precedences[op]
      parts = {}
      for i = 2, #node
        text, child = node_to_lpeg node[i]
        -- leaves return no precedence and never need parentheses
        text = "(#{text})" if child and own > child
        parts[#parts + 1] = text
      table.concat(parts, " #{op} "), own
    else
      error "unknown node type: #{op}: #{require("moon").dump node}"
-- print only the generated source text; the precedence that node_to_lpeg
-- returns as a second value is dropped by the single assignment
lpeg_source = node_to_lpeg out
print lpeg_source