-
Notifications
You must be signed in to change notification settings - Fork 0
/
autoroutes.pyx
409 lines (365 loc) · 13.7 KB
/
autoroutes.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
# cython: language_level=3
cimport cython
from cpython cimport bool
import re
class InvalidRoute(Exception):
...
cdef enum:
MATCH_DIGIT = 1, MATCH_ALNUM, MATCH_NOSLASH, MATCH_NODASH, MATCH_ALPHA, MATCH_ALL, MATCH_ANY, MATCH_REGEX
DEFAULT_MATCH_TYPE = 'string' # Faster default, works for most common use case /{var}/.
MATCH_TYPES = {
'alnum': MATCH_ALNUM,
'alpha': MATCH_ALPHA,
'any': MATCH_ANY,
'digit': MATCH_DIGIT,
DEFAULT_MATCH_TYPE: MATCH_NOSLASH,
'path': MATCH_ALL,
}
PATTERNS = {
MATCH_ALL: '.+',
MATCH_ANY: '.*',
MATCH_ALNUM: '\w+',
MATCH_ALPHA: '[a-zA-Z]+',
MATCH_DIGIT: '\d+',
MATCH_NOSLASH: '[^/]+',
}
cdef int common_root_len(string1, string2):
cdef unsigned int bound, i
bound = min(len(string1), len(string2))
for i in range(bound):
if string1[i] != string2[i]:
return i
else:
return bound
@cython.final
cdef class Edge:
cdef public str pattern
cdef public str regex
cdef int placeholder_start
cdef int placeholder_end
cdef unsigned int pattern_len
cdef public str prefix
cdef public str suffix
cdef unsigned int prefix_len
cdef signed int suffix_len
cdef public Node child
cdef public unsigned int match_type
def __init__(self, pattern, child):
self.pattern = pattern
self.child = child
self.compile()
def __repr__(self):
return '<Edge {}>'.format(self.pattern)
cdef branch_at(self, unsigned int prefix_len):
cdef:
Node new_child = Node()
str rest = self.pattern[prefix_len:]
new_child.connect(self.child, rest)
self.child = new_child
new_child.compile()
self.pattern = self.pattern[:prefix_len]
self.compile()
cdef Node join(self, str path):
cdef:
unsigned int local_index, candidate_index
Edge candidate = Edge(path, None)
local_index, candidate_index = self.compare(candidate)
del candidate
if not local_index:
return None
if local_index < self.pattern_len:
self.branch_at(local_index)
if candidate_index < len(path):
return self.child.insert(path[candidate_index:])
return self.child
cdef tuple compare(self, Edge other):
cdef unsigned int common_len
if self.prefix_len and other.prefix_len:
common_len = common_root_len(self.prefix, other.prefix)
if not common_len: # Nothing common.
return 0, 0
# At least one prefix is not finished, no need to compare further.
elif common_len < self.prefix_len or common_len < other.prefix_len:
return common_len, common_len
elif self.prefix_len or other.prefix_len:
return 0, 0
# We now know prefix are either none or equal.
if not self.match_type or self.match_type == MATCH_REGEX or self.match_type != other.match_type:
return self.prefix_len, other.prefix_len
# We now know match types are mergeable, let's see if we should also deal with suffix.
if self.suffix and other.suffix:
common_len = common_root_len(self.suffix, other.suffix)
if common_len:
return self.placeholder_end + common_len + 1, other.placeholder_end + common_len + 1
return self.placeholder_end + 1, other.placeholder_end + 1
cpdef str compile(self):
"""Compute and cache pattern properties.
Eg. with pattern="foo{id}bar", we would have
prefix=foo
suffix=bar
placeholder_start=3
placeholder_end=6
"""
cdef:
list parts
str match_type_or_regex = DEFAULT_MATCH_TYPE
self.placeholder_start = self.pattern.find('{') # Slow, but at compile it's ok.
self.placeholder_end = self.pattern.find('}')
self.pattern_len = len(self.pattern)
self.prefix = self.pattern[:self.placeholder_start] if self.placeholder_start != -1 else self.pattern
self.prefix_len = len(self.prefix)
if self.placeholder_end != -1 and <unsigned>self.placeholder_end < self.pattern_len:
self.suffix = self.pattern[self.placeholder_end+1:]
self.suffix_len = len(self.suffix)
else:
self.suffix = None
self.suffix_len = 0
if self.placeholder_start != -1 and self.placeholder_end != -1:
segment = self.pattern[self.placeholder_start:self.placeholder_end]
parts = segment.split(':')
if len(parts) == 2:
match_type_or_regex = parts[1]
if match_type_or_regex in MATCH_TYPES:
self.match_type = MATCH_TYPES.get(match_type_or_regex)
self.regex = PATTERNS.get(self.match_type)
else:
self.match_type = MATCH_REGEX
self.regex = match_type_or_regex
self.regex = f"^{self.prefix}({self.regex})"
else: # Flat string.
self.regex = f"^({self.pattern})"
self.match_type = 0 # Reset, in case of branching.
cdef signed int match(self, str path, signed int path_len, list params):
cdef:
signed int i = -1
signed int consume_until = path_len
str capture
# Flat match.
if not self.match_type:
if path.startswith(self.pattern):
return self.pattern_len
return -1
# Placeholder is not at the start (eg. "foo.{ext}").
if self.placeholder_start > 0:
if not self.prefix == path[:self.placeholder_start]:
return -1
if self.suffix:
if self.suffix_len > path_len:
return -1
consume_until = path_len - self.suffix_len
if self.match_type == MATCH_ALL or self.match_type == MATCH_ANY:
i = consume_until
elif self.match_type == MATCH_NOSLASH:
for i in range(self.placeholder_start, consume_until):
if path[i] == '/':
break
else:
i = consume_until
elif self.match_type == MATCH_ALPHA:
for i in range(self.placeholder_start, consume_until):
if not path[i].isalpha():
break
else:
i = consume_until
elif self.match_type == MATCH_DIGIT:
for i in range(self.placeholder_start, consume_until):
if not path[i].isdigit():
break
else:
i = consume_until
elif self.match_type == MATCH_ALNUM:
for i in range(self.placeholder_start, consume_until):
if not path[i].isalnum():
break
else:
i = consume_until
elif self.match_type == MATCH_NODASH:
for i in range(self.placeholder_start, consume_until):
if path[i] == '-':
break
else:
i = consume_until
if i == 0 and self.match_type != MATCH_ANY:
return -1
# Not all path matched, and edge is a leaf, makes no sense to
# consume a part of a path
if i != path_len and not self.child.edges and not self.suffix_len:
return -1
capture = path[self.placeholder_start:i]
if self.suffix_len:
# The placeholder is not at the end (eg. "{name}.json").
if path[i:i+self.suffix_len] != self.suffix:
return -1
i = i + self.suffix_len
params.append(capture) # Slow.
return i
cdef class Node:
cdef public dict payload
cdef public list edges
cdef public str path
cdef public object regex
cdef public str pattern
cdef public list slugs
cdef unsigned int slugs_count
SLUGS = re.compile('{([^:}]+).*?}')
def __cinit__(self):
self.payload = {}
def __repr__(self):
return f'<Node {self.path}|{self.pattern}>'
cdef void attach_route(self, str path, dict payload):
self.slugs = Node.SLUGS.findall(path)
self.slugs_count = len(self.slugs)
self.path = path
self.payload.update(payload)
cdef Edge connect(self, child, pattern):
cdef Edge edge
if not self.edges:
self.edges = []
for edge in self.edges:
if edge.pattern == pattern:
break
else:
edge = Edge(pattern=pattern, child=child)
self.edges.append(edge)
return edge
cdef Node common_edge(self, str path):
cdef:
Edge edge
Node node
if self.edges:
for edge in self.edges:
node = edge.join(path)
if node:
return node
cdef Edge match(self, str path, list params):
cdef:
signed int path_len = len(path)
signed int match_len
Edge edge
object matched
if self.edges:
if self.pattern: # Non optimizable branching.
matched = self.regex.match(path)
if matched:
edge = self.edges[matched.lastindex-1]
if edge.placeholder_start != -1: # Is the capture a slug value?
params.append(matched.group(matched.lastindex))
if matched.end() == path_len and not edge.child.pattern:
if edge.child.path:
return edge
else:
return edge.child.match(path[matched.end():], params)
else:
for edge in self.edges:
match_len = edge.match(path, path_len, params)
if match_len != -1:
if path_len == match_len and edge.child.path:
return edge
return edge.child.match(path[match_len:], params)
return None
cdef void compile(self):
cdef:
bool has_param = False
str pattern = ''
unsigned int total = 0
Edge edge
if self.edges:
total = len(self.edges)
for i, edge in enumerate(self.edges):
pattern += edge.regex
if edge.pattern.find('{') != -1 and edge.match_type == MATCH_REGEX:
has_param = True
if i + 1 < total:
pattern += '|'
# Run in regex mode only if we have a non optimizable pattern.
if has_param:
self.pattern = pattern
self.regex = re.compile(pattern)
cdef Node insert(self, str path):
cdef:
Node node
Edge edge = None
str prefix
int bound, end
unsigned int nb_slugs
node = self.common_edge(path)
if node:
return node
nb_slugs = path.count('{')
start = path.find('{')
if nb_slugs > 1:
# Break into parts
child = Node()
start = path.find('{', start + 1) # Goto the next one.
self.connect(child, path[:start])
return child.insert(path[start:])
else:
child = Node()
edge = self.connect(child, path)
if nb_slugs:
if edge.match_type == MATCH_REGEX: # Non optimizable, split if pattern has prefix or suffix.
if start > 0: # slug does not start at first char (eg. foo{slug})
edge.branch_at(start)
end = path.find('}')
if end + 1 < len(path): # slug does not end pattern (eg. {slug}foo)
edge.branch_at(end + 1)
return child
cdef class Routes:
cdef public Node root
def __cinit__(self):
self.root = Node()
def add(self, str path, **payload):
"""Add a new route
path: string describing the new route path
payload: any key/value that would be stored with the new route
"""
cdef Node node
if path.count('{') != path.count('}'):
raise InvalidRoute('Unbalanced curly brackets for "{path}"'.format(path=path))
node = self.root.insert(path)
node.attach_route(path, payload)
self.compile(self.root)
def match(self, str path):
"""Try to find a route that matches `path`, and return the payload if any."""
return self._match(path)
cdef tuple _match(self, str path):
cdef:
list values = []
dict params = {}
list slugs
unsigned int i
edge = self.root.match(path, values)
if edge:
slugs = edge.child.slugs
for i in range(edge.child.slugs_count):
params[slugs[i]] = values[i]
return edge.child.payload, params
return None, None
def dump(self):
dump(self.root)
cdef compile(self, Node node):
cdef:
Edge edge
node.compile()
if node.edges:
for edge in node.edges:
self.compile(edge.child)
cdef dump(node, level=0):
i = " " * level * 4
print(f'{i}(o)')
if node.pattern:
print(f'{i}| regexp: %s' % node.pattern)
if node.payload:
print(f'{i}| data: %s' % node.payload)
if node.path:
print(f'{i}| path: %s' % node.path)
print(f'{i}| slugs: %s' % node.slugs)
if node.edges:
for edge in node.edges:
if edge.match_type:
pattern = edge.prefix + edge.regex + edge.suffix or '' + f" ({edge.match_type})"
else:
pattern = edge.pattern
print(f'{i}' + '\--- %s' % pattern)
if edge.child:
dump(edge.child, level + 1)