-
Notifications
You must be signed in to change notification settings - Fork 0
/
n2mw_parser.py
296 lines (187 loc) · 9.14 KB
/
n2mw_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
"""
This module defines the class `N2MW_Parser`.
> The class can be inherited or instantiated to use its methods.
> It has a main method, `identifyTag(input)`, which matches the input against one tag and returns the function
> that is able to parse that input, so the returned function must be called with the same argument that was passed to `identifyTag`.
> The rest of the functions are the parser functions.
"""
import re
class N2MW_Parser():
    """
    Converts Markdown chunks into lists of `<Tags.*>` markup strings.

    Typical use: call `identifyTag(chunk)` to obtain the parser function for a
    chunk, then call that function with the same chunk. Parser functions
    return a list of output strings, or None when the chunk produced no output
    yet (for example while a multi-chunk code block is still being buffered).
    """

    def __init__(self, debug=False):
        """
        `debug`:(bool) --> When truthy, every step prints a debug trace
        """
        self.debuglevel = debug  # Print debug messages
        # State for elements spanning several chunks:
        # ["<closingPattern>", <callback>, "<buffer>"]. An empty closing
        # pattern means that no buffer is active.
        self.buffer = ["", None, ""]
        # Regex character-class fragment with the symbols accepted in
        # image/link texts, URLs and inline code.
        self.CHARSET_SYMBOLS = r"!\.\-\_@#%\?=\/:<>,"

    def _debug(self, *parts):
        """Print `parts` (space separated) only when debugging is enabled."""
        if self.debuglevel:
            print(*parts)

    def identifyTag(self, input):
        """
        Matches the input data with the corresponding function that parses it as a tag
        `input`:(String) --> Data that will be matched as a MD object
        RETURN:(function) --> Function that will parse the input MD object, or
                              None when there is nothing to parse yet (blank
                              input, or input swallowed by the active buffer)
        """
        # NOTE: `input` shadows the builtin, but the name is part of the
        # public signature and is kept for keyword callers.
        # Order matters: the first matching pattern wins, so the more specific
        # patterns ("####" before "###", "**" before "*") come first.
        tags = {  # All managed tags that can be parsed
            r"####\s": self.parseTitle4,
            r"###\s": self.parseTitle3,
            r"##\s": self.parseTitle2,
            r"#\s": self.parseTitle,
            r'(---|===)+': self.parseHr,
            r'>\s': self.parseQuote,
            r'```': self.parseCode,
            r'!\[': self.parseImg,
            r'\[': self.parseUrl,
            r'</?aside>': self.parseCallout,
            r'\*\*\w+': self.parseBold,
            r'\*\w+': self.parseItalic,
            r'`\w+': self.parseInlineCode,
            r'\-\s': self.parseUList,
            r'-->': self.parseTest,
        }
        self._debug('[*] DEBUG: identifyTag("', input, '") @ Buffer: ', self.buffer)

        closingPattern = self.buffer[0]
        if closingPattern == "":  # There is no active buffer
            if not input.strip():
                # Blank chunks carry no content outside of a buffered element
                return None
            for pattern, handler in tags.items():
                found = re.match(r"\s*" + pattern, input)
                self._debug(' [i] DEBUG: identifyTag("', input, '") -->', found)
                if found is not None:
                    return handler
            return self.parseParagraph

        if re.search(closingPattern + "$", input):  # Closing pattern matched
            return self.buffer[1]
        # Keep writing to the buffer (blank chunks included, they may be
        # meaningful inside of the buffered element)
        self.buffer[2] += '\n\n' + input
        return None

    def toggleBuffer(self, pattern, data, callback):
        """
        Opens the multi-chunk buffer when it is empty, closes it otherwise
        `pattern`:(String) --> Regex that marks the end of the buffered element
        `data`:(String) --> Initial content of the buffer (only used when opening)
        `callback`:(function) --> Parser that `identifyTag` returns once `pattern` is matched
        RETURN:(String|None) --> None when the buffer has just been opened, the
                                 buffered text followed by a blank line when it
                                 has just been closed
        """
        if self.buffer[0] == '':  # Buffer empty, start new one
            self.buffer[0] = pattern
            self.buffer[1] = callback
            self.buffer[2] = data
            self._debug(' [i] DEBUG: Start wrap', self.buffer)
            return None

        self.buffer[0] = ""
        self.buffer[1] = None
        self._debug(' [i] DEBUG: Stop wrap', self.buffer)
        collected = self.buffer[2]
        self.buffer[2] = ""  # Do not keep stale text around once closed
        return collected + '\n\n'

    def _escapeTemplateLiteral(self, text):
        """Escape the characters that would break out of a JS template literal."""
        # Backslashes first, so the escapes added afterwards are not doubled
        return text.replace("\\", "\\\\").replace("`", "\\`").replace("${", "\\${")

    def wrapTag(self, tag, child, params=None, escape=False):
        """
        Takes a tag and its child and creates a list with the open tag (<Tags.tag>), the child and the close tag (</Tags.tag>)
        @tag (string): Name of the component
        @child (string): Content of the component, surrounding whitespace is removed
        @params (string): Props of the component
        @escape (bool): Wrap the children inside of {``} to escape the content
        """
        params = params or ""
        # The blank before ">" is kept even without params so that the output
        # stays identical to what previous versions emitted
        opening = f"<Tags.{tag} {params}>"
        closing = f"</Tags.{tag}>"
        body = child.strip()
        if escape:
            return [opening + "{`", self._escapeTemplateLiteral(body), "`}" + closing]
        return [opening, body, closing]

    def extractTitle(self, data):
        """
        Takes a string, extracts its first line as a title parameter and returns a tuple
        The first line is always taken as the title, even when there is no other line
        RETURN ('title="My first line"', 'The rest of my lines\\nThis is a second line')
        """
        lines = data.strip().split('\n')
        # A raw double quote would terminate the attribute value too early
        # (assumes the output is consumed as JSX/HTML -- TODO confirm)
        title = lines[0].replace('"', '&quot;')
        return ('title="' + title + '"', "\n".join(lines[1:]))

    def parseFontStyle(self, string):
        """
        Parse the inline font styles for Bold, Italic & Code
        `string`:(String|None) --> Text that may contain `**bold**`, `*italic*` and inline code
        RETURN:(String|None) --> Text with the styles converted to markup and the
                                 unmatched `**`, `*` and backtick markers
                                 removed; None when `string` is None
        """
        self._debug('[*] DEBUG: parseFontStyle("', string, '")')
        if string is None:
            return None

        # Each style is converted in one pass and then its leftover markers are
        # dropped. The order is important: bold must be consumed before italic.
        string = re.sub(r"\*\*([\w\s`]+)\*\*", r"<b>\1</b>", string)
        string = string.replace("**", "")

        string = re.sub(r"\*([\w\s<>/`]+)\*", r"<i>\1</i>", string)
        string = string.replace("*", "")

        string = re.sub(r"`([\w\s<>/\*]+)`", r"<Tags.Code inline>\1</Tags.Code>", string)
        string = string.replace("`", "")
        return string

    # ---------------------------------------------------------------------------------------------------
    # Markdown object parsing functions below...
    # ---------------------------------------------------------------------------------------------------

    def _heading(self, tag, data):
        """Wrap the text of a `#` heading, without its markers, in `tag`."""
        # Whitespace goes first so that leading tabs/newlines do not stop the
        # removal of the "#" markers
        return self.wrapTag(tag, data.strip().strip(' #'))

    def parseTitle4(self, data):
        """Parse a `####` heading."""
        self._debug('[*] DEBUG: parseTitle4("', data, '")')
        # NOTE(review): level 4 is rendered with the Title3 component,
        # presumably because there is no Title4 one -- confirm.
        return self._heading("Title3", data)

    def parseTitle3(self, data):
        """Parse a `###` heading."""
        self._debug('[*] DEBUG: parseTitle3("', data, '")')
        return self._heading("Title3", data)

    def parseTitle2(self, data):
        """Parse a `##` heading."""
        self._debug('[*] DEBUG: parseTitle2("', data, '")')
        return self._heading("Title2", data)

    def parseTitle(self, data):
        """Parse a `#` heading."""
        self._debug('[*] DEBUG: parseTitle("', data, '")')
        return self._heading("Title", data)

    def parseHr(self, _):
        """Parse a horizontal rule, the input is ignored."""
        self._debug('[*] DEBUG: parseHr()')
        # NOTE(review): the only tag emitted without the "Tags." prefix.
        return ["<Hr />"]

    def parseParagraph(self, data):
        """Wrap plain text in a Paragraph component."""
        self._debug('[*] DEBUG: parseParagraph()')
        return self.wrapTag("Paragraph", data)

    def parseQuote(self, data):
        """
        Parse a `>` block quote, its first line becomes the title of the Quote
        and the remaining lines its content
        """
        self._debug('[*] DEBUG: parseQuote()')
        # Remove the "> " marker of every line and keep the rest of the text
        # untouched, punctuation included
        text = re.sub(r"^[ \t]*> ?", "", data.strip(), flags=re.MULTILINE)
        (params, body) = self.extractTitle(text)
        return self.wrapTag("Quote", body, params)

    def parseCode(self, data):
        """
        Parse a fenced code block
        RETURN:(list|None) --> The Code component once the closing fence was
                               seen, None while the block is still opened
        """
        self._debug('[*] DEBUG: parseCode("', data, '")')
        if self.buffer[0] == "":
            # Opening and closing fences in the very same chunk: there is
            # nothing to buffer, emit the block right away
            whole = re.fullmatch(r"\s*```(.*?)```\s*", data, flags=re.DOTALL)
            if whole is not None:
                return self.wrapTag("Code", whole.group(1), escape=True)

        # NOTE(review): the text that follows the opening fence (usually the
        # language name) ends up as the first line of the code -- confirm.
        content = data.strip(" `")
        buffered = self.toggleBuffer("```", content, self.parseCode)
        if buffered is None:  # Buffer has just been opened
            return None
        return self.wrapTag("Code", buffered + content, escape=True)

    def _parseMedia(self, prefix, data):
        """Return the (text, url) pair of a `[text](url)` construct preceded by `prefix`, or None."""
        charset = rf"\w\s{self.CHARSET_SYMBOLS}"
        found = re.search(prefix + rf'\[([{charset}]+)\]\(([{charset}]+)\)', data)
        return None if found is None else found.groups()

    def parseImg(self, data):
        """Parse `![alt](src)`, falls back to a paragraph when it is not a well-formed image."""
        self._debug('[*] DEBUG: parseImg()')
        parts = self._parseMedia('!', data)
        if parts is None:
            return self.parseParagraph(data)
        alt, src = parts
        return ['<Tags.Image alt={"' + alt + '"} img={"' + src + '"} />']

    def parseUrl(self, data):
        """Parse `[title](src)`, falls back to a paragraph when it is not a well-formed link."""
        self._debug('[*] DEBUG: parseUrl()')
        parts = self._parseMedia('', data)
        if parts is None:
            return self.parseParagraph(data)
        title, src = parts
        return ['<Tags.Url title={"' + title + '"} src={"' + src + '"} />']

    def parseCallout(self, data):
        """
        Parse an `<aside>` callout
        RETURN:(list|None) --> The Callout component once `</aside>` was seen,
                               None while the callout is still opened
        """
        self._debug('[*] DEBUG: parseCallout()', data)
        if self.buffer[0] == "":
            # Opening and closing tags in the very same chunk
            whole = re.fullmatch(r"\s*<aside>(.*?)</aside>\s*", data, flags=re.DOTALL)
            if whole is not None:
                return self.wrapTag("Callout", whole.group(1))

        data = re.sub(r"</?aside>", "", data)
        buffered = self.toggleBuffer("</aside>", data, self.parseCallout)
        if buffered is None:  # If buffer is still opened
            return None
        return self.wrapTag("Callout", buffered + data)  # If buffer is closed

    def parseBold(self, data):
        """Parse the first `**bold**` span, falls back to a paragraph when there is none."""
        self._debug('[*] DEBUG: parseBold()')
        found = re.search(r"\*\*([\w\s\(\)]+)\*\*", data)
        if found is None:
            return self.parseParagraph(data)
        return ["<b>", found.group(1), "</b>"]

    def parseItalic(self, data):
        """Parse the first `*italic*` span, falls back to a paragraph when there is none."""
        self._debug('[*] DEBUG: parseItalic()')
        found = re.search(r"\*([\w\s\(\)]+)\*", data)
        if found is None:
            return self.parseParagraph(data)
        return ["<i>", found.group(1), "</i>"]

    def parseInlineCode(self, data):
        """Parse the first inline code span, falls back to a paragraph when there is none."""
        self._debug('[*] DEBUG: parseInlineCode()')
        found = re.search(rf"`([\w\s{self.CHARSET_SYMBOLS}]+)`", data)
        if found is None:
            return self.parseParagraph(data)
        return self.wrapTag("Code", found.group(1), "inline")

    def parseUList(self, data):
        """Parse a `-` bullet list (one item per line) into a single UList component."""
        self._debug('[*] DEBUG: parseUList()')
        quotedItems = []
        for line in data.strip().split("\n"):
            item = line.strip(' -')
            # Items are emitted as double-quoted string literals
            item = item.replace("\\", "\\\\").replace('"', '\\"')
            quotedItems.append('"' + item + '"')
        itemsListStr = "{[" + ", ".join(quotedItems) + "]}"
        return [f"<Tags.UList items={itemsListStr} />"]

    def parseTest(self, data):
        """
        Parse a chunk marked with `-->`: the markers are removed and the
        remaining text is parsed by whatever parser `identifyTag` selects
        RETURN:(list|None) --> Output of the selected parser, None when there is none
        """
        self._debug('[*] DEBUG: parseTest()')
        remainder = data.replace("-->", "")
        handler = self.identifyTag(remainder)
        if handler is None:
            return None
        return handler(remainder)