import sys
import hashlib
import argparse
parser = argparse.ArgumentParser(description="xx")
parser.add_argument('inFile', help='File to open')
parser.add_argument('-x', dest='dumpHex', help='Dump hex instead of writing file', action="store_true")
parser.add_argument('-o', dest='outFile', help='Output Filename')
parser.add_argument('-r', dest='rawOut', help='Dump buffer to stdout instead of writing file', action="store_true")
xxVersion = "0.5"
# Comment markers - the box-drawing comment characters are generated when a token is checked (see getCommentList)
asciiComments = [ "#", ";", "%", "|","\x1b", "-", "/" ]
twoCharComments = [ "--", "//", ]
filterList = [",","$","\\x","0x","h",":"," "]
# XXX: Add rest of the escape sequences, what do we support here?
escapes = {"n":"\n", "\\":"\\", "t":"\t", "r":"\r"} # List of escape sequences to be interpreted when parsing quoted strings
class xxToken:
    """
    Class to hold an xx token.
    This is a text element with certain attributes that need to be tracked.
    """
    def __init__(self, inData, lineNum, isComment, isString):
        # Data
        self.lineNum = lineNum         # The line number
        self.rawData = inData          # The raw token data. Save a copy of this and don't touch it.
        self.rawDataLen = len(inData)  # The length of the raw token data. Save a copy of this and don't touch it.
        self.normData = inData         # The normalized data; this is what gets modified as the token is parsed.
        self.normDataLen = len(inData) # Updated whenever normData is modified.
        self.hexData = ""              # The fully parsed hex data that is passed to the main buffer for output.
        # State
        self.isString = isString       # Is this token a string?
        self.isAscii = 0               # Does this token contain ASCII data?
        self.isHex = 0                 # Was this token successfully parsed as hex?
        self.isComment = isComment     # Is this token a comment?
        self.hasComment = 0            # Does this token contain a comment?
    def __str__(self):
        byterepr = bytes(self.rawData, 'latin1')
        return f"t\"{byterepr}\""
    def __repr__(self):
        byterepr = bytes(self.rawData, 'latin1')
        return f"xxToken({byterepr}, lineNum={self.lineNum}, isComment={self.isComment})"
    def testASCII(self):
        """
        Tests if the token can be encoded as ASCII.
        """
        try:
            if self.normData.encode('ascii'):
                self.isAscii = 1
        except UnicodeEncodeError:
            return
    def testComment(self):
        """
        Tests if a token either is a comment or contains a comment.
        Some may be missed in the first pass, so we double-check here.
        """
        if self.normDataLen > 0:
            firstCharComment = testCharComment(self.normData[0]) # Test the first char
            if firstCharComment:
                self.isComment = 1
                return
            else:
                if self.normDataLen > 1:
                    if self.normData[0:2] in twoCharComments:
                        self.isComment = 1 # The first two chars are a two-char comment marker
                        return
            # If you get here, it could still be a comment, but within a string.
            # Example: 41414141#comment
            if self.isAscii == 0:
                cL = getCommentList()
                tempString = self.normData
                for comment in cL:
                    if comment in tempString:
                        tempString = tempString.split(comment)[0] # Split the comment away from this
                        self.hasComment = 1
                        self.normData = tempString
                        self.normDataLen = len(tempString)
            else:
                return 0
    def testHexData(self):
        """
        Attempts to decode the buffer as hex. On success, self.hexData is filled and self.isHex is set.
        """
        if self.isComment == 0 and self.isString == False:
            tempData = filterIgnored(self.normData)
            try:
                testHex = bytes.fromhex(tempData)
                if len(testHex) != 0:
                    # If we pass a string containing only whitespace to bytes.fromhex()
                    # it returns an empty bytes object. We have to fail that case or we
                    # lose the whitespace.
                    self.isHex = 1
                    self.hexData = tempData
                    self.normData = tempData
                    self.normDataLen = len(tempData)
            except ValueError:
                return
    def getHexFromString(self):
        """
        Takes double-quote enclosed string data and converts it to hexData.
        """
        if self.isAscii and len(self.hexData) == 0:
            if self.isComment == 0:
                self.hexData = ascii2hex(self.normData)
    def testBinary(self):
        """
        Converts a binary literal of the form 0y01000001 into a two-digit hex byte.
        """
        if self.isString == False:
            if len(self.normData) == 10:
                if self.normData[0:2] == "0y":
                    bindata = self.normData[2:]
                    for c in bindata:
                        if (c != "0") and (c != "1"):
                            return
                    self.normData = "{:02x}".format(int(bindata, 2))
################################################################################
def getTokenAttributes(inTok):
    """
    Sets the token's attributes. The checks run in order:
    ASCII, comment, binary, hex, then string-to-hex conversion.
    """
    inTok.testASCII()
    inTok.testComment()
    inTok.testBinary()
    inTok.testHexData()
    inTok.getHexFromString()
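# Illustrative flow (assumed, not part of the tool's CLI):
#   t = xxToken("de:ad", 1, False, False)
#   getTokenAttributes(t)   # after the checks, t.hexData == "dead"
#   s = xxToken("AB", 2, False, True)    # token text taken from inside quotes
#   getTokenAttributes(s)   # after the checks, s.hexData == "4142"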
def getCommentList():
    """
    Builds the full list of comment markers for comparison: single-char markers,
    two-char markers, and the box-drawing comment characters (code points 9472 through 9632).
    """
    cList = []
    for c in asciiComments:
        cList.append(c)
    for c in twoCharComments:
        cList.append(c)
    for c in range(9472, 9633):
        cList.append(chr(c))
    return cList
def filterIgnored(inText):
    """
    This function filters out ignored characters.
    """
    for f in filterList:
        inText = inText.replace(f, "")
    return inText
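# Illustrative examples (assumed):
#   filterIgnored("0x41,0x42")   -> "4142"
#   filterIgnored("de:ad be:ef") -> "deadbeef"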
def testCharComment(inChar):
    """
    A generic comment tester, checks if the input character is a comment or not
    """
    tCom = inChar
    o = ord(tCom)
    if (o >= 9472) and (o < 9632):
        return 1
    elif tCom in asciiComments:
        return 1
    else:
        return 0
################################################################################
def ascii2hex(inString):
    """
    Convert ASCII string to hex
    """
    formatted = ""
    for char in inString:
        hex_char = format(ord(char), "02x")
        formatted += hex_char
    return formatted
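# Illustrative example (assumed): ascii2hex("AB\n") -> "41420a"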
def writeBin(outbuf, outfile):
    """
    Writes the binary file
    """
    #outfile = f"{file_name.split('.xx')[0]}.{h}.bin"
    with open(outfile, 'wb') as f:
        f.write(outbuf)
    print(outfile)
def dHex(inBytes):
    """
    Does a simple hex dump, use yxd library later
    """
    offs = 0
    while offs < len(inBytes):
        bHex = ""
        bAsc = ""
        bChunk = inBytes[offs:offs+16]
        for b in bChunk:
            bAsc += chr(b) if chr(b).isprintable() and b < 0x7F else '.'
            bHex += "{:02x} ".format(b)
        sp = " " * (48 - len(bHex))
        print("{:08x}: {}{} {}".format(offs, bHex, sp, bAsc))
        offs = offs + 16
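# Illustrative output line (assumed, spacing approximate):
#   00000000: 48 65 6c 6c 6f 0a                               Hello.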
################################################################################
"""
inputs:
multilineComment: are we currently within a multi-line comment (initial value: False)
joinedLine: incremental fragments around multi-line comment (initial value: "")
line: the current line to filter
outputs:
multilineComment: updated accordingly
joinedLine: updated accordingly
lineResult: filtered line ready to consume, if mustContinue is False
mustContinue: if True, the caller must continue / loop to get a new line
"""
def filterMultLineComments(multilineComment, joinedLine, line):
lineResult = joinedLine
joinedLine = ""
mustContinue = False
while len(line) > 0:
if multilineComment:
if "*/" in line:
l = line.split("*/")
line = "*/".join(l[1:])
multilineComment = False
else:
joinedLine += lineResult
mustContinue = True
break
else:
if "/*" in line:
l = line.split("/*")
lineResult += l[0]
line = "/*".join(l[1:])
multilineComment = True
else:
lineResult += line
break
return multilineComment, joinedLine, lineResult, mustContinue
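# Illustrative example (assumed) of a /* ... */ comment spanning two input lines:
#   filterMultLineComments(False, "",    "41 /* start") -> (True,  "41 ", "41 ",    True)
#   filterMultLineComments(True,  "41 ", "end */ 42")   -> (False, "",    "41  42", False)
# When mustContinue is True the caller keeps reading lines; joinedLine carries the
# text seen so far until the comment is closed.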
################################################################################
def tokenizeXX(xxline, lineNum):
    # We cannot just split() the string, since that would corrupt repeated whitespace.
    # Quoted strings have to be interpreted verbatim, with no changes.
    # XXX: newline.xx: Comment gets inserted into file; "\n" error
    xxline = xxline.strip()
    tokens = []
    buf = ""
    verbatim = False # Verbatim mode means we are interpreting data inside a string
    isEscape = False
    isString = False
    for c in xxline:
        if c == "\\" and not isEscape and verbatim: # Interpret escape sequences
            isEscape = True
            continue
        if isEscape:
            # If the escape sequence is known, replace it; otherwise copy it as-is
            if c in escapes:
                buf += escapes[c]
            else:
                buf += "\\"
                buf += c
            isEscape = False
            continue
        if c == '"':
            # When we find a quote, toggle verbatim mode - this preserves
            # whitespace and comment characters inside strings
            verbatim = not verbatim
            isString = True # This flag indicates that this buffer was a string
            continue
        if c == " " and not verbatim:
            # Split on spaces, but only if we are not inside a string right now
            if buf != "":
                # Avoid creating empty tokens if spaces are repeated.
                isComment = False
                for k in asciiComments + twoCharComments:
                    if k in buf:
                        isComment = True
                        break
                tokens.append(xxToken(buf, lineNum, isComment, isString))
                isString = False
                buf = ""
            continue
        buf += c
    tokens.append(xxToken(buf, lineNum, False, isString)) # Append the last token at end of line
    return tokens
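# Illustrative example (assumed):
#   tokenizeXX('41 42 "hi" # note', 1)
# produces tokens for "41", "42", the quoted string "hi", a "#" token flagged as a
# comment, and the word "note"; parseXX stops emitting bytes for the line once it
# reaches the comment token.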
def parseXX(xxFile):
    xxOut = b""
    lineNum = 0
    joinedLine = ""
    multilineComment = False
    for line in xxFile:
        lineNum = lineNum + 1
        multilineComment, joinedLine, line, mustContinue = filterMultLineComments(multilineComment, joinedLine, line)
        if mustContinue:
            continue
        lineTokens = tokenizeXX(line, lineNum)
        isComment = 0
        linesHexData = ""
        for t in lineTokens:
            getTokenAttributes(t)
            if t.isComment or t.hasComment:
                isComment = 1
                break
            linesHexData += t.hexData
        xxOut += bytes.fromhex(linesHexData)
    return xxOut
if __name__ == '__main__':
    args = parser.parse_args()
    inFile = args.inFile
    dumpHex = args.dumpHex
    outFile = args.outFile
    rawOut = args.rawOut
    with open(inFile, "r") as f:
        xxFileLines = f.readlines()
    out = parseXX(xxFileLines)
    if dumpHex:
        dHex(out)
    elif rawOut:
        sys.stdout.buffer.write(out)
    else:
        m = hashlib.sha256()
        m.update(out)
        shorthash = m.digest().hex()[0:8]
        if outFile:
            filename = outFile
        else:
            filename = f"{inFile.split('.xx')[0]}.{shorthash}.bin"
        writeBin(out, filename)
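# Usage (assumed invocation, based on the argparse options above):
#   python3 xx.py input.xx              # write input.<8-char sha256>.bin
#   python3 xx.py input.xx -x           # hex dump to stdout instead of writing a file
#   python3 xx.py input.xx -o out.bin   # choose the output filename
#   python3 xx.py input.xx -r > out.bin # write raw bytes to stdout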