-
Notifications
You must be signed in to change notification settings - Fork 7
/
parse.py
499 lines (450 loc) · 20.3 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
# MIT License
# Copyright (c) 2019 Thomas Zhu
# TODO: Support older versions of Python by not using f-strings
# TODO: Support writing to .DS_Stores
# TODO: Documentation
# TODO: macOS alias
import datetime
import plistlib
import sys
import warnings
# Reuse the project README as the module docstring when it is available.
# The path is relative to the current working directory, so the file is
# frequently absent (e.g. when this module is imported from elsewhere);
# that must not make the import itself fail.
try:
    with open('README.md', encoding='utf-8') as readme:
        __doc__ = readme.read()
except (OSError, UnicodeDecodeError):
    __doc__ = None
# The Python types used for the different .DS_Store types:
# 'bool': bool
# 'shor': int
# 'long': int
# 'comp': int
# 'dutc': int
# 'type': str (of length 4)
# 'blob': bytes
# 'ustr': str
def show_date(timestamp):
    """Format a timestamp counted from 1904-01-01 as readable text.

    Args:
        timestamp: Number of seconds (int or float) since 1904-01-01 00:00.

    Returns:
        A string such as 'January 1, 1904 at 12:00 AM'.

    Raises:
        OverflowError: If the resulting date is outside the range that
            datetime can represent.
    """
    # Add the offset directly to the 1904 epoch. Going through
    # fromtimestamp() twice would mix in the local UTC offset at two
    # different moments and shift the result whenever they differ (DST).
    date = datetime.datetime(1904, 1, 1) + datetime.timedelta(seconds=timestamp)
    # Day and hour are formatted by hand because the '%-d' / '%-I' strftime
    # directives are platform extensions and not available everywhere.
    hour = date.hour % 12 or 12
    return f'{date:%B} {date.day}, {date:%Y} at {hour}:{date:%M} {date:%p}'
def show_bytes(data):
    """Render a bytes value as text, decoding the formats that are recognised.

    Args:
        data: The raw bytes to describe.

    Returns:
        A string. Binary property lists are parsed and rendered with
        show() (one line per entry, joined by newlines); data starting with
        b'book' is returned as an unparsed repr; data starting with b'Bud1'
        is parsed as a nested DSStore and its records are listed; anything
        else is returned as '0x' followed by the hex digits.
    """
    # bytes objects provide isdigit() but not isdecimal().
    if data.startswith(b'bplist') and data[6:8].isdigit():
        # show() is a generator; join it so callers get a plain string.
        return '\n'.join(show(plistlib.loads(data)))
    if data.startswith(b'book'):
        # TODO
        return f'(in macOS alias type, unparsed) {data!r}'
    if data.startswith(b'Bud1'):
        # The blob starts at the magic bytes, so prepend the alignment int
        # that DSStore's header parser reads first.
        store = DSStore(b'\x00\x00\x00\x01' + data)
        lines = []
        for record in store.read():
            lines.append(record.name)
            lines.extend(f'\t{line}' for line in record.human_readable())
        return '\n'.join(lines)
    return f'0x{data.hex()}'


def is_inline(data):
    """Return True if data is rendered on one line (not a dict/tuple/list)."""
    return not isinstance(data, (dict, tuple, list))


def show(data, tab_depth=0):
    """Yield the lines of an indented, YAML-like rendering of data.

    Args:
        data: A value to render; dicts, tuples and lists are walked
            recursively, bytes go through show_bytes(), bool/int/str are
            shown with str() and everything else with repr().
        tab_depth: Number of tab characters prefixed to each yielded line.

    Yields:
        One string per output line.
    """
    tabs = '\t' * tab_depth
    if isinstance(data, dict):
        for key, value in data.items():
            if is_inline(value):
                yield f'{tabs}{key}: {show_one(value)}'
            else:
                # Containers go on the following lines, one level deeper.
                yield f'{tabs}{key}:'
                yield from show(value, tab_depth=tab_depth + 1)
    elif isinstance(data, (tuple, list)):
        for value in data:
            if is_inline(value):
                yield f'{tabs}- {show_one(value)}'
            else:
                yield f'{tabs}-'
                yield from show(value, tab_depth=tab_depth + 1)
    elif isinstance(data, bytes):
        yield f'{tabs}{show_bytes(data)}'
    elif isinstance(data, (bool, int, str)):
        yield f'{tabs}{data!s}'
    else:
        yield f'{tabs}{data!r}'


def show_one(data):
    """Return the first line that show() produces for data, unindented."""
    return next(show(data))
class Record:
    """The fields stored in a .DS_Store file under one name.

    Attributes are kept simple on purpose:
        name: The name the fields belong to.
        fields: Dict mapping a four-character field code to its parsed value.
    """

    def __init__(self, name, *args, **kwargs):
        """Create a record; extra arguments are passed to dict() as fields."""
        self.name = name
        self.fields = dict(*args, **kwargs)

    def update(self, *args, **kwargs):
        """Add or replace fields, with the same arguments as dict.update()."""
        self.fields.update(*args, **kwargs)

    def __repr__(self):
        kwargs = "".join(f", {key}={value!r}"
                         for key, value in self.fields.items())
        return (f'Record({self.name!r}{kwargs})')

    def validate_type(self, field, data, data_type, *acceptable_lengths):
        """Check the type (strictly) and the length (leniently) of a value.

        Args:
            field: Field code, used only in messages.
            data: The value to check.
            data_type: A type or tuple of types accepted by isinstance().
            *acceptable_lengths: Allowed values of len(data); if none are
                given the length is not checked.

        Raises:
            TypeError: If data is not an instance of data_type.

        A length mismatch only emits a warning.
        """
        if not isinstance(data, data_type):
            raise TypeError(f'{self} {field} {data!r} not of type {data_type}')
        if acceptable_lengths and len(data) not in acceptable_lengths:
            # The lengths are ints and must be converted before joining.
            expected = " or ".join(map(str, acceptable_lengths))
            warnings.warn(f'{self} {field} {show_one(data)} not of length'
                          f' {expected}')

    def human_readable(self):
        """Yield descriptive text lines for every field of this record.

        Each known field code is validated with validate_type() and then
        described; unknown codes are yielded with their repr().

        Yields:
            One string per output line; nested details start with tabs.

        Raises:
            TypeError: If a known field holds a value of an unexpected type.
        """
        # TODO: interpret the parsed plists
        for field, data in self.fields.items():
            if field == 'BKGD':
                # BKGD supplanted by TODO in later versions
                self.validate_type(field, data, bytes, 12)
                background_type = data[:4].decode('ascii')
                if background_type == 'DefB':
                    yield 'Background: Default'
                elif background_type == 'ClrB':
                    hex_color = data[4:10].hex()
                    yield f'Background: Color #{hex_color}'
                elif background_type == 'PctB':
                    yield 'Background: Picture, see "Picture" field'
                else:
                    warnings.warn('Unrecognized background type'
                                  f' {background_type}')
                    yield f'Background (unrecognized): {show_one(data)}'
            elif field == 'GRP0':
                self.validate_type(field, data, str)
                yield f'{field} (unknown): {data}'
            elif field == 'ICVO':
                self.validate_type(field, data, bool)
                yield f'{field} (unknown): {data}'
            elif field == 'Iloc':
                self.validate_type(field, data, bytes, 16)
                x = int.from_bytes(data[0:4], 'big', signed=False)
                y = int.from_bytes(data[4:8], 'big', signed=False)
                # Don't know what data[8:16] is for, but it's variable
                rest = data[8:16]
                yield f'Icon location: x {x}px, y {y}px, {show_one(rest)}'
            elif field == 'LSVO':
                self.validate_type(field, data, bool)
                yield f'{field} (unknown): {data}'
            elif field == 'bwsp':
                self.validate_type(field, data, bytes)
                yield 'Layout property list:'
                yield from show(plistlib.loads(data), tab_depth=1)
            elif field == 'cmmt':
                self.validate_type(field, data, str)
                yield f'Comments: {data}'
            elif field == 'dilc':
                self.validate_type(field, data, bytes, 32)
                x = int.from_bytes(data[16:20], 'big', signed=False)
                y = int.from_bytes(data[20:24], 'big', signed=False)
                # They appear to be percentages with 0.001 accuracy
                x /= 1000
                y /= 1000
                # Don't know what data[0:16] and data[24:32] are for
                before = data[0:16]
                after = data[24:32]
                yield (f'Icon location on desktop: x {x}%, y {y}%'
                       f', {show_one(before)}, {show_one(after)}')
            elif field == 'dscl':
                self.validate_type(field, data, bool)
                yield f'Open in list view: {data}'
            elif field == 'extn':
                self.validate_type(field, data, str)
                yield f'Extension: {data}'
            elif field == 'fwi0':
                # fwi0 somewhat supplanted by vstl, bwsp, lsvp, lsvP later
                self.validate_type(field, data, bytes, 16)
                yield 'Finder window information:'
                top = int.from_bytes(data[0:2], 'big', signed=False)
                left = int.from_bytes(data[2:4], 'big', signed=False)
                bottom = int.from_bytes(data[4:6], 'big', signed=False)
                right = int.from_bytes(data[6:8], 'big', signed=False)
                yield (f'\tWindow rectangle: top {top}, left {left}, bottom'
                       f' {bottom}, right {right}')
                # Coverflow view not in Mojave now
                # Similarly there's no Gallery view back then
                views = {'icnv': 'Icon view',
                         'clmv': 'Column view',
                         'Nlsv': 'List view',
                         'Flwv': 'Coverflow view'}
                view_raw = data[8:12].decode('ascii')
                view = views.get(view_raw, f'(unrecognized) {view_raw}')
                # Indented like the window rectangle: all of these lines
                # belong under the 'Finder window information:' heading.
                yield f'\tView style (might be overtaken): {view}'
                # Don't know what data[12:16] is for
                yield f'\t{show_one(data[12:16])}'
            elif field == 'fwsw':
                self.validate_type(field, data, int)
                yield f'Finder window sidebar width: {data}'
            elif field == 'fwvh':
                self.validate_type(field, data, int)
                yield ('Finder window vertical height (overrides Finder window'
                       f' information): {data}')
            elif field == 'icgo':
                self.validate_type(field, data, bytes, 8)
                yield f'{field} (unknown): {show_one(data)}'
            elif field == 'icsp':
                self.validate_type(field, data, bytes, 8)
                yield f'{field} (unknown): {show_one(data)}'
            elif field == 'icvo':
                # icvo supplanted by icvp in later versions
                self.validate_type(field, data, bytes)
                yield 'Icon view options:'
                icvo_type = data[0:4].decode('ascii')
                arranges = {'none': 'None', 'grid': 'Snap to Grid'}
                labels = {'botm': 'Bottom', 'rght': 'Right'}
                if icvo_type == 'icvo':
                    self.validate_type(field, data, bytes, 18)
                    flags = data[4:12]
                    size = int.from_bytes(data[12:14], 'big', signed=False)
                    arrange_raw = data[14:18].decode('ascii')
                    arrange = arranges.get(arrange_raw,
                                           f'(unknown) {arrange_raw}')
                    yield f'\tFlags (?): {show_one(flags)}'
                    yield f'\tSize: {size}px'
                    yield f'\tKeep arranged by: {arrange}'
                elif icvo_type == 'icv4':
                    self.validate_type(field, data, bytes, 26)
                    size = int.from_bytes(data[4:6], 'big', signed=False)
                    arrange_raw = data[6:10].decode('ascii')
                    arrange = arranges.get(arrange_raw,
                                           f'(unknown) {arrange_raw}')
                    label_raw = data[10:14].decode('ascii')
                    label = labels.get(label_raw, f'(unknown) {label_raw}')
                    flags = data[14:26]
                    info = bool(flags[1] & 0x01)
                    preview = bool(flags[11] & 0x01)
                    yield f'\tSize: {size}px'
                    yield f'\tKeep arranged by: {arrange}'
                    yield f'\tLabel position: {label}'
                    yield '\tFlags (partially known):'
                    yield f'\t\tRaw flags: {show_one(flags)}'
                    yield f'\t\tShow item info: {info}'
                    yield f'\t\tShow icon preview: {preview}'
                else:
                    warnings.warn('Unrecognized icon view options type'
                                  f' {icvo_type}')
                    yield f'\t(unrecognized): {show_one(data)}'
            elif field == 'icvp':
                self.validate_type(field, data, bytes)
                yield 'Icon view property list:'
                yield from show(plistlib.loads(data), tab_depth=1)
            elif field == 'info':
                self.validate_type(field, data, bytes)
                yield f'{field} (unknown): {show_one(data)}'
            elif field in {'logS', 'lg1S'}:
                # logS supplanted by lg1S for unknown reasons
                self.validate_type(field, data, int)
                yield f'Logical size: {data}B'
            elif field == 'lssp':
                self.validate_type(field, data, bytes, 8)
                yield (f'{field} (unknown, List view scroll position?):'
                       f' {show_one(data)}')
            elif field == 'lsvC':
                self.validate_type(field, data, bytes)
                yield 'List view properties, alternative:'
                yield from show(plistlib.loads(data), tab_depth=1)
            elif field == 'lsvP':
                self.validate_type(field, data, bytes)
                yield 'List view properties, other alternative:'
                yield from show(plistlib.loads(data), tab_depth=1)
            elif field == 'lsvo':
                # lsvo supplanted by lsvp / lsvP
                self.validate_type(field, data, bytes, 76)
                yield f'List view options (format unknown): {show_one(data)}'
            elif field == 'lsvp':
                self.validate_type(field, data, bytes)
                yield 'List view properties:'
                yield from show(plistlib.loads(data), tab_depth=1)
            elif field == 'lsvt':
                # lsvt supplanted by lsvp / lsvP
                self.validate_type(field, data, int)
                yield f'List view text size: {data}pt'
            # Following 2 may appear at the same time, but difference unknown
            # They were originally dutc, but now they use blob
            # When dutc, it's the number of 1 / 65536 seconds from 1904
            # Otherwise, it's TODO
            elif field == 'moDD':
                self.validate_type(field, data, (int, bytes))
                if isinstance(data, int):
                    date = data / 65536
                    yield f'Modification date: {show_date(date)}'
                elif isinstance(data, bytes):
                    # Little endian for some reason
                    date = int.from_bytes(data, 'little')
                    yield ('Modification date (timestamp, format unknown):'
                           f' {date}')
            elif field == 'modD':
                self.validate_type(field, data, (int, bytes))
                if isinstance(data, int):
                    date = data / 65536
                    yield f'Modification date, alternative: {show_date(date)}'
                elif isinstance(data, bytes):
                    # Little endian for some reason
                    date = int.from_bytes(data, 'little')
                    yield ('Modification date, alternative (timestamp, format'
                           f' unknown): {date}')
            elif field in {'ph1S', 'phyS'}:
                # phyS supplanted by ph1S for unknown reasons
                self.validate_type(field, data, int)
                yield f'Physical size: {data}B'
            elif field == 'pict':
                # pict, with BKGD, supplanted by TODO in later versions
                # pict in format of Apple Finder alias
                yield f'Picture: {show_one(data)}'
            elif field == 'vSrn':
                self.validate_type(field, data, int)
                yield f'{field} (unknown): {data}'
            elif field == 'vstl':
                self.validate_type(field, data, str)
                # Coverflow view not in Mojave now
                # Similarly there's no Gallery view back then
                views = {'icnv': 'Icon view',
                         'clmv': 'Column view',
                         'glyv': 'Gallery view',
                         'Nlsv': 'List view',
                         'Flwv': 'Coverflow view'}
                view = views.get(data, f'(unrecognized) {data}')
                yield f'View style: {view}'
            else:
                yield f'{field} (unrecognized): {data!r}'
class DSStore:
    """Parser for the bytes of a .DS_Store file.

    The whole buffer is parsed on construction: header, then the
    allocator (offset table, table of contents, free list), then the
    B-tree of records starting from the 'DSDB' master node.

    Attributes:
        content: The bytes being parsed.
        cursor: Index into content of the next byte to read.
        records: List of Record objects, one per distinct name, in the
            order the names were first encountered.
    """

    def __init__(self, content):
        """Store content and parse it immediately.

        Raises:
            ValueError: If the table of contents has no 'DSDB' key.
            NotImplementedError: If a record uses an unknown data type.
        """
        self.cursor = 0
        self.content = content
        self.records = []
        # name -> Record, so fields of an already-seen name can be merged
        # without scanning self.records for every field.
        self._records_by_name = {}
        self.parse()

    def read(self):
        """Return the list of parsed records."""
        return self.records

    def next_byte(self):
        """Read one byte at the cursor as an int and advance past it."""
        # Read from this instance's buffer, not from a module-level name,
        # so the class works when imported and for nested stores.
        data = self.content[self.cursor]
        self.cursor += 1
        return data

    def next_bytes(self, n):
        """Read n bytes at the cursor and advance the cursor by n."""
        data = self.content[self.cursor:self.cursor + n]
        self.cursor += n
        return data

    def next_uint32(self):
        """Read a 4-byte big-endian unsigned integer."""
        data = int.from_bytes(self.next_bytes(4), 'big', signed=False)
        return data

    def next_uint64(self):
        """Read an 8-byte big-endian unsigned integer."""
        data = int.from_bytes(self.next_bytes(8), 'big', signed=False)
        return data

    def parse_header(self):
        """Read the file header and record where the allocator lives.

        Sets allocator_offset and allocator_length. Unexpected header
        values only produce warnings.
        """
        # Alignment int
        alignment = self.next_uint32()
        if alignment != 0x00000001:
            warnings.warn(f'Alignment int {hex(alignment)} not 0x00000001')
        # Magic bytes
        magic = self.next_uint32()
        if magic != 0x42756431:
            warnings.warn(f'Magic bytes {hex(magic)} not 0x42756431 (Bud1)')
        # Buddy allocator position & length
        # 0x4 for the alignment int
        self.allocator_offset = 0x4 + self.next_uint32()
        self.allocator_length = self.next_uint32()
        allocator_offset_repeat = 0x4 + self.next_uint32()
        if allocator_offset_repeat != self.allocator_offset:
            warnings.warn(f'Allocator offsets {hex(self.allocator_offset)} and'
                          f' {hex(allocator_offset_repeat)} unequal')

    def parse_allocator(self):
        """Read the allocator block found by parse_header().

        Sets offsets (list of ints), directory (key -> int), master_id
        (the 'DSDB' directory entry) and freelist (1 << i -> list of ints
        for i in 0..31).

        Raises:
            ValueError: If the directory has no 'DSDB' key.
        """
        self.cursor = self.allocator_offset
        # Offsets
        num_offsets = self.next_uint32()
        second = self.next_uint32()
        if second != 0:
            warnings.warn(f'Second int of allocator {hex(second)}'
                          ' not 0x00000000')
        self.offsets = [self.next_uint32() for _ in range(num_offsets)]
        # The table of contents is read from a fixed position, 0x408
        # bytes after the start of the allocator.
        self.cursor = self.allocator_offset + 0x408
        # Table of contents
        self.directory = {}
        num_keys = self.next_uint32()
        for _ in range(num_keys):
            key_length = self.next_byte()
            key = self.next_bytes(key_length).decode('ascii')
            self.directory[key] = self.next_uint32()
            if key != 'DSDB':
                warnings.warn(f"Directory contains non-'DSDB' key {key!r} and"
                              f' value {hex(self.directory[key])}')
        # Master node ID & offset
        if 'DSDB' not in self.directory:
            raise ValueError("Key 'DSDB' not found in table of contents")
        self.master_id = self.directory['DSDB']
        # Free list
        self.freelist = {}
        for i in range(32):
            values_length = self.next_uint32()
            self.freelist[1 << i] = [self.next_uint32()
                                     for _ in range(values_length)]

    def parse_tree(self, *, node_id=None):
        """Parse the node with the given ID, recursing into its children.

        Args:
            node_id: Index into offsets of the node to parse. None means
                the master node, which sets root_id, tree_height,
                num_records and num_nodes and then parses the root node.

        Records found are merged by name into self.records.
        """
        # The master node points to the root node and contains metadata
        # The B-tree contains nodes, which contain records of file properties
        # or nodes
        if node_id is None:
            master = True
            node_id = self.master_id
        else:
            master = False
        offset_and_size = self.offsets[node_id]
        # The low 5 bits are cleared to get the offset; 0x4 skips the
        # alignment int at the start of the file.
        self.cursor = 0x4 + (offset_and_size >> 0x5 << 0x5)
        # node size is 1 << (offset_and_size & 0x1f) TODO VALIDATE
        if master:
            # Master node
            self.root_id = self.next_uint32()
            self.tree_height = self.next_uint32()
            self.num_records = self.next_uint32()
            self.num_nodes = self.next_uint32()
            fifth = self.next_uint32()  # TODO: tree node page size?
            if fifth != 0x00001000:
                warnings.warn(f'Fifth int of master {hex(fifth)}'
                              ' not 0x00001000')
            self.parse_tree(node_id=self.root_id)
        else:
            next_id = self.next_uint32()
            num_records = self.next_uint32()
            for _ in range(num_records):
                if next_id:
                    # Has children
                    child_id = self.next_uint32()
                    # Parsing the child moves the cursor; restore it to
                    # continue with this node's own record.
                    current_cursor = self.cursor
                    self.parse_tree(node_id=child_id)
                    self.cursor = current_cursor
                name_length = self.next_uint32()
                name = self.next_bytes(name_length * 2).decode('utf-16be')
                field = self.next_bytes(4).decode('ascii')
                data = self.parse_data()
                record = self._records_by_name.get(name)
                if record is None:
                    record = Record(name)
                    self._records_by_name[name] = record
                    self.records.append(record)
                record.update({field: data})
            if next_id:
                self.parse_tree(node_id=next_id)

    def parse_data(self):
        """Read one typed value at the cursor.

        Returns:
            bool for 'bool'; int for 'shor', 'long', 'comp' and 'dutc';
            a 4-character str for 'type'; bytes for 'blob'; str for 'ustr'.

        Raises:
            NotImplementedError: If the 4-byte type code is not one of the
                above.
        """
        data_type = self.next_bytes(4).decode('ascii')
        if data_type == 'bool':
            return bool(self.next_byte() & 0x01)
        elif data_type in {'shor', 'long'}:
            # short is also 4, with 2 0x00 bytes padding, for some reason
            return self.next_uint32()
        elif data_type == 'comp':
            return self.next_uint64()
        elif data_type == 'dutc':
            return self.next_uint64()
        elif data_type == 'type':
            return self.next_bytes(4).decode('ascii')
        elif data_type == 'blob':
            data_length = self.next_uint32()
            return self.next_bytes(data_length)
        elif data_type == 'ustr':
            data_length = self.next_uint32()
            return self.next_bytes(2 * data_length).decode('utf-16be')
        else:
            raise NotImplementedError(f'Unrecognized data type {data_type}')

    def parse(self):
        """Parse the header, the allocator and the record tree, in order."""
        self.parse_header()
        self.parse_allocator()
        self.parse_tree()
if __name__ == '__main__':
    # Command-line entry point: print every record of one .DS_Store file,
    # each followed by its tab-indented description lines.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
    elif len(sys.argv) == 1:
        print(f'File unspecified. Use python3 {sys.argv[0]} <.DS_Store file>'
              ' to specify file. Defaulting to .DS_Store in the current'
              ' directory...', file=sys.stderr)
        filename = '.DS_Store'
    else:
        # Too many arguments: stop here instead of continuing without a
        # filename. sys.exit() with a string writes it to stderr and exits
        # with status 1.
        sys.exit(f'Usage: python3 {sys.argv[0]} <.DS_Store file>')
    with open(filename, 'rb') as file:
        content = file.read()
    ds_store = DSStore(content)
    for record in ds_store.read():
        print(record.name)
        for description in record.human_readable():
            print(f'\t{description}')