-
Notifications
You must be signed in to change notification settings - Fork 0
/
feedparser_debug.py
106 lines (97 loc) · 2.69 KB
/
feedparser_debug.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import feedparser, cgi, textwrap
# Outer HTML page template. The single %s placeholder is replaced with the
# rendered page body (main() does ``html % sub``). The embedded stylesheet
# defines the "bozo" class used for the pink error banner and the "type"
# class used for the small gray type label printed under each key.
html = """
<html>
<head>
<style>
body {
margin: 30px;
}
table {
border: solid 1px;
}
.bozo {
background: pink;
padding: 2px;
margin-bottom: 15px;
}
.type {
color: gray;
font-size: 11px;
}
</style>
</head>
<body>
%s
</body>
</html>
"""
# Template for one (possibly nested) table. The %s placeholder is replaced
# with the newline-joined <tr> rows that htmlize() builds for a container.
table = """
<table cellspacing=2 cellpadding=2>
%s
</table>
"""
def bozo(m):
    """Wrap an error message in the ``bozo`` banner ``<div>``.

    Args:
        m: The message to show (main() passes ``str(f.bozo_exception)``).

    Returns:
        An HTML fragment ``<div class="bozo">...</div>`` whose content is
        the HTML-escaped message.
    """
    try:
        from html import escape  # Python 3.2+
    except ImportError:
        from cgi import escape  # Python 2

    # Parser errors frequently look like "<unknown>:2:0: syntax error".
    # Without escaping, the browser treats "<unknown>" as a tag and drops it.
    # Formatting with %s first keeps non-string messages accepted, as before.
    return '<div class="bozo">%s</div>' % escape('%s' % m, quote=False)
def tr(k, v, typ, descend_path):
    """Render one key/value table row.

    Args:
        k: Row label (a key name, or an index label such as ``'[0]'``).
        v: Already-rendered HTML for the value cell.
        typ: Type of the value; its ``__name__`` is shown under the label.
        descend_path: List of labels leading to the container that holds
            ``k``. When non-empty, the key cell gets a ``title`` tooltip
            with the full access path and the type name.

    Returns:
        The ``<tr>`` markup for the row (no closing ``</tr>`` is emitted).
    """
    hover = ''
    if descend_path:
        # Index labels ('[n]') attach directly; names are joined with a dot.
        pieces = [descend_path[0]]
        for step in descend_path[1:] + [k]:
            pieces.append(step if step.startswith('[') else '.' + step)
        hover = 'title="%s | %s"' % (''.join(pieces), typ.__name__)

    row_template = (
        '<tr><td %s valign=top>%s<br/><span class="type">%s</span>'
        '</td><td valign=top>%s</td>'
    )
    return row_template % (hover, k, typ.__name__, v)
def enumerate_seq(seq, sorted_keys=None):
    """Yield ``(label, value)`` pairs for a list or a dict-like object.

    List items are labelled ``'[0]'``, ``'[1]'``, ...; mapping entries are
    labelled with their key. Any other input yields nothing.

    Args:
        seq: A list, a dict, or a ``feedparser.FeedParserDict``.
        sorted_keys: Optional explicit key order for mappings. When falsy,
            ``sorted(seq.keys())`` is used. Ignored for lists.

    Yields:
        ``(label, value)`` tuples in iteration order.

    Raises:
        KeyError: If a requested name is neither a key nor an attribute
            of the mapping.
    """
    if isinstance(seq, list):
        for idx, val in enumerate(seq):
            yield '[%s]' % idx, val
    # FeedParserDict is still checked explicitly in case the installed
    # feedparser version does not derive it from dict.
    elif isinstance(seq, dict) or isinstance(seq, feedparser.FeedParserDict):
        for attr in (sorted_keys or sorted(seq.keys())):
            try:
                # Item lookup first: attribute lookup would return the bound
                # dict method for keys such as 'items' or 'keys', and it
                # rejects non-string keys outright.
                val = seq[attr]
            except KeyError:
                try:
                    # Fall back for names that exist only as attributes.
                    val = getattr(seq, attr)
                except AttributeError:
                    raise KeyError(attr)
            yield attr, val
def htmlize(obj, sorted_keys=None, descend_path=None):
    """Render a parsed-feed value as HTML.

    Containers (list, dict, ``feedparser.FeedParserDict``) become a table
    with one row per element, recursing into each value. Any other value
    is shown as its ``repr()``, line-wrapped and HTML-escaped; strings that
    start with ``http://`` or ``https://`` also get a ``[link]`` anchor.

    Args:
        obj: The value to render.
        sorted_keys: Optional key order for a top-level mapping; it is not
            passed down to nested containers.
        descend_path: List of labels leading to ``obj``, used for the row
            tooltips. Defaults to an empty path.

    Returns:
        An HTML fragment as a string.
    """
    if descend_path is None:
        descend_path = []

    if isinstance(obj, (feedparser.FeedParserDict, dict, list)):
        rows = [
            tr(attr,
               htmlize(val, descend_path=descend_path + [attr]),
               type(val),
               descend_path)
            for attr, val in enumerate_seq(obj, sorted_keys)
        ]
        return table % '\n'.join(rows)

    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    try:
        from html import escape  # Python 3.2+
    except ImportError:
        from cgi import escape  # Python 2

    # Wrap first, then escape each line: wrapping already-escaped text can
    # break a long word in the middle of an entity such as "&amp;".
    wrapped = [escape(line, quote=False) for line in textwrap.wrap(repr(obj))]

    link = ''
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    if isinstance(obj, (str, type(u''))):
        if obj.startswith(('http://', 'https://')):
            # NOTE: ``wrapped`` is a list of lines, so this limits the number
            # of wrapped lines, not the number of characters in the URL.
            if len(wrapped) < 200:
                # The URL comes from the feed; escape it (quotes included)
                # so it cannot break out of the href attribute.
                link = ' <a href="%s">[link]</a>' % escape(obj, quote=True)
    return '\n<br/>'.join(wrapped) + link
def main(url):
    """Fetch and parse the feed at ``url`` and return a debug HTML page.

    The parsed result is rendered as nested tables, with the ``entries``
    key moved to the end of the top-level key order. If feedparser set the
    ``bozo`` flag, the exception text is shown in a banner above the tables.

    Args:
        url: Feed location passed straight to ``feedparser.parse``.

    Returns:
        The complete HTML document as a string.
    """
    parsed = feedparser.parse(url)

    # Alphabetical key order, except that 'entries' goes last.
    ordered = sorted(parsed.keys())
    ordered.append(ordered.pop(ordered.index('entries')))

    body = htmlize(parsed, ordered, ['f'])
    if parsed.bozo:
        body = bozo(str(parsed.bozo_exception)) + body
    return html % body
if __name__ == '__main__':
    # Render a sample feed and save the page next to the script.
    h = main('http://defcraft.blogspot.com/feeds/posts/default')
    # Use a context manager so the file is flushed and closed even if the
    # write fails.
    with open('test.html', 'w') as out:
        out.write(h)