forked from cashgithubs/python-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
temp.py
36 lines (30 loc) · 952 Bytes
/
temp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import io
import json
import re

import jieba

from ms_spider_fw.CSVService import CSV
from ms_spider_fw.DBSerivce import DBService
# connect_dict = {
# 'host': 'localhost',
# 'user': 'root',
# 'passwd': '',
# 'charset': 'utf8'
# }
# Service object configured with database name 'platform_data' and table name
# 'jd_comment_cellphone'.
db_server = DBService(dbName='platform_data', tableName='jd_comment_cellphone')
# Read the `comment_json` column; `limit` presumably caps the number of rows
# returned (confirm against DBService.getData).
data = db_server.getData(var='comment_json', limit=10000)
# Keep only rows whose first field is non-empty and begins with '{', i.e. looks
# like a JSON object.  A single comprehension replaces the two nested `filter`
# calls and yields a real list on Python 2 and Python 3 alike.
data = [row for row in data if row[0] and row[0][0] == '{']
# Non-greedy pattern for '<...>' spans; not referenced elsewhere in the
# visible code.
re_sub_p = re.compile('<.+?>')
# extract_info from json string
def extract_info(x):
    """Return the comment texts contained in one database row.

    Args:
        x: A sequence whose first element is a JSON document (as text) with a
            top-level ``"comments"`` list of objects.

    Returns:
        A list with the ``"content"`` value of every comment, with newline
        characters removed.  An empty list is returned when the row cannot
        be processed (missing first element, invalid JSON, no ``"comments"``
        key, or an entry without a string ``"content"``).
    """
    try:
        comments = json.loads(x[0])['comments']
        return [item.get("content").replace('\n', '') for item in comments]
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # Only data-shape problems mean "no comments"; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return []
# Flatten the per-row comment lists into one list.  A nested comprehension
# (instead of reduce + list concatenation) copes with an empty `data`, avoids
# quadratic copying and does not depend on the Python 2-only builtin `reduce`.
data = [comment for row in data for comment in extract_info(row)]
# One comment per line; the unicode separator keeps the result a text string
# even when there are no comments at all.
_result = u'\n'.join(data)
# io.open with an explicit encoding writes the comment text as UTF-8 on both
# Python 2 and Python 3 instead of relying on the interpreter's default
# encoding, which fails on non-ASCII text when that default is ASCII.
with io.open('d:/spider/weibo/handle/jd_comment.txt', 'w', encoding='utf-8') as f:
    f.write(_result)