diff --git a/test/TestOne.py b/test/TestOne.py
deleted file mode 100644
index 781b0c5..0000000
--- a/test/TestOne.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import unittest
-
-
-class TestOne(unittest.TestCase):
- def test_for_fun(self):
- pass
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test/TestTools.py b/test/TestTools.py
new file mode 100644
index 0000000..979659e
--- /dev/null
+++ b/test/TestTools.py
@@ -0,0 +1,60 @@
+import unittest
+from nose.tools import assert_raises, assert_equal
+
+from lxml import etree
+from wechatsogou.tools import (
+ list_or_empty,
+ get_elem_text,
+ replace_html,
+ str_to_dict,
+ replace_space,
+ get_url_param
+)
+
+
+class TestTools(unittest.TestCase):
+    """Unit tests for the helper functions in ``wechatsogou.tools``."""
+
+    def test_list_or_empty(self):
+        # Anything that is not a list must be rejected.
+        with assert_raises(AssertionError):
+            list_or_empty('test for fun')
+
+        assert_equal(list_or_empty(['1', '2'], int), 1)
+        assert_equal(list_or_empty(['1', '2']), '1')
+        assert_equal(list_or_empty([], int), 0)
+        assert_equal(list_or_empty([], str), '')
+        assert_equal(list_or_empty([], list), [])
+
+    def test_get_elem_text(self):
+        html = '''
+        <div>
+            <div>111</div>
+            <div>222</div>
+        </div>
+        '''
+        elem = etree.HTML(html)
+        assert_equal(get_elem_text(elem), '111222')
+
+    def test_replace_html(self):
+        html = '''&#39;&quot;&amp;&yen;amp;&lt;&gt;&nbsp;\\'''
+        assert_equal(replace_html(html), '\'"&¥<> ')
+
+        html = ['&#39;', '&quot;', '&amp;', '&yen;', 'amp;', '&lt;', '&gt;', '&nbsp;', '\\']
+        assert_equal(replace_html(html), ['\'', '"', '&', '¥', '', '<', '>', ' ', ''])
+
+        html = {'&#39;': '&quot;'}
+        assert_equal(replace_html(html), {'\'': '"'})
+
+    def test_str_to_dict(self):
+        assert_equal(str_to_dict("{'a':'a'}"), {'a': 'a'})
+
+    def test_replace_space(self):
+        assert_equal(replace_space('ss ss'), 'ssss')
+
+    def test_get_url_param(self):
+        assert_equal(get_url_param('http://example.com?a=1&b=2&a=3'), {'a': ['1', '3'], 'b': ['2']})
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/wechatsogou/basic.py b/wechatsogou/basic.py
index db3b834..e260fbf 100644
--- a/wechatsogou/basic.py
+++ b/wechatsogou/basic.py
@@ -4,6 +4,7 @@
import time
import random
import logging
+from builtins import input
import requests
from lxml import etree
@@ -43,8 +44,7 @@ def readimg(content):
from .ruokuaicode import RClient
from .filecache import WechatCache
from .tools import (
- input,
- replace_all,
+ replace_html,
replace_space,
get_encoding_from_reponse
)
@@ -328,7 +328,7 @@ def _get_gzh_article_by_url_dict(self, text):
msglist = re.findall("var msgList =(.+?)};", text, re.S)[0]
msglist = msglist + '}'
msgdict = eval(msglist)
- msgdict = replace_all(msgdict)
+ msgdict = replace_html(msgdict)
return msgdict
def _deal_gzh_article_dict(self, msgdict, **kwargs):
diff --git a/wechatsogou/tools.py b/wechatsogou/tools.py
index 7fe2771..4abffb7 100644
--- a/wechatsogou/tools.py
+++ b/wechatsogou/tools.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-import json
+import ast
import requests
@@ -10,29 +10,23 @@
import urllib.parse as url_parse
-def prdict(content):
- msg = json.dumps(content, indent=1, ensure_ascii=False)
- print(msg)
-
-
def list_or_empty(content, contype=None):
-    if isinstance(content, list):
-        if content:
-            return contype(content[0]) if contype else content[0]
-        else:
-            if contype:
-                if contype == int:
-                    return 0
-                elif contype == str:
-                    return ''
-                elif contype == list:
-                    return []
-                else:
-                    raise Exception('only cna deal int str list')
-            else:
-                return ''
-    else:
-        raise Exception('need list')
+    """Return the first item of ``content``, or an empty default if it is empty.
+
+    ``contype`` (optional) converts the first item; for an empty list it picks
+    the default: 0 for int, '' for str or no contype, [] for list (any other
+    type raises Exception). A non-list ``content`` raises AssertionError.
+    """
+    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
+    if not isinstance(content, list):
+        raise AssertionError('content is not list: {}'.format(content))
+    if content:
+        return contype(content[0]) if contype else content[0]
+    if not contype:
+        return ''
+    if contype in (int, str, list):
+        return contype()
+    raise Exception('only can deal int str list')
def get_elem_text(elem):
@@ -44,10 +38,7 @@ def get_elem_text(elem):
Returns:
elem中文字
"""
- rc = []
- for node in elem.itertext():
- rc.append(node.strip())
- return ''.join(rc)
+ return ''.join([node.strip() for node in elem.itertext()])
def get_encoding_from_reponse(r):
@@ -63,7 +54,7 @@ def get_encoding_from_reponse(r):
return encoding[0] if encoding else requests.utils.get_encoding_from_headers(r.headers)
-def _replace_html(s):
+def _replace_str_html(s):
"""替换html‘"’等转义内容为正常内容
Args:
@@ -72,68 +63,42 @@ def _replace_html(s):
Returns:
s: 处理反转义后的文字
"""
-    s = s.replace('&#39;', '\'')
-    s = s.replace('&quot;', '"')
-    s = s.replace('&amp;', '&')
-    s = s.replace('&gt;', '>')
-    s = s.replace('&lt;', '<')
-    s = s.replace('&yen;', '¥')
-    s = s.replace('amp;', '')
-    s = s.replace('&lt;', '<')
-    s = s.replace('&gt;', '>')
-    s = s.replace('&nbsp;', ' ')
-    s = s.replace('\\', '')
+    html_str_list = [  # applied in order, top to bottom
+        ('&#39;', '\''),
+        ('&quot;', '"'),
+        ('&amp;', '&'),  # runs before 'amp;' so "&amp;amp;" collapses to "&"
+        ('&yen;', '¥'),
+        ('amp;', ''),  # drops the "amp;" residue of double-escaped text
+        ('&lt;', '<'),
+        ('&gt;', '>'),
+        ('&nbsp;', ' '),
+        ('\\', ''),  # backslashes are removed entirely
+    ]
+    for escaped, plain in html_str_list:
+        s = s.replace(escaped, plain)
return s
-def _replace_dict(dicts):
-    retu_dict = dict()
-    for k, v in dicts.items():
-        retu_dict[replace_all(k)] = replace_all(v)
-    return retu_dict
-
-
-def _replace_list(lists):
-    retu_list = list()
-    for l in lists:
-        retu_list.append(replace_all(l))
-    return retu_list
-
-
-def replace_all(data):
-    if isinstance(data, dict):
-        return _replace_dict(data)
-    elif isinstance(data, list):
-        return _replace_list(data)
-    elif isinstance(data, str):
-        return _replace_html(data)
-    else:
-        return data
+def replace_html(data):
+    """Recursively HTML-unescape str, list and dict data; return other types as-is."""
+    if isinstance(data, dict):
+        return {replace_html(k): replace_html(v) for k, v in data.items()}
+    if isinstance(data, list):
+        return [replace_html(item) for item in data]
+    if isinstance(data, str):
+        return _replace_str_html(data)
+    return data
def str_to_dict(json_str):
-    json_dict = eval(json_str)
-    return replace_all(json_dict)
+    """Parse a Python-literal string with ast.literal_eval, then HTML-unescape it."""
+    return replace_html(ast.literal_eval(json_str))
def replace_space(s):
- s = s.replace(' ', '')
- s = s.replace('\r\n', '')
- return s
+ return s.replace(' ', '').replace('\r\n', '')  # spaces are removed first, then CRLF pairs; a lone \r or \n is kept
def get_url_param(url):
result = url_parse.urlparse(url)
return url_parse.parse_qs(result.query, True)
-
-
-def input(msg=''):
- try:
- return raw_input(msg)
- except NameError:
- return input(msg)
-
-
-if __name__ == '__main__':
- aa = list_or_empty(['list'])
- print(aa, type(aa))