Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tools test #68

Merged
merged 7 commits into from
May 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions test/TestOne.py

This file was deleted.

60 changes: 60 additions & 0 deletions test/TestTools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest
from nose.tools import assert_raises, assert_equal

from lxml import etree
from wechatsogou.tools import (
list_or_empty,
get_elem_text,
replace_html,
str_to_dict,
replace_space,
get_url_param
)


class TestTools(unittest.TestCase):
    """Unit tests for the helper functions imported from ``wechatsogou.tools``."""

    def test_list_or_empty(self):
        """Non-list input is rejected; lists yield their first item or a typed default."""
        with assert_raises(AssertionError):
            list_or_empty('test for fun')

        # Each entry: (positional arguments, expected return value).
        cases = [
            ((['1', '2'], int), 1),
            ((['1', '2'],), '1'),
            (([], int), 0),
            (([], str), ''),
            (([], list), []),
        ]
        for args, expected in cases:
            assert_equal(list_or_empty(*args), expected)

    def test_get_elem_text(self):
        """Text of nested elements is concatenated with surrounding whitespace removed."""
        markup = (
            '\n'
            '<div>\n'
            '<div>111</div>\n'
            '<div>222</div>\n'
            '</div>\n'
        )
        root = etree.HTML(markup)
        assert_equal(get_elem_text(root), '111222')

    def test_replace_html(self):
        """Escaped entities are replaced in plain strings, list items and dict keys/values."""
        escaped_text = '''&#39;&quot;&amp;&yen;amp;&lt;&gt;&nbsp;\\'''
        assert_equal(replace_html(escaped_text), '\'"&¥<> ')

        escaped_items = ['&#39;', '&quot;', '&amp;', '&yen;', 'amp;', '&lt;', '&gt;', '&nbsp;', '\\']
        assert_equal(replace_html(escaped_items), ['\'', '"', '&', '¥', '', '<', '>', ' ', ''])

        escaped_mapping = {'&#39;': '&quot;'}
        assert_equal(replace_html(escaped_mapping), {'\'': '"'})

    def test_str_to_dict(self):
        """A dict literal given as text is parsed into the equivalent dict."""
        assert_equal(str_to_dict("{'a':'a'}"), {'a': 'a'})

    def test_replace_space(self):
        """Embedded spaces are removed from the string."""
        assert_equal(replace_space('ss ss'), 'ssss')

    def test_get_url_param(self):
        """Query parameters are returned as lists, with repeated keys collected together."""
        expected = {'a': ['1', '3'], 'b': ['2']}
        assert_equal(get_url_param('http://example.com?a=1&b=2&a=3'), expected)


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
6 changes: 3 additions & 3 deletions wechatsogou/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import time
import random
import logging
from builtins import input

import requests
from lxml import etree
Expand Down Expand Up @@ -43,8 +44,7 @@ def readimg(content):
from .ruokuaicode import RClient
from .filecache import WechatCache
from .tools import (
input,
replace_all,
replace_html,
replace_space,
get_encoding_from_reponse
)
Expand Down Expand Up @@ -328,7 +328,7 @@ def _get_gzh_article_by_url_dict(self, text):
msglist = re.findall("var msgList =(.+?)};", text, re.S)[0]
msglist = msglist + '}'
msgdict = eval(msglist)
msgdict = replace_all(msgdict)
msgdict = replace_html(msgdict)
return msgdict

def _deal_gzh_article_dict(self, msgdict, **kwargs):
Expand Down
111 changes: 38 additions & 73 deletions wechatsogou/tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

import json
import ast

import requests

Expand All @@ -10,29 +10,23 @@
import urllib.parse as url_parse


def prdict(content):
    """Print ``content`` to stdout as JSON.

    The value is serialised with a one-space indent, and non-ASCII
    characters are written as-is instead of being escaped.
    """
    print(json.dumps(content, indent=1, ensure_ascii=False))


def list_or_empty(content, contype=None):
    """Return the first element of ``content`` or an empty default.

    Args:
        content: list to read from.
        contype: optional type/callable. When ``content`` is non-empty its
            first element is passed through ``contype``; when ``content`` is
            empty it selects the default that is returned
            (``int`` -> 0, ``str`` -> '', ``list`` -> []).

    Returns:
        The (optionally converted) first element of ``content``, or the
        empty default for ``contype``; '' when ``content`` is empty and no
        ``contype`` is given.

    Raises:
        AssertionError: if ``content`` is not a list.
        Exception: if ``content`` is empty and ``contype`` is something
            other than int, str or list.
    """
    # Raised explicitly rather than through an ``assert`` statement so the
    # check is still enforced when Python runs with -O.
    if not isinstance(content, list):
        raise AssertionError('content is not list: {}'.format(content))

    if content:
        return contype(content[0]) if contype else content[0]

    # Empty list: fall back to a default that matches the requested type.
    if not contype:
        return ''
    if contype == int:
        return 0
    if contype == str:
        return ''
    if contype == list:
        return []
    raise Exception('only can deal int str list')


def get_elem_text(elem):
Expand All @@ -44,10 +38,7 @@ def get_elem_text(elem):
Returns:
elem中文字
"""
rc = []
for node in elem.itertext():
rc.append(node.strip())
return ''.join(rc)
return ''.join([node.strip() for node in elem.itertext()])


def get_encoding_from_reponse(r):
Expand All @@ -63,7 +54,7 @@ def get_encoding_from_reponse(r):
return encoding[0] if encoding else requests.utils.get_encoding_from_headers(r.headers)


def _replace_html(s):
def _replace_str_html(s):
"""替换html‘&quot;’等转义内容为正常内容

Args:
Expand All @@ -72,68 +63,42 @@ def _replace_html(s):
Returns:
s: 处理反转义后的文字
"""
s = s.replace('&#39;', '\'')
s = s.replace('&quot;', '"')
s = s.replace('&amp;', '&')
s = s.replace('&gt;', '>')
s = s.replace('&lt;', '<')
s = s.replace('&yen;', '¥')
s = s.replace('amp;', '')
s = s.replace('&lt;', '<')
s = s.replace('&gt;', '>')
s = s.replace('&nbsp;', ' ')
s = s.replace('\\', '')
html_str_list = [
('&#39;', '\''),
('&quot;', '"'),
('&amp;', '&'),
('&yen;', '¥'),
('amp;', ''),
('&lt;', '<'),
('&gt;', '>'),
('&nbsp;', ' '),
('\\', '')
]
for i in html_str_list:
s = s.replace(i[0], i[1])
return s


def _replace_dict(dicts):
    """Return a new dict with ``replace_all`` applied to every key and value."""
    return {replace_all(key): replace_all(value) for key, value in dicts.items()}


def _replace_list(lists):
    """Return a new list with ``replace_all`` applied to every element."""
    return [replace_all(item) for item in lists]


def replace_html(data):
    """Recursively replace escaped HTML content inside ``data``.

    Args:
        data: a dict, list, str, or any other value.

    Returns:
        For a dict, a new dict whose keys and values have been processed
        recursively; for a list, a new list of processed elements; for a
        str, the result of ``_replace_str_html``; any other value is
        returned unchanged.
    """
    if isinstance(data, dict):
        return {replace_html(k): replace_html(v) for k, v in data.items()}
    if isinstance(data, list):
        return [replace_html(item) for item in data]
    if isinstance(data, str):
        return _replace_str_html(data)
    return data


# Former name of ``replace_html``; kept so existing call sites keep working.
replace_all = replace_html


def str_to_dict(json_str):
    """Parse a Python-literal string and replace escaped HTML content in it.

    Args:
        json_str: text containing a Python literal, e.g. ``"{'a':'a'}"``.

    Returns:
        The parsed value after being passed through ``replace_html``.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when
            the text is not a valid Python literal.
    """
    # literal_eval accepts literals only, so expressions embedded in the
    # input are rejected instead of being executed as they would be by eval.
    parsed = ast.literal_eval(json_str)
    return replace_html(parsed)


def replace_space(s):
    """Return ``s`` with every space character and every CRLF pair removed.

    Args:
        s: the string to clean.

    Returns:
        A new string; other whitespace (such as a lone ``\\n``) is kept.
    """
    return s.replace(' ', '').replace('\r\n', '')


def get_url_param(url):
    """Return the query-string parameters of ``url`` as a dict of value lists.

    Blank values are kept: the second positional argument passed to
    ``parse_qs`` is its ``keep_blank_values`` flag.
    """
    return url_parse.parse_qs(url_parse.urlparse(url).query, True)


def input(msg=''):
    """Prompt with ``msg`` and return one line of user input.

    Uses ``raw_input`` where it exists (Python 2) and the builtin ``input``
    otherwise.

    Args:
        msg: prompt text shown to the user.

    Returns:
        The line that was read, as returned by the underlying builtin.
    """
    try:
        reader = raw_input
    except NameError:
        # This function shadows the builtin ``input`` in this module, so the
        # builtin has to be fetched explicitly; calling ``input`` here would
        # call this function again and recurse without end.
        import builtins
        reader = builtins.input
    return reader(msg)


# Ad-hoc manual check when run as a script: print the value returned for a
# one-element list together with its type.
if __name__ == '__main__':
    aa = list_or_empty(['list'])
    print(aa, type(aa))