-
Notifications
You must be signed in to change notification settings - Fork 10
/
wiki.py
46 lines (36 loc) · 1.4 KB
/
wiki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from BeautifulSoup import BeautifulSoup
import urllib2
def wiki(term):  # wiki <search term>
    """Return a Wikipedia link followed by the first paragraph of that page.

    ``term`` is the raw search term.  Surrounding whitespace is dropped and
    the remaining spaces become underscores to form the article name.  An
    empty or whitespace-only term (or ``None``) falls back to the Main_Page.

    The result is the URL, a single space, and whatever ``get_para`` returns
    for that URL, encoded as UTF-8.
    """
    main_page = 'http://en.wikipedia.org/wiki/Main_Page'

    # Parenthesised so the line works both as a print statement and as a
    # print() call; the emitted text is unchanged.
    print("Going to fetch wiki of %s" % term)

    # The previous test `1 == len(term)` (apparently a leftover from a
    # list-based version of this command) sent every one-character term,
    # e.g. "C", to the Main_Page.  Test for an empty term instead.
    search_term = (term or '').strip().replace(' ', '_')
    if search_term:
        # NOTE(review): the term is concatenated as-is, without URL quoting.
        response = 'http://en.wikipedia.org/wiki/' + search_term
    else:
        response = main_page

    response = response + ' ' + get_para(response)
    return response.encode('utf-8')
def get_para(wlink):
    """Get the first paragraph from a wiki link.

    Requests ``wlink`` with a browser-like User-agent header, parses the
    response and joins the text nodes of the first ``<p>`` found inside the
    ``<div>`` whose id is ``bodyContent``.

    Returns:
        'Cannot acces link!' if the request raises IOError; an empty string
        if the page has no such div or the div has no paragraph; otherwise
        the paragraph text, cut back to the last '.' until it is at most
        460 characters long (the cut removes that '.' as well).
    """
    # The paragraph cannot be longer than 510 characters including the
    # protocol command, so the text itself is capped at 460.
    max_len = 460

    try:
        page_request = urllib2.Request(wlink)
        page_request.add_header('User-agent', 'Mozilla/5.0')
        page = urllib2.urlopen(page_request)
    except IOError:
        return 'Cannot acces link!'

    # Always release the connection, even if parsing fails.
    try:
        soup = BeautifulSoup(page)
    finally:
        page.close()

    # find() and .p both yield None when nothing matches; bail out with an
    # empty paragraph instead of raising AttributeError.
    body = soup.find('div', {'id': 'bodyContent'})
    paragraph = body.p if body is not None else None
    if paragraph is None:
        return ''

    msg = ''.join(paragraph.findAll(text=True))
    while max_len < len(msg):
        pos = msg.rfind('.')
        if pos == -1:
            # No sentence boundary left: hard-cut in one step rather than
            # dropping a single character per iteration.
            msg = msg[:max_len]
        else:
            msg = msg[:pos]
    return msg