-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunicode.py
executable file
·97 lines (90 loc) · 3 KB
/
unicode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import urllib2;
import re;
import csv;
import codecs;
import sys;
from bs4 import BeautifulSoup
def extractPosition(table):
    """Return the text of every marker cell found in *table*.

    table -- a BeautifulSoup element; it is searched with
             ``find_all("td", "first")`` (a bare string in that slot is
             BeautifulSoup's shorthand for matching the CSS class).

    Returns a list holding the ``.string`` of each matched cell, in the
    order ``find_all`` yields them.  An entry is whatever ``.string``
    gives for that cell, which may be None.
    """
    return [cell.string for cell in table.find_all("td", "first")]
def extractID(table):
    """Return the text found two siblings after each marker cell.

    table -- a BeautifulSoup element; the marker cells are the ones
             returned by ``find_all("td", "first")``.

    For each marker cell the node reached via ``next_sibling`` twice is
    read and its ``.string`` collected.  Presumably the hop in between is
    the whitespace text node separating two ``<td>`` elements -- confirm
    against the page markup.

    Returns the collected strings as a list, in document order.
    """
    marker_cells = table.find_all("td", "first")
    return [cell.next_sibling.next_sibling.string for cell in marker_cells]
def myfunction(text):
    """Convert an ISO-8859-1 byte string to an ASCII byte string.

    text -- the value to convert.

    Returns:
        * for a byte string (or bytearray): the same text decoded as
          ISO-8859-1 and re-encoded as ASCII;
        * for anything else (already-decoded unicode text, None, ...):
          *text* itself, unchanged.

    Raises:
        UnicodeEncodeError -- if a byte string contains characters that
        have no ASCII representation.

    The previous version computed the converted value but never returned
    it, so callers received None for every byte-string input.
    """
    # Only raw bytes need decoding.  The old code relied on unicode()
    # raising TypeError for everything else; an explicit type check keeps
    # that pass-through behaviour without exception-driven control flow.
    if not isinstance(text, (bytes, bytearray)):
        return text
    return text.decode('ISO-8859-1').encode('ascii')
def extractName(table):
    """Return the link text found four siblings after each marker cell.

    table -- a BeautifulSoup element; the marker cells are the ones
             returned by ``find_all("td", "first")``.

    For every marker cell the node four ``next_sibling`` hops away is
    located, the ``.string`` of its first ``<a>`` descendant is read, and
    that value is passed through ``myfunction`` before being collected.

    Returns the collected values as a list, in document order.
    Raises IndexError if the located node contains no ``<a>`` element.
    """
    def first_link_text(cell):
        # Walk four siblings along from the marker cell to the node that
        # holds the link.
        holder = cell
        for _ in range(4):
            holder = holder.next_sibling
        return holder.find_all("a")[0].string

    marker_cells = table.find_all("td", "first")
    return [myfunction(first_link_text(cell)) for cell in marker_cells]
def _write_utf8(out, line):
    """Write *line* to *out*, encoding unicode text as UTF-8 first."""
    # A plain Python 2 file object encodes unicode with the ASCII codec
    # and raises UnicodeEncodeError on accented names, so encode
    # explicitly.  Byte strings are passed through untouched.
    if not isinstance(line, bytes):
        line = line.encode('utf-8')
    out.write(line)


def _emit_lineup(out, positions, ids, names, limit=11):
    """Print and log up to *limit* (position, id, name) rows."""
    # Zipping and slicing tolerates a table with fewer than *limit* rows
    # instead of failing with IndexError halfway through the output.
    for position, player_id, name in list(zip(positions, ids, names))[:limit]:
        print("%s   %s   %s" % (position, player_id, name))
        _write_utf8(out, position + " " + player_id + " " + name + '\n')


def main_2(pageURL, homeName1, awayName1):
    """Fetch one statistics page and report both teams' lineups.

    pageURL   -- address of the page to download.
    homeName1 -- home team name, used in the heading line.
    awayName1 -- away team name, used in the heading line.

    The page is parsed with BeautifulSoup and its ``<section>`` elements
    of class ``mod-container`` are collected; the 4th and 5th of those
    are treated as the home and away lineup tables.  Up to 11 rows per
    team are printed to stdout and appended to ``lineup.txt`` in the
    current directory (unicode text is stored as UTF-8).

    Raises:
        IndexError -- if the page has fewer than five matching sections.
        Whatever ``urllib2.urlopen`` raises on a failed download.
    """
    # ``with`` guarantees the log file is closed even when the download
    # or the parsing below fails.
    with open('lineup.txt', 'a') as out:
        # A single parenthesised argument prints identically under the
        # Python 2 print statement.
        print("\n")
        out.write('\n')

        response = urllib2.urlopen(pageURL)
        try:
            html = response.read()
        finally:
            response.close()

        # NOTE(review): no parser is named here, so BeautifulSoup picks
        # whichever one is installed -- confirm that is intended.
        parse = BeautifulSoup(html)
        tables = parse.find_all("section", "mod-container")
        homeTable = tables[3]
        awayTable = tables[4]

        # Extract everything before emitting any output, so an extraction
        # failure does not leave a half-written match in the log.
        homePosition = extractPosition(homeTable)
        homeID = extractID(homeTable)
        homeName = extractName(homeTable)
        awayPosition = extractPosition(awayTable)
        awayID = extractID(awayTable)
        awayName = extractName(awayTable)

        print("%s  VS.  %s :" % (homeName1, awayName1))
        _write_utf8(out, homeName1 + " VS. " + awayName1 + ":" + '\n')

        _emit_lineup(out, homePosition, homeID, homeName)
        print("--------------------------")
        out.write("--------------------------\n")
        _emit_lineup(out, awayPosition, awayID, awayName)
def main_1(pageUrl, home, away):
    """Run ``main_2`` once for every entry of *pageUrl*.

    pageUrl -- sequence of URL strings; characters 33..38 of each entry
               are taken as the page ID (presumably the six-character
               match id -- verify against the caller's URL format).
    home    -- sequence of home team names, indexed in step with pageUrl.
    away    -- sequence of away team names, indexed in step with pageUrl.

    For each entry a statistics-page URL is built around the extracted ID
    and handed to ``main_2`` together with the matching team names.
    """
    url_prefix = 'http://espnfc.com/us/en/gamecast/statistics/id/'
    url_suffix = '/statistics.html?soccernet=true&cc=5901'
    for index, source_url in enumerate(pageUrl):
        match_id = str(source_url[33:39])
        main_2(url_prefix + match_id + url_suffix, home[index], away[index])