forked from nico/collectiveintelligence-book
-
Notifications
You must be signed in to change notification settings - Fork 1
/
zillow.py
49 lines (37 loc) · 1.52 KB
/
zillow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import urllib2
import xml.dom.minidom
# Zillow Web Services ID; sent as the `zws-id` query parameter on every
# request made by getaddressdata().
# NOTE(review): this credential is hard-coded in source -- consider loading
# it from configuration or the environment instead.
zwskey = 'X1-ZWz1chwxis15aj_9skq6'
def getaddressdata(address, city):
    """Look up one property through Zillow's GetDeepSearchResults service.

    Args:
        address: Street address of the property. Pass it unescaped; it is
            URL-encoded here.
        city: Value for the ``citystatezip`` query parameter, e.g.
            'Cambridge,MA'. Also URL-encoded here.

    Returns:
        A tuple ``(zipcode, use, year, bath, bed, price)`` where ``year`` and
        ``bed`` are ints, ``bath`` is a float, and the remaining items are
        the text of the first ``zipcode``, ``useCode`` and ``amount``
        elements of the response. Returns None when the service reports a
        non-zero result code, or when any expected element is missing,
        empty, or not numeric where a number is required.

    Raises:
        urllib2.URLError: if the HTTP request itself fails.
        xml.parsers.expat.ExpatError: if the response is not well-formed XML.
    """
    # Function-scope import: urlencode is only needed here, and this keeps
    # the block self-contained without touching the file-level imports.
    import urllib

    # urlencode escapes every parameter (spaces, '&', '#', ...) instead of
    # only replacing spaces in the address.
    query = urllib.urlencode([
        ('zws-id', zwskey),
        ('address', address),
        ('citystatezip', city),
    ])
    url = 'http://www.zillow.com/webservice/GetDeepSearchResults.htm?' + query

    # urllib2 responses are not context managers, so close explicitly.
    response = urllib2.urlopen(url)
    try:
        doc = xml.dom.minidom.parseString(response.read())
    finally:
        response.close()

    def first_text(tag):
        # Text content of the first <tag> element in the response.
        return doc.getElementsByTagName(tag)[0].firstChild.data

    try:
        # Only a result code of '0' is treated as success.
        if first_text('code') != '0':
            return None

        zipcode = first_text('zipcode')
        use = first_text('useCode')
        year = int(first_text('yearBuilt'))
        bath = float(first_text('bathrooms'))
        bed = int(first_text('bedrooms'))
        price = first_text('amount')
    except (IndexError, AttributeError, ValueError):
        # IndexError: element absent; AttributeError: element has no text
        # child; ValueError: non-numeric year/bath/bed. Any of these means
        # the record is unusable, so report "no data" like a failed lookup.
        return None

    return zipcode, use, year, bath, bed, price
def getpricelist(filename='addresslist.txt', city='Cambridge,MA'):
    """Fetch property records for every address listed in a text file.

    Args:
        filename: Path of a text file with one street address per line.
            Defaults to 'addresslist.txt'.
        city: Passed to getaddressdata() as its ``city`` argument for every
            address. Defaults to 'Cambridge,MA'.

    Returns:
        A list of the tuples returned by getaddressdata(), in file order.
        Addresses for which getaddressdata() returned None are omitted.
    """
    records = []
    # The with-block guarantees the file is closed even if a lookup raises.
    with open(filename) as address_file:
        for line in address_file:
            address = line.strip()
            if not address:
                # A blank line cannot match a property; skip the request.
                continue
            record = getaddressdata(address, city)
            if record is not None:
                records.append(record)
    return records
if __name__ == '__main__':
    # Script entry point: download the price records, print them, build a
    # tree from them with treepredict using its variance score function,
    # and render that tree to zillow.png.
    import drawtree
    import treepredict

    records = getpricelist()
    print(records)

    price_tree = treepredict.buildtree(records, scorefun=treepredict.variance)
    drawtree.drawtree(price_tree, 'zillow.png')
    print("Wrote zillow.png")