-
Notifications
You must be signed in to change notification settings - Fork 0
/
station.py
68 lines (49 loc) · 1.8 KB
/
station.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from requests_html import HTMLSession
from mongoengine import Document, connect, StringField, IntField
import string
# Module-level side effect: register the MongoEngine connection for the "lpf"
# database as soon as this module is imported, so the Document classes below
# can query it. No host/port is passed here, so mongoengine's defaults apply.
connect("lpf")
class Station(Document):
    """MongoEngine document describing a single railway station.

    ``repr()`` and ``str()`` both render an instance as ``[<code>] <name>``.
    """

    name = StringField(required=True)  # display name of the station
    location = StringField()  # raw coordinate text, decoded by parse_location()
    code = StringField(required=True)  # station code
    avg_price = IntField()  # optional; never assigned in the visible code

    def parse_location(self):
        """Extract two numbers from the stored ``location`` text.

        Characters outside ``string.printable`` are dropped, the text after
        the last ``/`` is kept, and its first two whitespace-separated tokens
        (with any surrounding ``;`` removed) are converted to floats.
        Presumably these are latitude then longitude -- confirm against the
        stored data.

        Returns:
            A ``(float, float)`` tuple holding the first two numeric tokens.

        Raises:
            TypeError: if ``location`` is ``None``.
            IndexError: if fewer than two tokens are present.
            ValueError: if one of the first two tokens is not a number.
        """
        # Scraped text may contain non-ASCII symbols and invisible characters;
        # keep only what string.printable lists.
        cleaned = ''.join(ch for ch in self.location if ch in string.printable)
        last_segment = cleaned.split('/')[-1]
        # split() without an argument tolerates repeated or mixed whitespace,
        # where split(' ') would yield empty tokens that float() rejects.
        tokens = [token.strip(';') for token in last_segment.split()]
        # A detached ';' becomes an empty string once stripped; discard it.
        tokens = [token for token in tokens if token]
        return float(tokens[0]), float(tokens[1])

    def __repr__(self):
        return "[{}] {}".format(self.code, self.name)

    def __str__(self):
        return repr(self)

    def __unicode__(self):
        # Python 2 text hook, kept so the class interface is unchanged.
        return repr(self)
class StationList:
    """List of ``Station`` documents, scraped from Wikipedia when none exist.

    On construction the ``Station`` collection is checked; if it holds no
    documents the Wikipedia page at ``self.url`` is scraped and saved first.
    The stored documents are then read into ``self.stations``.
    """

    def __init__(self):
        """Create the HTTP session, populate the collection if empty, load it."""
        self.session = HTMLSession()
        self.url = 'https://en.wikipedia.org/wiki/List_of_London_railway_stations'
        self.stations = []
        # count() lets the database report the number of documents instead of
        # fetching every document just to test for emptiness.
        if not Station.objects.count():
            self.save()
        self.load()

    def load(self):
        """Replace ``self.stations`` with every ``Station`` currently stored."""
        self.stations = list(Station.objects)

    @staticmethod
    def _clean_name(raw):
        """Return ``raw`` without a trailing ``London`` or ``[...]`` marker."""
        name = raw
        if name.endswith('London'):
            # Drop whitespace left in front of the removed suffix so it is not
            # stored and does not hide a trailing ']' from the next check.
            name = name[:-len('London')].rstrip()
        if name.endswith(']'):
            # Cut from the last '[' onwards; if there is no '[', rfind gives
            # -1 and only the final ']' is removed.
            name = name[:name.rfind('[')]
        return name.strip()

    def save(self):
        """Fetch ``self.url`` and store one ``Station`` per qualifying table row.

        A row qualifies when it does not carry exactly one link, splits into
        at least eight newline-separated cells, and its fourth cell is three
        characters long. The first cell becomes the name, the fourth the code
        and the eighth the raw location text.
        """
        response = self.session.get(self.url)
        for row in response.html.find('tr'):
            # NOTE(review): rows with exactly one link are skipped; the reason
            # is not evident from this file -- confirm against the page layout.
            if len(row.links) == 1:
                continue
            cells = [cell.strip() for cell in row.text.split('\n')]
            # The length check comes first so cells[3] is only read when present.
            if len(cells) < 8 or len(cells[3]) != 3:
                continue
            Station(
                name=self._clean_name(cells[0]),
                location=cells[7],
                code=cells[3],
            ).save()
if __name__ == '__main__':
    # Script entry point: build the station list (which scrapes and stores the
    # stations first when the collection is empty) and print what was loaded.
    print(StationList().stations)