-
Notifications
You must be signed in to change notification settings - Fork 1
/
html2text_regex_demo.py
executable file
·88 lines (58 loc) · 1.66 KB
/
html2text_regex_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
import re
import requests
import html2text
class VenueParser(object):
    """Fetch a venue's web page, convert it to text, and run consumers on it.

    Subclasses set ``url`` and ``consumers``.  ``run()`` calls each consumer
    in order with the parser instance as its only argument; consumers work on
    ``self.text`` and add entries to ``self.specials``.
    """

    # Address of the page to download; subclasses override this.
    url = None
    # Callables invoked in order by run(), each receiving the parser instance.
    consumers = []
    # Seconds to wait on the server; without a timeout requests can block
    # forever on an unresponsive host.
    timeout = 10

    def __init__(self):
        # Text rendering of the downloaded page; populated by run().
        self.text = None
        # Specials collected by the consumers.
        self.specials = []

    def run(self):
        """Download ``url``, convert the HTML to text, and apply every consumer.

        The collected specials are printed once all consumers have run.

        Raises:
            requests.RequestException: if the request times out, the
                connection fails, or the server answers with an HTTP
                error status.
        """
        response = requests.get(self.url, timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page as if it
        # were the venue's content.
        response.raise_for_status()
        self.text = html2text.html2text(response.text)
        for consumer in self.consumers:
            consumer(self)
        print(self.specials)
class PeppinosParser(VenueParser):
    """Parser for the specials page at peppinospizza.com."""

    url = "http://www.peppinospizza.com/specials"

    def clean_intro(self):
        """Remove the intro running from 'Your Location' through 'Happy Hour'.

        Each removed span is printed before it is stripped from ``self.text``.
        """
        # re.DOTALL lets '.' cross line breaks (re.MULTILINE only changes how
        # '^' and '$' behave), so the pattern can span the multi-line text.
        # The non-greedy '.*?' stops at the first 'Happy Hour' rather than
        # swallowing everything up to the last one.
        # re.findall returns plain strings for a pattern without groups, so
        # each item is used directly instead of calling .group() on it.
        for intro in re.findall(r'Your Location.*?Happy Hour', self.text,
                                re.DOTALL):
            print(intro)
            self.text = self.text.replace(intro, '')

    def clean_location(self):
        """Placeholder consumer; currently does nothing."""
        pass

    def consume_late_night(self):
        """Placeholder consumer; currently does nothing."""
        # TODO: not implemented.  The original sketch was: search self.text,
        # remove the matched text, and append a special to self.specials.
        pass

    def consume_burger_special(self):
        """Strip the Saturday block from the text and record a burger special.

        The special is appended whether or not the block was present in
        ``self.text``; ``str.replace`` is simply a no-op when it is absent.
        """
        self.text = self.text.replace("""
### Saturday
##### 11AM - 5PM
$5.99 All-American Burger Baskets
$.50 Traditional/$.60 Boneless Wings
##### **‘till 8PM**
$3.75 Coors Light, Miller Lite 22oz Talls""", '')
        special = {
            'item': 'burger',
            'date': 'saturday',
            'start': '11am',
        }
        self.specials.append(special)

    # Plain functions captured at class-definition time; VenueParser.run()
    # calls each one as consumer(self), in this order.
    consumers = [
        clean_intro,
        clean_location,
        consume_late_night,
        consume_burger_special,
    ]
# Script entry point: build the Peppino's parser and run it, which downloads
# the page, applies the consumers, and prints the collected specials.
parser = PeppinosParser()
parser.run()