-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemp_tags.py
233 lines (208 loc) · 8.63 KB
/
temp_tags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from bs4 import BeautifulSoup
import requests
class Trie:
    """Prefix tree that maps words to frequencies.

    Every node holds ``frequency`` (the frequency of the word that ends
    exactly at this node; 0 means no word ends here) and ``children`` (a
    dict from the next character to the child ``Trie`` node).
    """

    # Characters tried for single-character replacement/insertion edits.
    _ALPHABET = "abcdefghijklmnopqrstuvwxyz"

    ##################################################
    ## basic methods
    ##################################################

    def __init__(self):
        self.frequency = 0
        self.children = {}

    def insert(self, word, frequency=None):
        """Add ``word`` to the trie.

        If ``frequency`` is None the word's stored frequency is incremented
        by one, otherwise it is overwritten with ``frequency``. An empty
        ``word`` is ignored (it previously caused unbounded recursion).
        """
        if not word:
            return
        node = self
        for char in word:
            child = node.children.get(char)
            if child is None:
                child = node.children[char] = Trie()
            node = child
        # Only the node for the last character carries the frequency.
        if frequency is None:
            node.frequency += 1
        else:
            node.frequency = frequency

    def find(self, prefix):
        """Return the node reached by following ``prefix``, or None.

        An empty prefix returns this node itself.
        """
        node = self
        for char in prefix:
            node = node.children.get(char)
            if node is None:
                return None
        return node

    def __contains__(self, word):
        """Return True if ``word`` was inserted with a non-zero frequency."""
        node = self.find(word)
        return node is not None and node.frequency != 0

    def __iter__(self):
        """Yield ``[word, frequency]`` for every word at or below this node.

        Words are relative to this node, i.e. they do not include whatever
        prefix led here.
        """
        yield from self.iterhelper()

    def iterhelper(self, prefix=""):
        """Yield ``[prefix + suffix, frequency]`` pairs in pre-order.

        ``prefix`` accumulates the characters on the path from the node the
        iteration started at down to the current node.
        """
        if self.frequency != 0:
            yield [prefix, self.frequency]
        for letter, child in self.children.items():
            yield from child.iterhelper(prefix + letter)

    ##################################################
    ## additional methods
    ##################################################

    def autocomplete(self, prefix, N):
        """Return up to ``N`` most frequent words that start with ``prefix``.

        Words are ordered by descending frequency; ties keep the trie's
        iteration order. Returns an empty list when no word has the prefix.
        """
        start = self.find(prefix)
        if start is None:
            return []
        # Iterating the sub-trie yields [suffix, frequency] pairs.
        ranked = sorted(start, key=lambda entry: entry[1], reverse=True)
        return [prefix + entry[0] for entry in ranked][:N]

    def autocorrect(self, prefix, N):
        """Return up to ``N`` suggestions for ``prefix``.

        Autocomplete results come first. If there are fewer than ``N`` of
        them, the list is topped up with valid words that are one edit away
        from ``prefix`` (see ``make_edit``), ordered by descending frequency
        and then alphabetically so the result is the same on every run.
        """
        complete = self.autocomplete(prefix, N)
        if len(complete) == N:
            return complete

        candidates = []
        for word in self.make_edit(prefix):
            node = self.find(word)
            if node is not None and node.frequency != 0:
                candidates.append((word, node.frequency))
        # Two stable passes: alphabetical first, then frequency high to low.
        # The edits come from a set, so without the alphabetical pass the
        # order of equal-frequency words would vary between runs.
        candidates.sort(key=lambda pair: pair[0])
        candidates.sort(key=lambda pair: pair[1], reverse=True)

        already = set(complete)
        extras = [word for word, _ in candidates if word not in already]
        return complete + extras[:N - len(complete)]

    def make_edit(self, word):
        """Return the set of strings one simple edit away from ``word``.

        Edits are: deleting one character, replacing one character with a
        lowercase ASCII letter, inserting a lowercase ASCII letter at any
        position (including the end), and swapping any two characters.
        For a non-empty ``word`` the unmodified word is part of the result
        as well, as it always has been.
        """
        edits = set()
        length = len(word)
        for i in range(length):
            # single character deletion
            edits.add(word[:i] + word[i + 1:])
            # single character replacement
            for letter in self._ALPHABET:
                edits.add(word[:i] + letter + word[i + 1:])
        # single character insertion; position ``length`` appends, which
        # also covers the empty word
        for i in range(length + 1):
            for letter in self._ALPHABET:
                edits.add(word[:i] + letter + word[i:])
        # two characters transposed
        chars = list(word)
        for x in range(length):
            for y in range(x + 1, length):
                chars[x], chars[y] = chars[y], chars[x]
                edits.add(''.join(chars))
                chars[x], chars[y] = chars[y], chars[x]  # undo the swap
        if word:
            edits.add(word)
        return edits

    def filter(self, pattern):
        """Return ``[word, frequency]`` for all words that match ``pattern``.

        Pattern syntax:
          *  matches any sequence of zero or more characters
          ?  matches any single character
          any other character must equal the character in the word

        Each matching word appears once; the result is sorted by word.
        """
        matches = [
            entry for entry in self
            if self._glob_match(pattern, entry[0])
        ]
        matches.sort(key=lambda entry: entry[0])
        return matches

    def filter_helper(self, pattern, index=0, ast=False, filtered=None):
        """Return the entries of ``filtered`` whose tail matches ``pattern``.

        ``filtered`` is a list of ``[word, frequency]`` pairs. An entry is
        kept when ``word[index:]`` matches ``pattern``; ``ast=True`` means a
        ``*`` immediately precedes ``pattern``. Words shorter than ``index``
        never match.
        """
        if filtered is None:
            filtered = []
        full_pattern = '*' + pattern if ast else pattern
        return [
            entry for entry in filtered
            if len(entry[0]) >= index
            and self._glob_match(full_pattern, entry[0][index:])
        ]

    @staticmethod
    def _glob_match(pattern, text):
        """Return True if ``text`` matches ``pattern`` (``*`` / ``?`` globs)."""
        p = t = 0
        star = -1  # position in pattern of the most recent '*'
        mark = 0   # text position that '*' is currently stretched up to
        while t < len(text):
            if p < len(pattern) and pattern[p] == '*':
                star, mark = p, t
                p += 1
            elif p < len(pattern) and pattern[p] in ('?', text[t]):
                p += 1
                t += 1
            elif star != -1:
                # Mismatch: let the last '*' swallow one more character.
                mark += 1
                t = mark
                p = star + 1
            else:
                return False
        # Text is used up; only trailing '*' may remain in the pattern.
        while p < len(pattern) and pattern[p] == '*':
            p += 1
        return p == len(pattern)
# product = input("What type of product are you looking for? ")
# brand = input("Brand? ")
# rating = input("Min rating? ")
# color = input("Color adjectives? ")
# print('Input: ', product, brand, rating, color)
############################
# Getting Temptalia Brands #
############################
# Download the swatch page and collect every brand listed in its <option>
# elements. Each brand is stored as written and lower-cased, both in
# `brandlist` (exact matches) and in `trie` (spelling suggestions; hyphens
# are replaced by spaces there).
brands = 'https://www.temptalia.com/swatches/'
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page.
info = requests.get(brands, timeout=30)
info.raise_for_status()
soup = BeautifulSoup(info.content, "html.parser")
not_opts = ['Select a Brand...', 'All Brands']
brandlist = []
trie = Trie()
for link in soup.find_all('option'):
    current = link.text
    # Skip placeholder entries and options without any text.
    if not current or str(current) in not_opts:
        continue
    spaced = current.replace('-', ' ')
    trie.insert(spaced, 1)
    trie.insert(spaced.lower(), 1)
    brandlist.append(current)
    brandlist.append(current.lower())

brand = input("Brand? ")
if brand and brand not in brandlist:
    # Unknown brand: offer up to two close matches and take the first one
    # the user confirms.
    options = trie.autocorrect(brand, 2)
    for op in options:
        change = input(f"Did you mean {op}? ")
        if change.strip().lower() in ['true', '1', 't', 'y', 'yes']:
            print(f"Searching for {op} instead!")
            brand = op
            break
    else:
        # for/else: runs only when no suggestion was accepted (or none existed).
        print(f"Looks like Temptalia doesn't have any reviews on hand for {brand} :(")

# Letter grades accepted as a minimum rating; an empty answer means no filter.
valid_ratings = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F']
rating = input("Min rating? ")
# Keep asking until the answer is a valid grade or left empty.
while rating and rating not in valid_ratings:
    rating = input("In format of letter grade with +/- modifiers ")

colors = input("Color adjectives? ").strip()
print(colors)