-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
290 lines (273 loc) · 11.7 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import json
from Recipe import *
from jsmin import jsmin # pip install jsmin
import requests
import multiprocessing as mp
import random
import sqlite3
def establish_connection(db):
    """Open a SQLite connection to ``db``.

    Returns the connection with ``row_factory`` set to ``dict_factory``, or
    ``None`` when ``sqlite3.connect`` raises; in that case the error is
    printed to stdout.
    """
    try:
        connection = sqlite3.connect(db)
    except Exception as error:
        print("Error while establishing database connection:", error)
        return None
    else:
        # Only reached when the connection was opened successfully.
        connection.row_factory = dict_factory
        return connection
def dict_factory(cursor, row):
    """Row factory mapping column names to row values.

    The key for each entry is the first item of the matching
    ``cursor.description`` element; the value is the item of ``row`` at the
    same position.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }
# Newly added recipes stored in a JSON file at PATH must have the leftover scores added
# File at PATH will be overwritten with updated values
def update_lo_scores(path):
    """Placeholder for adding leftover scores to the recipes stored at ``path``.

    Not implemented yet: the function ignores ``path``, touches no file and
    returns ``None``.
    """
    # TODO: compute the leftover scores and write them back to ``path``.
    return None
# When new recipes are added, the entire database's buddies must be updated
# (A new best bud could be found in the new recipes)
# But maybe to reduce complexity we are only interested in overlapping recipes that share > 0 primary ingredients
def update_new_buddies(path, K):
    """Recompute the buddy recipes of every recipe in the JSON(C) file at ``path``.

    Each recipe is instantiated with ``instantiate_recipe``, asked to update
    its buddies against all other recipes from the file, and the resulting
    ``buddies`` value is stored under the recipe's ``"buddy_recipes"`` key.
    The updated recipe list is written to ``output.jsonc``.

    Args:
        path: Path of the JSON-with-comments file holding the new recipes.
        K: Forwarded unchanged to ``Recipe.update_buddies``.
    """
    # The connection is only needed for the SQL write-back, which is still
    # disabled; it is opened here as before but now always closed again, and a
    # failed connection (None) no longer crashes the function.
    connection = establish_connection("sqliteRecipeList.db")
    try:
        with open("tables.jsonc") as tables, open(path) as recipe_file:
            new_recipes = json.loads(jsmin(recipe_file.read()))
            prim_ingr_table = json.loads(jsmin(tables.read()))["primary_ingredients"]

        # Union of all primary ingredients required by new recipes.
        # TODO: use it to fetch only the stored recipes that share a primary
        # ingredient with the new ones, and merge them into ``recipes``.
        ingredients = set()
        for recipe in new_recipes:
            ingredients.update(recipe["primary_ingredients"].keys())

        recipes = new_recipes
        for recipe in recipes:
            r, table_data = instantiate_recipe(recipe, prim_ingr_table)
            others = [other for other in recipes if other != recipe]
            r.update_buddies(others, prim_ingr_table, K)
            recipe["buddy_recipes"] = r.buddies
            # TODO: write r.buddies back to the database (use a parameterized
            # UPDATE statement followed by connection.commit()).

        # Serialize before opening the file so a serialization error cannot
        # leave a truncated output file behind; the handle is closed by `with`.
        serialized = json.dumps(recipes)
        with open("output.jsonc", "w") as output:
            output.write(serialized)
    finally:
        if connection is not None:
            connection.close()
def read_from_db(connection, table):
    """Fetch all records from the specified table.

    ``table`` is concatenated into the SQL text unchanged, so it must be a
    trusted table name. The rows are returned exactly as ``fetchall()``
    delivers them.
    """
    cursor = connection.cursor()
    query = "SELECT * FROM " + table + ";"
    result = cursor.execute(query)
    return result.fetchall()
# read all recipes from the database as well as needed ingredient table data
def update_scores():
    """Compute and print the leftover score of every recipe in ``smallSet``.

    Reads the ``smallSet`` and ``IngredientsList`` tables from
    ``sqliteRecipeList.db``, instantiates each recipe record, updates its
    leftover score and prints it. Does nothing when the database connection
    cannot be established.
    """
    connection = establish_connection("sqliteRecipeList.db")
    if connection is None:
        return
    try:
        records = read_from_db(connection, "smallSet")
        table = read_from_db(connection, "IngredientsList")
        for record in records:
            # Use the current record; the previous code always re-used
            # records[0] and so scored the first recipe over and over.
            r, table_data = instantiate_recipe(record, table)
            r.update_leftover_score(table_data)
            print(r.leftover_score)
    finally:
        connection.close()
# Find the N smallest members of the array
# Best candidates are recipes who possess a buddy recipe that has one of the N smallest leftover scores
# Symmetric arrays: candidate ids, buddies, and scores
def find_min_buddies(start, end, array, candidate_ids, candidate_buddies, candidate_scores, lock, N):
    """Merge the N best (recipe, buddy) pairs of ``array[start:end]`` into the shared candidates.

    Args:
        start, end: Slice bounds of the part of ``array`` to scan.
        array: Recipe documents; each needs an ``"rID"`` entry and a
            ``"buddy_recipes"`` mapping of buddy id (convertible with
            ``int()``) to score.
        candidate_ids, candidate_buddies, candidate_scores: Parallel,
            index-assignable sequences holding at least N current candidates.
            They are updated in place with the N lowest-scoring pairs found
            so far.
        lock: Lock guarding the three candidate sequences; must support the
            context-manager protocol (as ``multiprocessing.Lock`` does).
        N: Number of candidates to keep.

    Returns:
        None. The result is delivered through the candidate sequences.
    """
    if N <= 0:
        # Nothing to keep; also avoids deleting from an empty list below.
        return

    # Local top-N: `maxes` always holds N tuples (rID, buddy id, score) and
    # `max_index` points at the entry with the largest score, which is the
    # next one to be evicted.
    maxes = [(0, 0, float("inf")) for _ in range(N)]
    curr_max = float("inf")
    max_index = 0
    for doc in array[start:end]:
        buddies = doc["buddy_recipes"]
        for b, score in buddies.items():
            if score < curr_max:
                del maxes[max_index]
                maxes.append((doc["rID"], int(b), score))
                # Re-locate the worst entry; the new tuple sits at index N-1.
                curr_max = score
                max_index = N - 1
                for indx in range(len(maxes) - 1):
                    s = maxes[indx][2]
                    if s > curr_max:
                        curr_max = s
                        max_index = indx

    # Merge with the shared candidates. `with` releases the lock even when the
    # merge raises, so the other workers cannot be left blocked on it.
    with lock:
        candidates = [
            (candidate_ids[i], candidate_buddies[i], candidate_scores[i])
            for i in range(N)
        ]
        tupleware = minimize_tuple_array(maxes + candidates, 3, N)
        for i in range(N):
            t = tupleware[i]
            candidate_ids[i] = t[0]
            candidate_buddies[i] = t[1]
            candidate_scores[i] = t[2]
# Expects an array of JSON from the DB representing candidates for the meal plan
# Return an array of N recipe ids
def select_meal_plan_recipes(results, N):
    """Pick the recipe ids of a meal plan with N recipes.

    The recipes are scanned in parallel by ``find_min_buddies`` workers, which
    collect the N (recipe, buddy) pairs with the lowest buddy scores. The
    ``N // 2`` best pairs are taken; for odd N one more recipe with a minimal
    personal leftover score is appended. The accumulated score is printed.

    Args:
        results: Recipe documents as expected by ``find_min_buddies`` and
            ``find_min_leftover``.
        N: Number of recipes in the meal plan.

    Returns:
        A list of recipe ids.

    Raises:
        ValueError: If fewer than ``N // 2`` pairs remain after reciprocal
            pairs have been removed.
    """
    # Shared candidate arrays (C ints), pre-filled with placeholder ids and
    # very large scores so that real pairs replace them.
    candidate_ids = mp.RawArray('i', range(N))
    candidate_buddies = mp.RawArray('i', range(N))
    candidate_scores = mp.RawArray('i', range(999999, 999999 + N))
    lock = mp.Lock()
    shared = (candidate_ids, candidate_buddies, candidate_scores, lock, N)

    # One chunk per CPU plus one extra worker for the remainder.
    procs = mp.cpu_count()
    chunk_size = len(results) // procs
    bounds = [(i * chunk_size, (i + 1) * chunk_size) for i in range(procs)]
    bounds.append((procs * chunk_size, len(results)))

    running = []
    for start, end in bounds:
        p = mp.Process(target=find_min_buddies, args=(start, end, results) + shared)
        running.append(p)
        p.start()
    for p in running:
        p.join()

    # Copy the shared arrays into plain tuples and drop reciprocals:
    # (id1, id2) followed later by (id2, id1). Building a new list avoids
    # deleting from a list while iterating over it, which skipped entries.
    pairs = []
    kept = set()
    for i in range(N):
        rID, bID, score = candidate_ids[i], candidate_buddies[i], candidate_scores[i]
        if (bID, rID) in kept:
            continue
        kept.add((rID, bID))
        pairs.append((rID, bID, score))

    # Take the N//2 pairs with the lowest score (first one wins on ties).
    total_lo_score = 0
    final = []
    for _ in range(N // 2):
        best = min(pairs, key=lambda pair: pair[2])
        pairs.remove(best)
        final.append(best[0])
        final.append(best[1])
        total_lo_score += best[2]

    if N % 2 == 1:  # Match N//2 recipes, add a minimal remainder
        capstones = find_min_leftover(results, procs, chunk_size, N)
        # Filter into a new list; deleting while enumerating skipped the
        # element after each deletion and could keep an already used recipe.
        capstones = [c for c in capstones if c[0] not in final]
        minimal_recipe = minimize_tuple_array(capstones, 2, 1)[0]
        final.append(minimal_recipe[0])
        total_lo_score += minimal_recipe[1]

    print("Total Leftover Score:", total_lo_score)
    return final  # Return array of rIDs
# Find the N//2 + 1 recipes with the lowest personal leftover scores (we only need 1, but at most N//2 might overlap with paired recipes)
# Although it would probably be faster to identify the lowest leftover score for each array segment while processing the best buddies
def find_min_leftover(results, num_procs, chunk_size, N):
    """Return ``N // 2 + 1`` (rID, leftover score) tuples with minimal scores.

    ``results`` is split into ``num_procs`` chunks of ``chunk_size`` recipes
    plus one trailing chunk; a ``min_leftover`` worker process reports the
    best recipe of each chunk through a queue and the reports are reduced
    with ``minimize_tuple_array``.

    Args:
        results: Recipe documents as expected by ``min_leftover``.
        num_procs: Number of equally sized chunks.
        chunk_size: Number of recipes per chunk.
        N: Size of the meal plan.
    """
    q = mp.Queue()

    bounds = [(i * chunk_size, (i + 1) * chunk_size) for i in range(num_procs)]
    # The trailing worker must run to the end of `results`; bounding it by
    # another `chunk_size` left recipes unscanned whenever the remainder was
    # larger than one chunk.
    bounds.append((num_procs * chunk_size, len(results)))

    running = []
    for start, end in bounds:
        p = mp.Process(target=min_leftover, args=(start, end, results, q))
        running.append(p)
        p.start()
    for p in running:
        p.join()

    # NOTE(review): the queue is drained after joining the workers; this
    # relies on each worker putting only one small tuple on the queue.
    array = []
    while not q.empty():
        array.append(q.get(block=True, timeout=1))
    return minimize_tuple_array(array, 2, (N // 2) + 1)
def min_leftover(start, end, array, q):
    """Put the (rID, leftover score) of the best recipe in ``array[start:end]`` on ``q``.

    The best recipe is the first one with the lowest ``"leftover_score"``.
    When no recipe in the slice scores below infinity, ``(None, inf)`` is
    put on the queue instead.
    """
    best_id, best_score = None, float('inf')
    for entry in array[start:end]:
        candidate = entry["leftover_score"]
        if not candidate < best_score:
            continue
        best_score = candidate
        best_id = entry["rID"]
    q.put((best_id, best_score))
# Return the N tuples of size X with the minimum (X-1)th values (tuple[X-1])
def minimize_tuple_array(array, X, N):
    """Return the N tuples of ``array`` with the smallest value at index ``X - 1``.

    Args:
        array: Iterable of tuples with at least X items each.
        X: Tuple size; the score is read from index ``X - 1``.
        N: Number of tuples to return.

    Returns:
        A list of exactly N tuples (empty when ``N <= 0``). Slots that could
        not be filled from ``array`` hold a placeholder consisting of
        ``X - 1`` zeros followed by ``float("inf")``. A tuple only enters the
        result if its score is strictly below the largest score currently
        kept, so tuples scoring infinity are never selected.

    Raises:
        ValueError: If ``X`` is smaller than 1.
    """
    if X < 1:
        raise ValueError("X must be at least 1")
    if N <= 0:
        return []

    score_at = X - 1
    # Works for any tuple size, not only the 2- and 3-tuples used so far.
    placeholder = (0,) * score_at + (float("inf"),)
    results = [placeholder for _ in range(N)]

    # `max_index` always points at the kept tuple with the largest score,
    # which is the one evicted by the next better candidate.
    curr_max = results[0][score_at]
    max_index = 0
    for a in array:
        score = a[score_at]
        if score < curr_max:
            del results[max_index]
            results.append(a)
            # Re-locate the worst entry; the new tuple sits at index N-1.
            curr_max = score
            max_index = N - 1
            for indx in range(N - 1):
                s = results[indx][score_at]
                if s > curr_max:
                    curr_max = s
                    max_index = indx
    return results
# Make a meal plan with N recipes
def generate_meal_plan(results, N):
    """Build a ``MealPlan`` of N recipes chosen from ``results``.

    Args:
        results: Candidate recipe documents; each needs an ``"rID"`` entry.
        N: Requested number of recipes.

    Returns:
        The ``MealPlan`` built from the selected recipes and the
        ``"primary_ingredients"`` table of ``tables.jsonc``, or ``None`` when
        fewer than N candidates were supplied or any step fails. Failures are
        reported on stdout.
    """
    try:
        if len(results) < N:
            print("Recipe finder returned fewer results than requested number of recipes in meal plan")
            return None
        with open("tables.jsonc") as tables:
            prim_ingr_table = json.loads(jsmin(tables.read()))["primary_ingredients"]
        rids = select_meal_plan_recipes(results, N)
        print(rids)
        recipes = [r for r in results if r["rID"] in rids]
        # Local name chosen so it does not shadow the `mp` multiprocessing alias.
        meal_plan = MealPlan(recipes, prim_ingr_table)
        return meal_plan
    except Exception as error:
        # Keep the best-effort contract (None on failure) but no longer hide
        # the cause, and let KeyboardInterrupt/SystemExit propagate.
        print("Error while generating meal plan:", error)
        return None
# Generate a random meal plan, calculate total leftover score
# Needs testing
def random_meal_plan(results, prim_ingr_table, N):
    """Sample N recipes at random and print their combined leftover score.

    For every primary ingredient used by the sampled recipes, the quantities
    required by those recipes are summed and passed with the ingredient's
    table entry to ``ingredient_score``; the scores are added up and printed
    together with the sampled recipe ids. Nothing is returned.
    """
    chosen = random.sample(results, N)

    # Keys of every recipe's "primary_ingredients", de-duplicated.
    ingredient_keys = [key for recipe in chosen for key in recipe["primary_ingredients"]]
    unique_keys = set(ingredient_keys)
    entries = [prim_ingr_table[key] for key in unique_keys]

    scores = []
    for entry in entries:
        ingredient_id = entry['id']
        # Total quantity of this ingredient required across the sampled recipes.
        required = sum(
            recipe["primary_ingredients"][ingredient_id]
            for recipe in chosen
            if ingredient_id in recipe["primary_ingredients"]
        )
        scores.append(ingredient_score(entry, required))
    total = sum(scores)

    print("Random Meal Plan Recipes:")
    print([recipe["rID"] for recipe in chosen])
    print("Total score for random meal plan:", total)
if __name__ == "__main__":
    # Script entry point: print the leftover score of every stored recipe.
    update_scores()

    # Earlier experiments, kept for reference:
    #update_lo_scores("output.jsonc")
    #update_new_buddies("output.jsonc", 3)
    #old = read_from_db("http://127.0.0.1:5000/", {'main_ingredient':'chicken'})
    #print(old)
    #insertion_wrapper(1, "ingredients", ["code", "quantity"], ["ch", 10])
    #c.execute("INSERT INTO nametable " + snames + ";")
    #with open("tables.jsonc") as tables:
    #    with open("output.jsonc") as recipe_file:
    #        new_recipes = json.loads(jsmin(recipe_file.read()))
    #        prim_ingr_table = json.loads(jsmin(tables.read()))["primary_ingredients"]
    #        mp = generate_meal_plan(new_recipes, 3)
    #        random_meal_plan(new_recipes, prim_ingr_table, 3)
    #        mp.print_recipes()
    #        mp.print_grocery()