-
Notifications
You must be signed in to change notification settings - Fork 1
/
recipeparsing.py
133 lines (116 loc) · 4.4 KB
/
recipeparsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import logging
import re
_LOG = logging.getLogger(__name__)

# Words accepted as a unit of measurement. Matching is exact and
# case-sensitive. Size words ("small", "medium", "large") are treated as units.
# NOTE: the original list was missing a comma between "pinches" and "dash"
# (which silently fused them into one bogus entry) and listed "scoops" twice
# where "scoop" was presumably intended.
_UNIT_VOCABULARY = frozenset({
    "pinch", "pinches", "dash", "dashes",
    "teaspoon", "teaspoons", "tsp", "tsp.",
    "tablespoon", "tablespoons", "tbsp", "tbsp.",
    "package", "can", "cans", "cup", "cups",
    "slice", "slices", "dollop", "dollops", "scoop", "scoops",
    "gram", "grams", "container", "containers", "packet", "packets",
    "liter", "liters", "pound", "pounds", "lb", "lbs",
    "bunch", "bunches", "sprig", "sprigs",
    "ounce", "ounces", "oz", "pint", "pints", "gallon", "gallons",
    "small", "medium", "large", "branch",
})

# Text enclosed in parentheses, e.g. "(preferably unbleached)".
_PAREN_REMARK = re.compile(r'\((.*?)\)')
# A token carrying two measurements, e.g. "cups/255": a non-digit, a slash,
# then digits. Plain fractions such as "1/2" do not match.
_DUAL_MEASURE = re.compile(r'\D/\d+')


def _split_remarks(text):
    """Return ``(body, remark)`` with parenthetical and after-comma text removed from body."""
    remarks = []
    found = _PAREN_REMARK.search(text)
    if found:
        # Only the first parenthetical is kept as a remark; all are removed.
        remarks.append(found.group(1))
        text = _PAREN_REMARK.sub('', text)
    body, comma, tail = text.partition(',')
    if comma:
        # Everything after the first comma extends (rather than replaces)
        # the parenthetical remark.
        remarks.append(tail.strip())
    return body, ", ".join(note for note in remarks if note)


def _find_unit(tokens):
    """Return ``(unit, index)`` of the unit token, blanking consumed tokens in place.

    ``index`` is -1 (and ``unit`` is "") when no unit could be identified.
    """
    last = len(tokens) - 1
    for pos, word in enumerate(tokens):
        # Guard the look-ahead so a unit in final position cannot raise.
        follower = tokens[pos + 1] if pos < last else None
        if _DUAL_MEASURE.search(word) and follower in _UNIT_VOCABULARY:
            # "cups/255 grams": keep the first unit, drop the alternate
            # measurement, and stop so later size words cannot override it.
            unit = word.split('/', 1)[0]
            tokens[pos] = ''
            tokens[pos + 1] = ''
            return unit, pos + 1
        if word in _UNIT_VOCABULARY:
            if follower == "of":
                tokens[pos + 1] = ''
            return word, pos
    # Unit outside the vocabulary but phrased like "1 clove of garlic":
    # the word before an interior "of" is taken as the unit.
    for pos in range(1, last):
        if tokens[pos] == "of":
            tokens[pos] = ''
            return tokens[pos - 1], pos - 1
    return "", -1


def _read_quantity(tokens, unit_index):
    """Return ``(quantity, anchor)``; anchor is the index after which the name may start."""
    if unit_index > 0:
        # Everything before the unit is the quantity; keep the spacing so
        # "1 1/2" does not collapse into "11/2".
        amount = " ".join(tok for tok in tokens[:unit_index] if tok)
        return (amount if amount else 1), unit_index
    if tokens and tokens[0].isdigit():
        return tokens[0], 0
    # No usable number: assume a quantity of one.
    return 1, unit_index


def _find_descriptor(tokens, anchor):
    """Return ``(descriptor, index)`` for the first "-ly"/"-ed" word after *anchor*.

    The final token is never examined, so it is not taken as a descriptor.
    ``index`` is the last token consumed by the descriptor, or *anchor*
    when none is found.
    """
    for pos in range(anchor + 1, len(tokens) - 1):
        word = tokens[pos]
        if word.endswith("ly"):
            # An adverb drags the following word along ("finely chopped").
            return (word + " " + tokens[pos + 1]).strip(), pos + 1
        if word.endswith("ed") and word != "red":
            return word, pos
    return "", anchor


def parse(text):
    """Split one free-text ingredient line into its parts.

    Args:
        text: An ingredient description such as
            ``"2 cups/255 grams flour (unbleached), plus more as needed"``.

    Returns:
        A dict with the keys ``"quantity"``, ``"unit"``, ``"name"`` and
        ``"comment"``. ``"quantity"`` is the text found before the unit
        (or the leading integer when there is no unit) and falls back to
        the integer ``1`` when no quantity is present. The other values are
        strings, empty when the part is absent. If anything goes wrong the
        failure is logged and ``"quantity"`` is ``"error parsing"`` with
        the remaining values empty; no exception escapes.
    """
    try:
        body, remark = _split_remarks(text)
        tokens = body.split()
        unit, unit_index = _find_unit(tokens)
        quantity, anchor = _read_quantity(tokens, unit_index)
        descriptor, last_used = _find_descriptor(tokens, anchor)
        result = {
            "quantity": quantity,
            "unit": unit,
            # Blanked tokens ("of", alternate measurements) are skipped.
            "name": " ".join(tok for tok in tokens[last_used + 1:] if tok),
            "comment": ", ".join(note for note in (remark, descriptor) if note),
        }
    except Exception:
        # Best-effort parser: report the failure in-band instead of raising.
        _LOG.exception("could not parse ingredient %r", text)
        return {"quantity": "error parsing", "unit": "", "name": "", "comment": ""}
    _LOG.debug("parsed %r -> %r", text, result)
    return result
def main():
    """Parse a sample ingredient line and print the resulting dict."""
    parsed = parse("1 clove of garlic")
    print(parsed)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()