From 606bfc9ade2cf17046a59a0157d5f097d86b4ecb Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 9 May 2024 22:22:08 +0000 Subject: [PATCH 01/29] added openai settings --- .../installation/backend-config.md | 7 +++ mealie/core/settings/settings.py | 11 ++++ poetry.lock | 56 ++++++++++++++++++- pyproject.toml | 1 + 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/docs/docs/documentation/getting-started/installation/backend-config.md b/docs/docs/documentation/getting-started/installation/backend-config.md index 51ea84f1780..085780c6360 100644 --- a/docs/docs/documentation/getting-started/installation/backend-config.md +++ b/docs/docs/documentation/getting-started/installation/backend-config.md @@ -102,6 +102,13 @@ For usage, see [Usage - OpenID Connect](../authentication/oidc.md) | OIDC_GROUPS_CLAIM | groups | Optional if not using `OIDC_USER_GROUP` or `OIDC_ADMIN_GROUP`. This is the claim Mealie will request from your IdP and will use to compare to `OIDC_USER_GROUP` or `OIDC_ADMIN_GROUP` to allow the user to log in to Mealie or is set as an admin. **Your IdP must be configured to grant this claim**| | OIDC_TLS_CACERTFILE | None | File path to Certificate Authority used to verify server certificate (e.g. `/path/to/ca.crt`) | +### OpenAI + +| Variables | Default | Description | +| --------------------- | :------: | ----------------------------------------------------------------------- | +| OPENAI_API_KEY | None | Your OpenAI API Key. Enables OpenAI-related features | +| OPENAI_MODEL | gpt-4-turbo | Which OpenAI model to use. If you're not sure, leave this empty | + ### Themeing Setting the following environmental variables will change the theme of the frontend. Note that the themes are the same for all users. This is a break-change when migration from v0.x.x -> 1.x.x. diff --git a/mealie/core/settings/settings.py b/mealie/core/settings/settings.py index 6b042936a89..fd3015dbc45 100644 --- a/mealie/core/settings/settings.py +++ b/mealie/core/settings/settings.py @@ -207,6 +207,17 @@ def OIDC_READY(self) -> bool: return self.OIDC_AUTH_ENABLED and not_none and valid_group_claim + # =============================================== + # OpenAI Configuration + + OPENAI_API_KEY: str | None = None + OPENAI_MODEL: str = "gpt-4-turbo" + + @property + def OPENAI_ENABLED(self) -> bool: + """Validates OpenAI settings are all set""" + return bool(self.OPENAI_API_KEY and self.OPENAI_MODEL) + # =============================================== # Testing Config diff --git a/poetry.lock b/poetry.lock index edd3ec7400c..0efa5f480f6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -566,6 +566,17 @@ files = [ {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "dnspython" version = "2.6.1" @@ -1491,6 +1502,29 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "openai" +version = "1.27.0" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.27.0-py3-none-any.whl", hash = "sha256:1183346fae6e63cb3a9134e397c0067690dc9d94ceb36eb0eb2c1bb9a1542aca"}, + {file = "openai-1.27.0.tar.gz", hash = "sha256:498adc80ba81a95324afdfd11a71fa43a37e1d94a5ca5f4542e52fe9568d995b"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + [[package]] name = "orjson" version = "3.10.3" @@ -2883,6 +2917,26 @@ files = [ {file = "tomlkit-0.11.6.tar.gz", hash = "sha256:71b952e5721688937fb02cf9d354dbcf0785066149d2855e44531ebdd2b65d73"}, ] +[[package]] +name = "tqdm" +version = "4.66.4" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, + {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typer" version = "0.12.3" @@ -3336,4 +3390,4 @@ pgsql = ["psycopg2-binary"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e774640ffaf7f86a486066bead71af770a2a974385073fffa2c7a7333a15fc5d" +content-hash = "4155d1d818a04f1e03b512399d5ecbc01b007fae7884f6322d5d389745276a17" diff --git a/pyproject.toml b/pyproject.toml index 151f8ed18e7..60cc8f49e44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ paho-mqtt = "^1.6.1" pydantic-settings = "^2.1.0" pillow-heif = "^0.16.0" pyjwt = "^2.8.0" +openai = "^1.27.0" [tool.poetry.group.postgres.dependencies] psycopg2-binary = { version = "^2.9.1" } From 64f5a42b22a4129f76ffa62d938dfe44c32464ce Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 9 May 2024 22:30:08 +0000 Subject: [PATCH 02/29] added WIP openai service --- mealie/services/openai/__init__.py | 0 mealie/services/openai/openai.py | 113 ++++++++++++++++++ .../recipes/parse-recipe-ingredients.txt | 1 + 3 files changed, 114 insertions(+) create mode 100644 mealie/services/openai/__init__.py create mode 100644 mealie/services/openai/openai.py create mode 100644 mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt diff --git a/mealie/services/openai/__init__.py b/mealie/services/openai/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mealie/services/openai/openai.py b/mealie/services/openai/openai.py new file mode 100644 index 00000000000..aabc99692ad --- /dev/null +++ b/mealie/services/openai/openai.py @@ -0,0 +1,113 @@ +from mealie.core.config import get_app_settings +from .._base_service import BaseService +from openai import AsyncOpenAI, NOT_GIVEN + +import os +from pathlib import Path +from pydantic import BaseModel, field_validator +import json +from textwrap import dedent + + +class OpenAIDataInjection(BaseModel): + description: str + value: str + + @field_validator("value", mode="before") + def parse_value(cls, value): + if not value: + raise ValueError("Value cannot be empty") + if isinstance(value, str): + return value + + # convert Pydantic models to JSON + if isinstance(value, BaseModel): + return value.model_dump_json() + + # convert Pydantic types to their JSON schema definition + if issubclass(value, BaseModel): + value = value.model_json_schema() + + # attempt to convert object to JSON + try: + return json.dumps(value, separators=(",", ":")) + except TypeError: + return value + + +class OpenAIService(BaseService): + PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts" + + def __init__(self) -> None: + settings = get_app_settings() + if not settings.OPENAI_ENABLED: + raise ValueError("OpenAI is not enabled") + + self.model = settings.OPENAI_MODEL + self.get_client = lambda: AsyncOpenAI(api_key=settings.OPENAI_API_KEY) + + super().__init__() + + @classmethod + def get_prompt(cls, name: str, data_injections: list[OpenAIDataInjection] | None = None) -> str: + """ + Load stored prompt and inject data into it. + + Access prompts with dot notation. + For example, to access `prompts/recipes/parse-recipe-ingredients.txt`, use + `recipes.parse-recipe-ingredients` + """ + + if not name: + raise ValueError("Prompt name cannot be empty") + + tree = name.split(".") + prompt_dir = os.path.join(cls.PROMPTS_DIR, *tree[:-1], tree[-1] + ".txt") + try: + with open(prompt_dir, "r") as f: + content = f.read() + except OSError as e: + raise OSError(f"Unable to load prompt {name}") from e + + if not data_injections: + return content + + content_parts = [content] + for data_injection in data_injections: + content_parts.append( + dedent( + f""" + ### + {data_injection.description} + --- + + {data_injection.value} + """ + ) + ) + return "\n".join(content_parts) + + async def get_response(self, prompt: str, message: str, force_json_response=True) -> str | None: + try: + client = self.get_client() + response = await client.chat.completions.create( + messages=[ + { + "role": "system", + "content": prompt, + }, + { + "role": "user", + "content": message, + }, + ], + model=self.model, + response_format={"type": "json_object"} if force_json_response else NOT_GIVEN, + ) + + if not response.choices: + return None + return response.choices[0].message.content + except Exception: + self.logger.exception("OpenAI Request Failed") + return None diff --git a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt new file mode 100644 index 00000000000..1333ed77b7e --- /dev/null +++ b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt @@ -0,0 +1 @@ +TODO From f184cd27b7cb2e9fdfe8fff11380acc8ff667049 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 10 May 2024 15:17:37 +0000 Subject: [PATCH 03/29] added openai ingredient parser backend --- mealie/routes/parser/ingredient_parser.py | 9 +-- mealie/schema/recipe/recipe_ingredient.py | 1 + mealie/services/openai/__init__.py | 1 + mealie/services/openai/openai.py | 44 +++++++------ .../recipes/parse-recipe-ingredients.txt | 21 ++++++- .../parser_services/ingredient_parser.py | 31 +++++---- .../parser_services/openai/__init__.py | 5 ++ .../parser_services/openai/process.py | 63 +++++++++++++++++++ 8 files changed, 141 insertions(+), 34 deletions(-) create mode 100644 mealie/services/parser_services/openai/__init__.py create mode 100644 mealie/services/parser_services/openai/process.py diff --git a/mealie/routes/parser/ingredient_parser.py b/mealie/routes/parser/ingredient_parser.py index 0a63fa845de..dc2cc732c2e 100644 --- a/mealie/routes/parser/ingredient_parser.py +++ b/mealie/routes/parser/ingredient_parser.py @@ -11,11 +11,12 @@ @controller(router) class IngredientParserController(BaseUserController): @router.post("/ingredients", response_model=list[ParsedIngredient]) - def parse_ingredients(self, ingredients: IngredientsRequest): + async def parse_ingredients(self, ingredients: IngredientsRequest): parser = get_parser(ingredients.parser, self.group_id, self.session) - return parser.parse(ingredients.ingredients) + return await parser.parse(ingredients.ingredients) @router.post("/ingredient", response_model=ParsedIngredient) - def parse_ingredient(self, ingredient: IngredientRequest): + async def parse_ingredient(self, ingredient: IngredientRequest): parser = get_parser(ingredient.parser, self.group_id, self.session) - return parser.parse([ingredient.ingredient])[0] + response = await parser.parse([ingredient.ingredient]) + return response[0] diff --git a/mealie/schema/recipe/recipe_ingredient.py b/mealie/schema/recipe/recipe_ingredient.py index 7609fa5486e..5b00c05285f 100644 --- a/mealie/schema/recipe/recipe_ingredient.py +++ b/mealie/schema/recipe/recipe_ingredient.py @@ -327,6 +327,7 @@ class ParsedIngredient(MealieModel): class RegisteredParser(str, enum.Enum): nlp = "nlp" brute = "brute" + openai = "openai" class IngredientsRequest(MealieModel): diff --git a/mealie/services/openai/__init__.py b/mealie/services/openai/__init__.py index e69de29bb2d..0919383b5ba 100644 --- a/mealie/services/openai/__init__.py +++ b/mealie/services/openai/__init__.py @@ -0,0 +1 @@ +from .openai import OpenAIService, OpenAIDataInjection diff --git a/mealie/services/openai/openai.py b/mealie/services/openai/openai.py index aabc99692ad..68047fa6cee 100644 --- a/mealie/services/openai/openai.py +++ b/mealie/services/openai/openai.py @@ -7,6 +7,8 @@ from pydantic import BaseModel, field_validator import json from textwrap import dedent +import inspect +from openai.resources.chat.completions import ChatCompletion class OpenAIDataInjection(BaseModel): @@ -25,7 +27,7 @@ def parse_value(cls, value): return value.model_dump_json() # convert Pydantic types to their JSON schema definition - if issubclass(value, BaseModel): + if inspect.isclass(value) and issubclass(value, BaseModel): value = value.model_json_schema() # attempt to convert object to JSON @@ -87,24 +89,30 @@ def get_prompt(cls, name: str, data_injections: list[OpenAIDataInjection] | None ) return "\n".join(content_parts) - async def get_response(self, prompt: str, message: str, force_json_response=True) -> str | None: + async def _get_raw_response( + self, prompt: str, message: str, temperature=0.2, force_json_response=True + ) -> ChatCompletion: + client = self.get_client() + return await client.chat.completions.create( + messages=[ + { + "role": "system", + "content": prompt, + }, + { + "role": "user", + "content": message, + }, + ], + model=self.model, + temperature=temperature, + response_format={"type": "json_object"} if force_json_response else NOT_GIVEN, + ) + + async def get_response(self, prompt: str, message: str, temperature=0.2, force_json_response=True) -> str | None: + """Send data to OpenAI and return the response message content""" try: - client = self.get_client() - response = await client.chat.completions.create( - messages=[ - { - "role": "system", - "content": prompt, - }, - { - "role": "user", - "content": message, - }, - ], - model=self.model, - response_format={"type": "json_object"} if force_json_response else NOT_GIVEN, - ) - + response = await self._get_raw_response(prompt, message, temperature, force_json_response) if not response.choices: return None return response.choices[0].message.content diff --git a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt index 1333ed77b7e..5ad04c00a10 100644 --- a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt +++ b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt @@ -1 +1,20 @@ -TODO +You are a bot that parses user input into recipe ingredients. You will receive a list of one or more ingredients, each containing one or more of the following components: + - Food: the actual physical ingredient used in the recipe. For instance, if you receive "3 cups of onions, chopped", the food is "onions" + - Unit: the unit of measurement for this ingredient. For instance, if you receive "2 lbs chicken breast", the unit is "lbs" (short for "pounds") + - Quantity: the numerical representation of how much of this ingredient. For instance, if you receive "3 1/2 grams of minced garlic", the quantity is "3 1/2". Quantity may be represented as a whole number (integer), a float or decimal, or a fraction. You should output quantity in only whole numbers or floats, converting fractions into floats. Floats longer than 10 decimal places should be rounded to 10 decimal places. + - Note: the rest of the text that represents more detail on how to prepare the ingredient. Anything that is not one of the above should be the note. For instance, if you receive "one can of butter beans, drained" the note would be "drained". If you receive "3 cloves of garlic peeled and finely chopped", the note would be "finely chopped" + +While parsing the ingredients, there are some things to keep in mind: + - If you cannot accurately determine the quantity, unit, food, or note, you should place everything into the note field and leave everything else empty. It's better to err on the side of putting everything in the note field than being wrong + - You may receive recipe ingredients from multiple different languages. You should adhere to the grammar rules of the input language when trying to parse the ingredient string + - Sometimes foods or units will be in their singular, plural, or other grammatical forms. You must interpret all of them appropriately + +It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unsure, place the entire string into the note section of the response. Do not make things up. + +If you are not provided entires from the database to compare against, and you do not have any UUIDs to populate the schema with, so you should leave UUID fields empty. Do not create data out of thin air. + +In addition to calculating the recipe ingredient fields, you are also responsible for including a confidence value. This value should range from 0 - 100, where 100 is full confidence that the result is correct, and 0 is no confidence that the result is correct. If you're unable to parse anything, and you put the entire string in the notes, you should return 0 confidence. If you can easily parse the string into each component, then you should return a confidence of 100. If you have to guess which part is the unit and which part is the food, your confidence should be lower, such as 60. + +Below you will receive the JSON schema for your response. Your response must be in valid JSON in the below schema as provided. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema. + +The user message that you receive will be the list of one or more recipe ingredients for you to parse. Your response should have exactly one item for each item provided. For instance, if you receive 12 items to parse, then your response should be an array of 12 parsed items. diff --git a/mealie/services/parser_services/ingredient_parser.py b/mealie/services/parser_services/ingredient_parser.py index 079010d41d0..df14e040a27 100644 --- a/mealie/services/parser_services/ingredient_parser.py +++ b/mealie/services/parser_services/ingredient_parser.py @@ -1,7 +1,6 @@ from abc import ABC, abstractmethod from fractions import Fraction from typing import TypeVar - from pydantic import UUID4, BaseModel from rapidfuzz import fuzz, process from sqlalchemy.orm import Session @@ -23,7 +22,7 @@ ) from mealie.schema.response.pagination import PaginationQuery -from . import brute, crfpp +from . import brute, crfpp, openai logger = get_logger(__name__) T = TypeVar("T", bound=BaseModel) @@ -106,10 +105,10 @@ def unit_fuzzy_match_threshold(self) -> int: return 70 @abstractmethod - def parse_one(self, ingredient_string: str) -> ParsedIngredient: ... + async def parse_one(self, ingredient_string: str) -> ParsedIngredient: ... @abstractmethod - def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: ... + async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: ... @classmethod def find_match(cls, match_value: str, *, store_map: dict[str, T], fuzzy_match_threshold: int = 0) -> T | None: @@ -175,7 +174,7 @@ class BruteForceParser(ABCIngredientParser): Brute force ingredient parser. """ - def parse_one(self, ingredient: str) -> ParsedIngredient: + async def parse_one(self, ingredient: str) -> ParsedIngredient: bfi = brute.parse(ingredient, self) parsed_ingredient = ParsedIngredient( @@ -191,8 +190,8 @@ def parse_one(self, ingredient: str) -> ParsedIngredient: return self.find_ingredient_match(parsed_ingredient) - def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: - return [self.parse_one(ingredient) for ingredient in ingredients] + async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: + return [await self.parse_one(ingredient) for ingredient in ingredients] class NLPParser(ABCIngredientParser): @@ -234,18 +233,28 @@ def _crf_to_ingredient(self, crf_model: crfpp.CRFIngredient) -> ParsedIngredient return self.find_ingredient_match(parsed_ingredient) - def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: + async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: crf_models = crfpp.convert_list_to_crf_model(ingredients) return [self._crf_to_ingredient(crf_model) for crf_model in crf_models] - def parse_one(self, ingredient: str) -> ParsedIngredient: - items = self.parse([ingredient]) + async def parse_one(self, ingredient_string: str) -> ParsedIngredient: + items = await self.parse([ingredient_string]) return items[0] -__registrar = { +class OpenAIParser(ABCIngredientParser): + async def parse_one(self, ingredient_string: str) -> ParsedIngredient: + items = await self.parse([ingredient_string]) + return items[0] + + async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: + return await openai.parse(ingredients) + + +__registrar: dict[RegisteredParser, type[ABCIngredientParser]] = { RegisteredParser.nlp: NLPParser, RegisteredParser.brute: BruteForceParser, + RegisteredParser.openai: OpenAIParser, } diff --git a/mealie/services/parser_services/openai/__init__.py b/mealie/services/parser_services/openai/__init__.py new file mode 100644 index 00000000000..d423c48706e --- /dev/null +++ b/mealie/services/parser_services/openai/__init__.py @@ -0,0 +1,5 @@ +from .process import parse + +__all__ = [ + "parse", +] diff --git a/mealie/services/parser_services/openai/process.py b/mealie/services/parser_services/openai/process.py new file mode 100644 index 00000000000..4140a69e141 --- /dev/null +++ b/mealie/services/parser_services/openai/process.py @@ -0,0 +1,63 @@ +import json + +from pydantic import BaseModel +from mealie.schema.recipe.recipe_ingredient import ( + CreateIngredientFood, + CreateIngredientUnit, + IngredientConfidence, + ParsedIngredient, + RecipeIngredient, +) +from mealie.services.openai import OpenAIDataInjection, OpenAIService + + +class OpenAIIngredient(BaseModel): + input: str + confidence: float | None = None + + quantity: float | None = 0 + unit: str | None = None + food: str | None = None + note: str | None = None + + +class OpenAIIngredients(BaseModel): + ingredients: list[OpenAIIngredient] = [] + + +def _convert(ing: OpenAIIngredient) -> ParsedIngredient: + return ParsedIngredient( + input=ing.input, + confidence=IngredientConfidence(average=ing.confidence), + ingredient=RecipeIngredient( + original_text=ing.input, + quantity=ing.quantity, + unit=CreateIngredientUnit(name=ing.unit) if ing.unit else None, + food=CreateIngredientFood(name=ing.food) if ing.food else None, + note=ing.note, + ), + ) + + +async def parse(ingredients: list[str]) -> list[ParsedIngredient]: + service = OpenAIService() + data_injections = [ + OpenAIDataInjection( + description=( + "This is the JSON response schema. You must respond in valid JSON that follows this schema. " + "Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields " + "with default values which you do not populate should not be in the payload." + ), + value=OpenAIIngredients, + ), + ] + + prompt = service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections) + response = await service.get_response( + prompt, json.dumps(ingredients, separators=(",", ":")), force_json_response=True + ) + if not response: + raise Exception("No response from OpenAI") + + parsed_ingredients = OpenAIIngredients.model_validate_json(response) + return [_convert(ing) for ing in parsed_ingredients.ingredients] From 19a98b48751828467ce894ae86d96ffb6961f758 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 10 May 2024 16:28:11 +0000 Subject: [PATCH 04/29] improve confidence calculation --- .../openai/prompts/recipes/parse-recipe-ingredients.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt index 5ad04c00a10..9a1871f5eed 100644 --- a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt +++ b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt @@ -13,7 +13,7 @@ It is imperative that you do not create any data or otherwise make up any inform If you are not provided entires from the database to compare against, and you do not have any UUIDs to populate the schema with, so you should leave UUID fields empty. Do not create data out of thin air. -In addition to calculating the recipe ingredient fields, you are also responsible for including a confidence value. This value should range from 0 - 100, where 100 is full confidence that the result is correct, and 0 is no confidence that the result is correct. If you're unable to parse anything, and you put the entire string in the notes, you should return 0 confidence. If you can easily parse the string into each component, then you should return a confidence of 100. If you have to guess which part is the unit and which part is the food, your confidence should be lower, such as 60. +In addition to calculating the recipe ingredient fields, you are also responsible for including a confidence value. This value should range from 0 - 100, where 100 is full confidence that the result is correct, and 0 is no confidence that the result is correct. If you're unable to parse anything, and you put the entire string in the notes, you should return 0 confidence. If you can easily parse the string into each component, then you should return a confidence of 100. If you have to guess which part is the unit and which part is the food, your confidence should be lower, such as 60. Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence. If the entire ingredient consists of only a food, you can use a confidence of 100. Below you will receive the JSON schema for your response. Your response must be in valid JSON in the below schema as provided. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema. From 74ef0f52acd43c620a1fab8c58b3a06237aa5b5c Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 10 May 2024 16:29:54 +0000 Subject: [PATCH 05/29] explicit definition for input string --- .../services/openai/prompts/recipes/parse-recipe-ingredients.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt index 9a1871f5eed..b0ed4e834d3 100644 --- a/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt +++ b/mealie/services/openai/prompts/recipes/parse-recipe-ingredients.txt @@ -3,6 +3,7 @@ You are a bot that parses user input into recipe ingredients. You will receive a - Unit: the unit of measurement for this ingredient. For instance, if you receive "2 lbs chicken breast", the unit is "lbs" (short for "pounds") - Quantity: the numerical representation of how much of this ingredient. For instance, if you receive "3 1/2 grams of minced garlic", the quantity is "3 1/2". Quantity may be represented as a whole number (integer), a float or decimal, or a fraction. You should output quantity in only whole numbers or floats, converting fractions into floats. Floats longer than 10 decimal places should be rounded to 10 decimal places. - Note: the rest of the text that represents more detail on how to prepare the ingredient. Anything that is not one of the above should be the note. For instance, if you receive "one can of butter beans, drained" the note would be "drained". If you receive "3 cloves of garlic peeled and finely chopped", the note would be "finely chopped" + - Input: The input is simply the ingredient string you are processing as-is. It is forbidden to modify this at all, you must provide the input exactly as you received it While parsing the ingredients, there are some things to keep in mind: - If you cannot accurately determine the quantity, unit, food, or note, you should place everything into the note field and leave everything else empty. It's better to err on the side of putting everything in the note field than being wrong From 009f0bf3fb0b2170d6ef65d4787c1b4c192c58cc Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 10 May 2024 16:30:52 +0000 Subject: [PATCH 06/29] docs --- docs/docs/documentation/getting-started/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/documentation/getting-started/faq.md b/docs/docs/documentation/getting-started/faq.md index 7df1f46a5be..b033eb4edf8 100644 --- a/docs/docs/documentation/getting-started/faq.md +++ b/docs/docs/documentation/getting-started/faq.md @@ -26,7 +26,7 @@ Do the following for each recipe you want to intelligently handle ingredients. 6. Click the Edit button/icon again 7. Scroll to the ingredients and you should see new fields for Amount, Unit, Food, and Note. The Note in particular will contain the original text of the Recipe. 8. Click `Parse` and you will be taken to the ingredient parsing page. -9. Choose your parser. The `Natural Language Parser` works very well, but you can also use the `Brute Parser`. +9. Choose your parser. The `Natural Language Parser` works very well, but you can also use the `Brute Parser`, or the `OpenAI Parser` if you've enabled OpenAI support. 10. Click `Parse All`, and your ingredients should be separated out into Units and Foods based on your seeding in Step 1 above. 11. For ingredients where the Unit or Food was not found, you can click a button to accept an automatically suggested Food to add to the database. Or, manually enter the Unit/Food and hit `Enter` (or click `Create`) to add it to the database 12. When done, click `Save All` and you will be taken back to the recipe. Now the Unit and Food fields of the recipe should be filled out. From ec23dfb24aed8a7f20eb87621c2985507d0131f8 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 10 May 2024 16:33:35 +0000 Subject: [PATCH 07/29] added openai parser to frontend --- frontend/lang/messages/en-US.json | 2 ++ frontend/lib/api/types/recipe.ts | 2 +- frontend/lib/api/user/recipes/recipe.ts | 2 +- frontend/pages/admin/parser.vue | 3 ++- .../g/_groupSlug/r/_slug/ingredient-parser.vue | 14 +++++++++----- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/frontend/lang/messages/en-US.json b/frontend/lang/messages/en-US.json index 84a7e45da67..8514171bb37 100644 --- a/frontend/lang/messages/en-US.json +++ b/frontend/lang/messages/en-US.json @@ -594,6 +594,7 @@ "select-parser": "Select Parser", "natural-language-processor": "Natural Language Processor", "brute-parser": "Brute Parser", + "openai-parser": "OpenAI Parser", "parse-all": "Parse All", "no-unit": "No unit", "missing-unit": "Create missing unit: {unit}", @@ -1170,6 +1171,7 @@ "ingredients-natural-language-processor-explanation-2": "It's not perfect, but it yields great results in general and is a good starting point for manually parsing ingredients into individual fields. Alternatively, you can also use the \"Brute\" processor that uses a pattern matching technique to identify ingredients.", "nlp": "NLP", "brute": "Brute", + "openai": "OpenAI", "show-individual-confidence": "Show individual confidence", "ingredient-text": "Ingredient Text", "average-confident": "{0} Confident", diff --git a/frontend/lib/api/types/recipe.ts b/frontend/lib/api/types/recipe.ts index 62da68c8363..23189db6011 100644 --- a/frontend/lib/api/types/recipe.ts +++ b/frontend/lib/api/types/recipe.ts @@ -6,7 +6,7 @@ */ export type ExportTypes = "json"; -export type RegisteredParser = "nlp" | "brute"; +export type RegisteredParser = "nlp" | "brute" | "openai"; export type TimelineEventType = "system" | "info" | "comment"; export type TimelineEventImage = "has image" | "does not have image"; diff --git a/frontend/lib/api/user/recipes/recipe.ts b/frontend/lib/api/user/recipes/recipe.ts index 2a7dd0f4012..c036bcf33cf 100644 --- a/frontend/lib/api/user/recipes/recipe.ts +++ b/frontend/lib/api/user/recipes/recipe.ts @@ -17,7 +17,7 @@ import { } from "~/lib/api/types/recipe"; import { ApiRequestInstance, PaginationData } from "~/lib/api/types/non-generated"; -export type Parser = "nlp" | "brute"; +export type Parser = "nlp" | "brute" | "openai"; export interface CreateAsset { name: string; diff --git a/frontend/pages/admin/parser.vue b/frontend/pages/admin/parser.vue index bb1ac20faef..2290624d679 100644 --- a/frontend/pages/admin/parser.vue +++ b/frontend/pages/admin/parser.vue @@ -13,6 +13,7 @@ {{ $t('admin.nlp') }} {{ $t('admin.brute') }} + {{ $t('admin.openai') }} @@ -63,8 +64,8 @@