Skip to content

Commit

Permalink
feat: add ingredient analysis endpoint (#285)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 authored Nov 6, 2024
1 parent c9508d7 commit 10c42d2
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 3 deletions.
52 changes: 49 additions & 3 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,20 @@ All parameters are optional with the exception of user_agent, but here is a desc
- `version`: API version (v2 is the default)
- `environment`: either `org` for production environment (openfoodfacts.org) or `net` for staging (openfoodfacts.net)

*Get information about a product*
### Get information about a product

```python
code = "3017620422003"
api.product.get(code)
```

*Perform text search*
### Perform text search

```python
results = api.product.text_search("pizza")
```

*Create a new product or update an existing one*
### Create a new product or update an existing one

```python
results = api.product.update(body)
Expand All @@ -54,6 +54,52 @@ the key "code" and its value, corresponding to the product that we
want to update. Example:
```body = {'code': '3850334341389', 'product_name': 'Mlinci'}```

### Perform ingredient analysis

You can perform the ingredient analysis of a text in a given language using the API. Please note that ingredient analysis is costly, so prefer using the staging server (`Environment.net`, i.e. openfoodfacts.net) for this operation.

```python
from openfoodfacts import API, APIVersion, Environment

api = API(user_agent="<application name>",
version=APIVersion.v3,
environment=Environment.net)

results = api.product.parse_ingredients("water, sugar, salt", lang="en")

print(results)

# [{'ciqual_food_code': '18066',
# 'ecobalyse_code': 'tap-water',
# 'id': 'en:water',
# 'is_in_taxonomy': 1,
# 'percent_estimate': 66.6666666666667,
# 'percent_max': 100,
# 'percent_min': 33.3333333333333,
# 'text': 'water',
# 'vegan': 'yes',
# 'vegetarian': 'yes'},
# {'ciqual_proxy_food_code': '31016',
# 'ecobalyse_code': 'sugar',
# 'id': 'en:sugar',
# 'is_in_taxonomy': 1,
# 'percent_estimate': 16.6666666666667,
# 'percent_max': 50,
# 'percent_min': 0,
# 'text': 'sugar',
# 'vegan': 'yes',
# 'vegetarian': 'yes'},
# {'ciqual_food_code': '11058',
# 'id': 'en:salt',
# 'is_in_taxonomy': 1,
# 'percent_estimate': 16.6666666666667,
# 'percent_max': 33.3333333333333,
# 'percent_min': 0,
# 'text': 'salt',
# 'vegan': 'yes',
# 'vegetarian': 'yes'}]
```

## Using the dataset

If you're planning to perform data analysis on Open Food Facts, the easiest way is to download and use the Open Food Facts dataset dump. Fortunately it can be done really easily using the SDK:
Expand Down
85 changes: 85 additions & 0 deletions openfoodfacts/api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import requests

from .types import APIConfig, APIVersion, Country, Environment, Facet, Flavor, JSONType
from .utils import URLBuilder, http_session

logger = logging.getLogger(__name__)


def get_http_auth(environment: Environment) -> Optional[Tuple[str, str]]:
    """Return the HTTP basic-auth credentials to send for `environment`.

    :param environment: the environment the request is sent to
    :return: the `("off", "off")` (user, password) pair for
        `Environment.net`, `None` for any other environment
    """
    if environment is Environment.net:
        return ("off", "off")
    return None
Expand Down Expand Up @@ -311,6 +314,88 @@ def select_image(
r.raise_for_status()
return r

def parse_ingredients(
    self, text: str, lang: str, timeout: int = 10
) -> List[JSONType]:
    """Parse ingredients text using Product Opener API.

    It is only available for `off` flavor (food).

    The result is a list of ingredients, each ingredient is a dict with the
    following keys:

    - id: the ingredient ID. Having an ID does not mean that the
      ingredient is recognized, you must check if it exists in the
      taxonomy.
    - text: the ingredient text (as it appears in the input ingredients
      list)
    - percent_min: the minimum percentage of the ingredient in the product
    - percent_max: the maximum percentage of the ingredient in the product
    - percent_estimate: the estimated percentage of the ingredient in the
      product
    - vegan (str): optional key indicating if the ingredient is vegan
      (for example "yes")
    - vegetarian (str): optional key indicating if the ingredient is
      vegetarian (for example "yes")

    :param text: the ingredients text to parse, must be non-empty
    :param lang: the language of the text (used for parsing) as a 2-letter
        code
    :param timeout: the request timeout in seconds, defaults to 10s
    :raises ValueError: if the configured flavor is not `off` or if `text`
        is empty
    :raises RuntimeError: a RuntimeError is raised if the parsing fails
        (HTTP request error, error status code, response body that is not
        valid JSON, or a response whose status is not "success")
    :return: the list of parsed ingredients
    """
    if self.api_config.flavor != Flavor.off:
        raise ValueError("ingredient parsing is only available for food")

    # Validate the input before logging anything or touching the network.
    if len(text) == 0:
        raise ValueError("text must be a non-empty string")

    if self.api_config.version != APIVersion.v3:
        logger.warning(
            "ingredient parsing is only available in v3 of the API (here: %s), using v3",
            self.api_config.version,
        )
    # by using "test" as code, we don't save any information to database
    # This endpoint is specifically designed for testing purposes
    url = f"{self.base_url}/api/v3/product/test"

    try:
        r = http_session.patch(
            url,
            auth=get_http_auth(self.api_config.environment),
            json={
                "fields": "ingredients",
                "lc": lang,
                "tags_lc": lang,
                "product": {
                    "lang": lang,
                    f"ingredients_text_{lang}": text,
                },
            },
            timeout=timeout,
        )
    except (
        requests.exceptions.ConnectionError,
        requests.exceptions.SSLError,
        requests.exceptions.Timeout,
    ) as e:
        # Chain the original exception so the root cause stays visible in
        # the traceback.
        raise RuntimeError(
            f"Unable to parse ingredients: error during HTTP request: {e}"
        ) from e

    if not r.ok:
        raise RuntimeError(
            f"Unable to parse ingredients (non-200 status code): {r.status_code}, {r.text}"
        )

    try:
        response_data = r.json()
    except ValueError as e:
        # A body that is not valid JSON is reported as a RuntimeError, as
        # documented, instead of leaking a JSON decoding error.
        raise RuntimeError(
            f"Unable to parse ingredients: invalid JSON in response: {e}"
        ) from e

    if response_data.get("status") != "success":
        raise RuntimeError(f"Unable to parse ingredients: {response_data}")

    return response_data["product"].get("ingredients", [])


class API:
def __init__(
Expand Down
75 changes: 75 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import re
import unittest

import pytest
Expand Down Expand Up @@ -105,6 +106,80 @@ def test_text_search(self):
)
self.assertEqual(res["products"], ["banania", "banania big"])

def test_parse_ingredients(self):
    """A successful (mocked) response yields its ingredient list unchanged."""
    water = {
        "ciqual_food_code": "18066",
        "ecobalyse_code": "tap-water",
        "id": "en:water",
        "is_in_taxonomy": 1,
        "percent_estimate": 75,
        "percent_max": 100,
        "percent_min": 50,
        "text": "eau",
        "vegan": "yes",
        "vegetarian": "yes",
    }
    sugar = {
        "ciqual_proxy_food_code": "31016",
        "ecobalyse_code": "sugar",
        "id": "en:sugar",
        "is_in_taxonomy": 1,
        "percent_estimate": 25,
        "percent_max": 50,
        "percent_min": 0,
        "text": "sucre",
        "vegan": "yes",
        "vegetarian": "yes",
    }
    expected_ingredients = [water, sugar]
    payload = json.dumps(
        {
            "product": {"ingredients": expected_ingredients},
            "status": "success",
        }
    )

    # The client is created with v2 while the mocked URL is the v3 endpoint.
    client = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
    with requests_mock.mock() as mocker:
        mocker.patch(
            "https://world.openfoodfacts.org/api/v3/product/test",
            text=payload,
        )
        parsed = client.product.parse_ingredients("eau, sucre", lang="fr")

    assert parsed == expected_ingredients

def test_parse_ingredients_fail(self):
    """A response whose status is not "success" raises a RuntimeError."""
    api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
    with requests_mock.mock() as mock:
        response_data = {
            "status": "fail",
        }
        mock.patch(
            "https://world.openfoodfacts.org/api/v3/product/test",
            text=json.dumps(response_data),
        )

        with pytest.raises(
            RuntimeError,
            # `match` is a regular expression: escape the message so that
            # the braces of the dict repr are matched literally.
            match=re.escape("Unable to parse ingredients: {'status': 'fail'}"),
        ):
            api.product.parse_ingredients("eau, sucre", lang="fr")

def test_parse_ingredients_fail_non_HTTP_200(self):
    """An HTTP 400 answer raises a RuntimeError carrying status and body."""
    client = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
    # The message contains regex metacharacters, hence the escaping.
    expected_message = re.escape(
        'Unable to parse ingredients (non-200 status code): 400, {"error": "Bad Request"}'
    )

    with requests_mock.mock() as mocker:
        mocker.patch(
            "https://world.openfoodfacts.org/api/v3/product/test",
            status_code=400,
            text='{"error": "Bad Request"}',
        )

        with pytest.raises(RuntimeError, match=expected_message):
            client.product.parse_ingredients("eau, sucre", lang="fr")


if __name__ == "__main__":
unittest.main()

0 comments on commit 10c42d2

Please sign in to comment.