Merge pull request #2810 from michael-genson/fix/translation-issues-when-scraping

fix: Translate ISO 8601 Datetime Durations During Scraping/Parsing/Migrating
boc-the-git authored Feb 8, 2024
2 parents 4c60feb + 9434208 commit 3174216
Showing 14 changed files with 137 additions and 54 deletions.
9 changes: 9 additions & 0 deletions mealie/lang/messages/en-US.json
@@ -31,5 +31,14 @@
"generic-updated-with-url": "{name} has been updated, {url}",
"generic-duplicated": "{name} has been duplicated",
"generic-deleted": "{name} has been deleted"
},
"datetime": {
"year": "year|years",
"day": "day|days",
"hour": "hour|hours",
"minute": "minute|minutes",
"second": "second|seconds",
"millisecond": "millisecond|milliseconds",
"microsecond": "microsecond|microseconds"
}
}
26 changes: 23 additions & 3 deletions mealie/pkgs/i18n/json_provider.py
@@ -1,6 +1,7 @@
import json
from dataclasses import dataclass
from pathlib import Path
from typing import cast


@dataclass(slots=True)
@@ -13,6 +14,22 @@ def __init__(self, path: Path | dict):
else:
self.translations = path

def _parse_plurals(self, value: str, count: float):
# based off of: https://kazupon.github.io/vue-i18n/guide/pluralization.html

values = [v.strip() for v in value.split("|")]
if len(values) == 1:
return value
elif len(values) == 2:
return values[0] if count == 1 else values[1]
elif len(values) == 3:
if count == 0:
return values[0]
else:
return values[1] if count == 1 else values[2]
else:
return values[0]

def t(self, key: str, default=None, **kwargs) -> str:
keys = key.split(".")

@@ -30,9 +47,12 @@ def t(self, key: str, default=None, **kwargs) -> str:

if i == last:
for key, value in kwargs.items():
if not value:
translation_value = cast(str, translation_value)
if value is None:
value = ""
translation_value = translation_value.replace("{" + key + "}", value)
return translation_value
if key == "count":
translation_value = self._parse_plurals(translation_value, float(value))
translation_value = translation_value.replace("{" + key + "}", str(value)) # type: ignore
return translation_value # type: ignore

return default or key
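
The pipe-delimited strings added to en-US.json feed straight into this count handling. A minimal sketch of the behavior, outside this commit, assuming the provider class in json_provider.py is JsonProvider and accepts a translations dict directly:

    from mealie.pkgs.i18n.json_provider import JsonProvider

    # Translations can be supplied as a dict instead of a path to a JSON file.
    provider = JsonProvider({"datetime": {"minute": "minute|minutes"}})

    # When a "count" kwarg is passed, t() first picks the singular or plural form
    # from the pipe-delimited string, then substitutes any {count} placeholder.
    print(provider.t("datetime.minute", count=1))   # -> minute
    print(provider.t("datetime.minute", count=30))  # -> minutes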
1 change: 1 addition & 0 deletions mealie/routes/groups/controller_migrations.py
@@ -44,6 +44,7 @@ def start_data_migration(
"user_id": self.user.id,
"group_id": self.group_id,
"add_migration_tag": add_migration_tag,
"translator": self.translator,
}

table: dict[SupportedMigrations, type[BaseMigrator]] = {
6 changes: 3 additions & 3 deletions mealie/routes/recipe/recipe_crud_routes.py
@@ -164,7 +164,7 @@ def handle_exceptions(self, ex: Exception) -> None:
async def parse_recipe_url(self, req: ScrapeRecipe):
"""Takes in a URL and attempts to scrape data and load it into the database"""
try:
recipe, extras = await create_from_url(req.url)
recipe, extras = await create_from_url(req.url, self.translator)
except ForceTimeoutException as e:
raise HTTPException(
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
@@ -193,7 +193,7 @@ async def parse_recipe_url(self, req: ScrapeRecipe):
@router.post("/create-url/bulk", status_code=202)
def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: BackgroundTasks):
"""Takes in a URL and attempts to scrape data and load it into the database"""
bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group)
bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group, self.translator)
report_id = bulk_scraper.get_report_id()
bg_tasks.add_task(bulk_scraper.scrape, bulk)

@@ -208,7 +208,7 @@ def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: Backgroun
async def test_parse_recipe_url(self, url: ScrapeRecipeTest):
# Debugger should produce the same result as the scraper sees before cleaning
try:
if scraped_data := await RecipeScraperPackage(url.url).scrape_url():
if scraped_data := await RecipeScraperPackage(url.url, self.translator).scrape_url():
return scraped_data.schema.data
except ForceTimeoutException as e:
raise HTTPException(
13 changes: 11 additions & 2 deletions mealie/services/migrations/_migration_base.py
@@ -6,6 +6,7 @@

from mealie.core import root_logger
from mealie.core.exceptions import UnexpectedNone
from mealie.lang.providers import Translator
from mealie.repos.all_repositories import AllRepositories
from mealie.schema.recipe import Recipe
from mealie.schema.recipe.recipe_settings import RecipeSettings
@@ -35,12 +36,20 @@ class BaseMigrator(BaseService):
helpers: DatabaseMigrationHelpers

def __init__(
self, archive: Path, db: AllRepositories, session, user_id: UUID4, group_id: UUID, add_migration_tag: bool
self,
archive: Path,
db: AllRepositories,
session,
user_id: UUID4,
group_id: UUID,
add_migration_tag: bool,
translator: Translator,
):
self.archive = archive
self.db = db
self.session = session
self.add_migration_tag = add_migration_tag
self.translator = translator

user = db.users.get_one(user_id)
if not user:
@@ -229,6 +238,6 @@ def clean_recipe_dictionary(self, recipe_dict: dict) -> Recipe:
with contextlib.suppress(KeyError):
del recipe_dict["id"]

recipe_dict = cleaner.clean(recipe_dict, url=recipe_dict.get("org_url", None))
recipe_dict = cleaner.clean(recipe_dict, self.translator, url=recipe_dict.get("org_url", None))

return Recipe(**recipe_dict)
40 changes: 21 additions & 19 deletions mealie/services/scraper/cleaner.py
@@ -10,6 +10,7 @@
from slugify import slugify

from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator

logger = get_logger("recipe-scraper")

@@ -32,7 +33,7 @@
""" Matches multiple new lines and removes erroneous white space """


def clean(recipe_data: dict, url=None) -> dict:
def clean(recipe_data: dict, translator: Translator, url=None) -> dict:
"""Main entrypoint to clean a recipe extracted from the web
and format the data into an acceptable format for the database
@@ -45,9 +46,9 @@ def clean(recipe_data: dict, url=None) -> dict:
recipe_data["description"] = clean_string(recipe_data.get("description", ""))

# Times
recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"))
recipe_data["performTime"] = clean_time(recipe_data.get("performTime"))
recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"))
recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"), translator)
recipe_data["performTime"] = clean_time(recipe_data.get("performTime"), translator)
recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"), translator)
recipe_data["recipeCategory"] = clean_categories(recipe_data.get("recipeCategory", []))
recipe_data["recipeYield"] = clean_yield(recipe_data.get("recipeYield"))
recipe_data["recipeIngredient"] = clean_ingredients(recipe_data.get("recipeIngredient", []))
@@ -335,7 +336,7 @@ def clean_yield(yld: str | list[str] | None) -> str:
return yld


def clean_time(time_entry: str | timedelta | None) -> None | str:
def clean_time(time_entry: str | timedelta | None, translator: Translator) -> None | str:
"""_summary_
Supported Structures:
@@ -361,11 +362,11 @@ def clean_time(time_entry: str | timedelta | None) -> None | str:

try:
time_delta_instructionsect = parse_duration(time_entry)
return pretty_print_timedelta(time_delta_instructionsect)
return pretty_print_timedelta(time_delta_instructionsect, translator)
except ValueError:
return str(time_entry)
case timedelta():
return pretty_print_timedelta(time_entry)
return pretty_print_timedelta(time_entry, translator)
case {"minValue": str(value)}:
return clean_time(value)
case [str(), *_]:
@@ -374,7 +375,7 @@
# TODO: Not sure what to do here
return str(time_entry)
case _:
logger.warning("[SCRAPER] Unexpected type or structure for time_entrys")
logger.warning("[SCRAPER] Unexpected type or structure for variable time_entry")
return None


@@ -408,25 +409,25 @@ def parse_duration(iso_duration: str) -> timedelta:
return timedelta(**times)


def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
def pretty_print_timedelta(t: timedelta, translator: Translator, max_components=None, max_decimal_places=2):
"""
Print a pretty string for a timedelta.
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'.
Setting max_components to e.g. 1 will change this to '2.2 days', where the number of decimal
points can also be set.
"""
time_scale_names_dict = {
timedelta(days=365): "year",
timedelta(days=1): "day",
timedelta(hours=1): "Hour",
timedelta(minutes=1): "Minute",
timedelta(seconds=1): "Second",
timedelta(microseconds=1000): "millisecond",
timedelta(microseconds=1): "microsecond",
time_scale_translation_keys_dict = {
timedelta(days=365): "datetime.year",
timedelta(days=1): "datetime.day",
timedelta(hours=1): "datetime.hour",
timedelta(minutes=1): "datetime.minute",
timedelta(seconds=1): "datetime.second",
timedelta(microseconds=1000): "datetime.millisecond",
timedelta(microseconds=1): "datetime.microsecond",
}
count = 0
out_list = []
for scale, scale_name in time_scale_names_dict.items():
for scale, scale_translation_key in time_scale_translation_keys_dict.items():
if t >= scale:
count += 1
n = t / scale if count == max_components else int(t / scale)
@@ -436,7 +437,8 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places
if n_txt[-2:] == ".0":
n_txt = n_txt[:-2]

out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
scale_value = translator.t(scale_translation_key, count=n)
out_list.append(f"{n_txt} {scale_value}")

if out_list == []:
return "none"
8 changes: 6 additions & 2 deletions mealie/services/scraper/recipe_bulk_scraper.py
@@ -2,6 +2,7 @@

from pydantic import UUID4

from mealie.lang.providers import Translator
from mealie.repos.repository_factory import AllRepositories
from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe
from mealie.schema.reports.reports import (
@@ -20,11 +21,14 @@
class RecipeBulkScraperService(BaseService):
report_entries: list[ReportEntryCreate]

def __init__(self, service: RecipeService, repos: AllRepositories, group: GroupInDB) -> None:
def __init__(
self, service: RecipeService, repos: AllRepositories, group: GroupInDB, translator: Translator
) -> None:
self.service = service
self.repos = repos
self.group = group
self.report_entries = []
self.translator = translator

super().__init__()

@@ -81,7 +85,7 @@ async def scrape(self, urls: CreateRecipeByUrlBulk) -> None:
async def _do(url: str) -> Recipe | None:
async with sem:
try:
recipe, _ = await create_from_url(url)
recipe, _ = await create_from_url(url, self.translator)
return recipe
except Exception as e:
self.service.logger.error(f"failed to scrape url during bulk url import {url}")
6 changes: 4 additions & 2 deletions mealie/services/scraper/recipe_scraper.py
@@ -1,3 +1,4 @@
from mealie.lang.providers import Translator
from mealie.schema.recipe.recipe import Recipe
from mealie.services.scraper.scraped_extras import ScrapedExtras

@@ -14,19 +15,20 @@ class RecipeScraper:
# List of recipe scrapers. Note that order matters
scrapers: list[type[ABCScraperStrategy]]

def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
if scrapers is None:
scrapers = DEFAULT_SCRAPER_STRATEGIES

self.scrapers = scrapers
self.translator = translator

async def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""
Scrapes a recipe from the web.
"""

for scraper_type in self.scrapers:
scraper = scraper_type(url)
scraper = scraper_type(url, self.translator)
result = await scraper.parse()

if result is not None:
5 changes: 3 additions & 2 deletions mealie/services/scraper/scraper.py
@@ -5,6 +5,7 @@
from slugify import slugify

from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.pkgs import cache
from mealie.schema.recipe import Recipe
from mealie.services.recipe.recipe_data_service import RecipeDataService
@@ -19,7 +20,7 @@ class ParserErrors(str, Enum):
CONNECTION_ERROR = "CONNECTION_ERROR"


async def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras | None]:
async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, ScrapedExtras | None]:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful.
@@ -29,7 +30,7 @@ async def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras | None]:
Returns:
Recipe: Recipe Object
"""
scraper = RecipeScraper()
scraper = RecipeScraper(translator)
new_recipe, extras = await scraper.scrape(url)

if not new_recipe:
20 changes: 12 additions & 8 deletions mealie/services/scraper/scraper_strategies.py
@@ -11,6 +11,7 @@
from w3lib.html import get_base_url

from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.schema.recipe.recipe import Recipe, RecipeStep
from mealie.services.scraper.scraped_extras import ScrapedExtras

@@ -77,9 +78,10 @@ class ABCScraperStrategy(ABC):

url: str

def __init__(self, url: str) -> None:
def __init__(self, url: str, translator: Translator) -> None:
self.logger = get_logger()
self.url = url
self.translator = translator

@abstractmethod
async def get_html(self, url: str) -> str: ...
@@ -102,7 +104,9 @@ async def get_html(self, url: str) -> str:
return await safe_scrape_html(url)

def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
def try_get_default(
func_call: Callable | None, get_attr: str, default: Any, clean_func=None, **clean_func_kwargs
):
value = default

if func_call:
@@ -118,7 +122,7 @@ def try_get_default(func_call: Callable | None, get_attr: str, default: Any, cle
self.logger.error(f"Error parsing recipe attribute '{get_attr}'")

if clean_func:
value = clean_func(value)
value = clean_func(value, **clean_func_kwargs)

return value

@@ -138,9 +142,9 @@ def get_instructions() -> list[RecipeStep]:
except TypeError:
return []

cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
None, "cookTime", None, cleaner.clean_time
)
cook_time = try_get_default(
None, "performTime", None, cleaner.clean_time, translator=self.translator
) or try_get_default(None, "cookTime", None, cleaner.clean_time, translator=self.translator)

extras = ScrapedExtras()

Expand All @@ -157,8 +161,8 @@ def get_instructions() -> list[RecipeStep]:
scraped_data.ingredients, "recipeIngredient", [""], cleaner.clean_ingredients
),
recipe_instructions=get_instructions(),
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time, translator=self.translator),
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time, translator=self.translator),
perform_time=cook_time,
org_url=url,
)
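
The new **clean_func_kwargs pass-through in try_get_default is what carries the translator from the scraper strategy down into cleaner.clean_time. A simplified illustration of the forwarding pattern, with hypothetical names rather than the actual Mealie helper:

    from typing import Any, Callable

    def apply_clean(raw: Any, default: Any, clean_func: Callable | None = None, **clean_func_kwargs) -> Any:
        # Fall back to the default when nothing was scraped; otherwise run the
        # cleaner, forwarding any extra keyword arguments from the caller.
        if raw is None:
            return default
        if clean_func:
            return clean_func(raw, **clean_func_kwargs)
        return raw

    # e.g. apply_clean(scraped_time, None, cleaner.clean_time, translator=self.translator)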