From 4e134e5c8b3377afefd249d26627eab18f3e161e Mon Sep 17 00:00:00 2001 From: jlssmt Date: Sun, 8 Sep 2024 13:02:09 +0200 Subject: [PATCH] add regex to url before scraping --- mealie/services/scraper/scraper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py index a6e307ca820..75a29d60ec5 100644 --- a/mealie/services/scraper/scraper.py +++ b/mealie/services/scraper/scraper.py @@ -1,4 +1,5 @@ from enum import Enum +from re import search as regex_search from uuid import uuid4 from fastapi import HTTPException, status @@ -31,7 +32,13 @@ async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, Scr Recipe: Recipe Object """ scraper = RecipeScraper(translator) - new_recipe, extras = await scraper.scrape(url) + + extracted_url = regex_search(r"(https?://|www\.)[^\s]+", url) + + if not extracted_url: + raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value}) + + new_recipe, extras = await scraper.scrape(extracted_url.group(0)) if not new_recipe: raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})