diff --git a/README.md b/README.md index 992b7c7..e5430f5 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Le scripte peut désormais être configuré en utilisant des variables d'environ - `GUNICORN_TIMEOUT`: Le délai d'attente pour les requêtes Gunicorn. Par défaut, il est défini sur '120'. - `LOG_PATH`: Le chemin du fichier journal pour le serveur proxy. Par défaut, il est défini sur '/app/config/logs/rss-proxy.log'. Il y a une rotaion de fichier journal déja configuré. Attention c'est le chemin dans le container. - `LOG_LEVEL`: Le niveau de journalisation pour le serveur proxy. Par défaut, il est défini sur 'INFO'. +- `LOG_REDACTED`: Si les journaux doivent être anonymisés. Par défaut, il est défini sur 'True'. - `DB_PATH`: Le chemin de la base de données SQLite pour le serveur proxy. Par défaut, il est défini sur '/app/config/rss-proxy.db'. Attention c'est le chemin dans le container. - `SECRET_KEY`: La clé secrète utilisée pour la signature des cookies de session. Par défaut, il est défini sur 'superkey_that_can_be_changed'. Sécurité suplémentaire pour chiffré la base de donnée. 
diff --git a/poetry.lock b/poetry.lock index 0ce1db9..82f8892 100644 --- a/poetry.lock +++ b/poetry.lock @@ -985,6 +985,42 @@ files = [ {file = "stackprinter-0.2.12.tar.gz", hash = "sha256:271efc75ebdcc1554e58168ea7779f98066d54a325f57c7dc19f10fa998ef01e"}, ] +[[package]] +name = "tenacity" +version = "8.3.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, + {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + +[[package]] +name = "timeout-decorator" +version = "0.5.0" +description = "Timeout decorator" +optional = false +python-versions = "*" +files = [ + {file = "timeout-decorator-0.5.0.tar.gz", hash = "sha256:6a2f2f58db1c5b24a2cc79de6345760377ad8bdc13813f5265f6c3e63d16b3d7"}, +] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1047,4 +1083,4 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "5a24f7721e06505143afe012ff41f7df60dadef55de2035a3a0279c1f914905c" +content-hash = "7c1970ebeb73b60551a56d8f9fa496223b31431056e87c7a3cac72840c9e9250" diff --git a/pyproject.toml b/pyproject.toml index b008750..525758f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name =
"ygg-rss-proxy" -version = "1.0.0-alpha" +version = "1.3.0-alpha" description = "Proxy for yggtorrent rss feeds" authors = ["LimeDrive <limecat@limedrive.eu>"] readme = "README.md" @@ -18,6 +18,9 @@ flask-session = "^0.8.0" flask-sqlalchemy = "^3.1.1" orjson = "^3.10.4" stackprinter = "^0.2.12" +timeout-decorator = "^0.5.0" +tenacity = "^8.3.0" +toml = "^0.10.2" [build-system] diff --git a/ygg_rss_proxy/__init__.py b/ygg_rss_proxy/__init__.py index 56c9aa1..efd9a5a 100644 --- a/ygg_rss_proxy/__init__.py +++ b/ygg_rss_proxy/__init__.py @@ -3,6 +3,7 @@ from ygg_rss_proxy.app import app from ygg_rss_proxy.logging_config import logger from ygg_rss_proxy.run_gunicorn import GunicornApp +from ygg_rss_proxy.version import get_version options = { "bind": f"{settings.gunicorn_binder}:{settings.gunicorn_port}", @@ -11,11 +12,23 @@ "timeout": settings.gunicorn_timeout, } +try: + version = get_version() + logger.info("----------------------------------------------------------") + logger.info(f"ygg_rss_proxy version: {version}") +except Exception: # version lookup is best-effort and must not block startup + logger.info("----------------------------------------------------------") + logger.info("ygg_rss_proxy version: unknown") + logger.info("----------------------------------------------------------") logger.info("Checking directories...") logger.info("----------------------------------------------------------") -directories = ["/app/config", os.path.dirname(settings.db_path), os.path.dirname(settings.log_path)] +directories = [ + "/app/config", + os.path.dirname(settings.db_path), + os.path.dirname(settings.log_path), +] for directory in directories: if not os.path.exists(directory): os.makedirs(directory) diff --git a/ygg_rss_proxy/app.py b/ygg_rss_proxy/app.py index 22e5de6..7085ebc 100644 --- a/ygg_rss_proxy/app.py +++ b/ygg_rss_proxy/app.py @@ -1,6 +1,7 @@ from flask import Flask, request, jsonify, Response from flask_session import Session from flask_sqlalchemy import SQLAlchemy +from timeout_decorator import TimeoutError from
ygg_rss_proxy.rss import get_rss_feed, replace_torrent_links from ygg_rss_proxy.settings import settings from ygg_rss_proxy.logging_config import logger @@ -21,6 +22,10 @@ app.config["SESSION_USE_SIGNER"] = True app.config["SESSION_KEY_PREFIX"] = "session:" app.config["SECRET_KEY"] = settings.secret_key +app.config["SQLALCHEMY_ENGINE_OPTIONS"] = { + "connect_args": {"timeout": settings.db_timeout} +} + db = SQLAlchemy(app) app.config["SESSION_SQLALCHEMY"] = db @@ -40,7 +45,12 @@ def proxy_rss(): query_params = request.query_string.decode("utf-8") ygg_session = get_session() - response = get_rss_feed(query_params, requests_session=ygg_session) + try: + response = get_rss_feed(query_params, requests_session=ygg_session) + except TimeoutError as e: + logger.error(f"Timeout Err: {e}") + # "response" is unbound after a timeout: answer 504 instead of crashing below. + return jsonify({"error": "Timeout while fetching the RSS feed"}), 504 if response.status_code in [401, 403, 307, 301]: # Session may have expired, re-authenticate and retry the request diff --git a/ygg_rss_proxy/auth.py b/ygg_rss_proxy/auth.py index ff61d2c..ed00f55 100644 --- a/ygg_rss_proxy/auth.py +++ b/ygg_rss_proxy/auth.py @@ -1,4 +1,6 @@ import requests +import timeout_decorator +from tenacity import retry, stop_after_attempt, wait_fixed from ygg_rss_proxy.fspy import FlareSolverr from ygg_rss_proxy.settings import settings from ygg_rss_proxy.logging_config import logger @@ -72,10 +74,15 @@ def ygg_cloudflare_login( raise Exception("Failed to connect to FlareSolverr") response = fs_solver.request_get(url="https://www.ygg.re") - logger.debug(f"FlareSolverr response: {response}") + logger.debug(f"FlareSolverr message: {response.message}") + logger.debug(f"FlareSolverr status: {response.solution.status}") + logger.debug(f"FlareSolverr user-agent: {response.solution.user_agent}") + logger.debug(f"FlareSolverr cookies: {response.solution.cookies}") if not response.solution.cookies: - logger.error(f"Failed to get cookies from flaresolverr : {response.solution.cookies}") + logger.error( + f"Failed to get cookies from flaresolverr :
{response.solution.cookies}" + ) raise Exception("Failed to get cookies from flaresolverr") if response.message == "Challenge solved!": @@ -110,7 +117,7 @@ def ygg_cloudflare_login( break # Check if cf_clearance cookie is found if not cf_clearance_found: - logger.debug(f"Response : {response}") + logger.debug(f"Full flaresolverr Response : {response}") logger.error(f"Failed to get cf_clearance from flaresolverr") raise Exception("Failed to get cf_clearance from flaresolverr") @@ -122,11 +129,19 @@ def ygg_cloudflare_login( return session else: logger.error( - f"Failed to authenticate to YGG with status code : {response.solution.status}" + f"Failed to authenticate to YGG using flaresolverr: {response.solution.status}" ) - raise Exception("Failed to authenticate to YGG") + raise Exception("Failed to authenticate to YGG using flaresolverr") +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a + # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) +@timeout_decorator.timeout(90, exception_message=f"Timeout after 90 seconds") def ygg_login( session=requests.Session(), ygg_playload: dict = ygg_playload ) -> requests.Session: @@ -149,7 +164,7 @@ def ygg_login( logger.info("Cloudflare is enabled, using FlareSolverr") return ygg_cloudflare_login(session, ygg_playload) else: - logger.info("Cloudflare is disabled, using basic login") + logger.info("Cloudflare is disabled, using Basic Login") return ygg_basic_login(session, ygg_playload) diff --git a/ygg_rss_proxy/logging_config.py b/ygg_rss_proxy/logging_config.py index de28fae..fe110cb 100644 --- a/ygg_rss_proxy/logging_config.py +++ b/ygg_rss_proxy/logging_config.py @@ -7,6 +7,7 @@ import re import stackprinter +REDACTED = settings.log_redacted class SecretFilter: def __init__(self, patterns): @@ -25,11 +26,11 @@ def redact(self, message): patterns = [ - r"passkey=[^&\s]+", + r"passkey=([^&\s]+)", r"'value': '([^']+)'", - r"value=\'[^\']+\'", -
r"cf_clearance=[^;\s]+", - r"ygg_=[^;\s]+", + r"value='([^']+)'", + r"cf_clearance=([^;\s]+)", + r"ygg_=([^;\s]+)" ] logger.remove() @@ -38,12 +39,14 @@ def redact(self, message): def format(record): format_ = "{time} {level} {function} {message}\n" pats = [ - r"passkey=[^&\s]+", + r"passkey=([^&\s]+)", r"'value': '([^']+)'", - r"value=\'[^\']+\'", - r"cf_clearance=[^;\s]+", - r"ygg_=[^;\s]+", - r"[A-Za-z0-9]+\' \[GET\] of ygg_rss_proxy\.app>", + r"value='([^']+)'", + r"cf_clearance=([^;\s]+)", + r"ygg_=([^;\s]+)", + r"([A-Za-z0-9]+)' \[GET\] of ygg_rss_proxy\.app>", + r"session:[A-Za-z0-9_-]+", + r"(\{'session_data':\s*b'|<Session data b'|serialized_session_data\s*=\s*b')[\s\S]*?\.(?=\s)", ] if record["exception"] is not None: @@ -51,8 +54,9 @@ def format(record): record["exception"], suppressed_vars=[r".*ygg_playload.*", r".*query_params.*"], ) - for pat in pats: - stack = re.sub(pat, "**<REDACTED>**", stack) + if REDACTED: + for pat in pats: + stack = re.sub(pat, "**<REDACTED>**", stack) record["extra"]["stack"] = stack format_ += "{extra[stack]}\n" return format_ @@ -63,7 +67,7 @@ def format(record): format=format, level=settings.log_level.value, colorize=True, - filter=SecretFilter(patterns), + filter=SecretFilter(patterns) if REDACTED else None, ) logger.add( @@ -74,7 +78,7 @@ def format(record): retention="5 days", compression="zip", enqueue=True, - filter=SecretFilter(patterns), + filter=SecretFilter(patterns) if REDACTED else None, ) diff --git a/ygg_rss_proxy/rss.py b/ygg_rss_proxy/rss.py index 704a3ad..3c9692f 100644 --- a/ygg_rss_proxy/rss.py +++ b/ygg_rss_proxy/rss.py @@ -2,6 +2,7 @@ from lxml import etree from ygg_rss_proxy.settings import settings import requests +import timeout_decorator # URLs URL_RSS: str = f"{settings.ygg_url}/rss" @@ -9,12 +10,14 @@ URL_PROXY = f"{settings.rss_shema}://{settings.rss_host}:{settings.rss_port}" +@timeout_decorator.timeout(30, exception_message=f"Timeout after 30 seconds") def get_rss_feed(query_params, 
requests_session: requests.Session) -> requests.Response: rss_url_with_params = f"{URL_RSS}?{query_params}" response = requests_session.get(rss_url_with_params) return response +@timeout_decorator.timeout(30, exception_message=f"Timeout after 30 seconds") def replace_torrent_links(rss_content) -> Any: parser = etree.XMLParser(recover=True) tree = etree.fromstring(rss_content, parser) diff --git a/ygg_rss_proxy/run_gunicorn.py b/ygg_rss_proxy/run_gunicorn.py index a2e4164..b63c6f8 100644 --- a/ygg_rss_proxy/run_gunicorn.py +++ b/ygg_rss_proxy/run_gunicorn.py @@ -2,6 +2,7 @@ from gunicorn.app.base import BaseApplication from ygg_rss_proxy.app import app + class GunicornApp(BaseApplication): def __init__(self, app, options=None): self.options = options or {} diff --git a/ygg_rss_proxy/session_manager.py b/ygg_rss_proxy/session_manager.py index 668e2df..b0552e8 100644 --- a/ygg_rss_proxy/session_manager.py +++ b/ygg_rss_proxy/session_manager.py @@ -2,12 +2,24 @@ import pickle from flask import session from sqlalchemy import text +import timeout_decorator +from tenacity import retry, stop_after_attempt, wait_fixed from requests.utils import dict_from_cookiejar, cookiejar_from_dict from ygg_rss_proxy.auth import ygg_login from ygg_rss_proxy.logging_config import logger + +@timeout_decorator.timeout(3, exception_message=f"Timeout after 3 seconds") +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a + # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) def check_database_connection(): from ygg_rss_proxy.app import db + try: with db.engine.connect() as connection: connection.execute(text("SELECT 1")) @@ -15,6 +27,15 @@ def check_database_connection(): logger.error(f"Failed to connect to the database: {e}") raise Exception("Failed to connect to the database") + +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a
+ # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) +@timeout_decorator.timeout(90, exception_message=f"Timeout after 90 seconds") def new_session() -> requests.Session: """ This function creates a new session by logging into YGG and saving the session data. @@ -22,7 +43,6 @@ def new_session() -> requests.Session: Returns: requests.Session: The newly created session. """ - check_database_connection() ygg_session = ygg_login() session_data = { "cookies": pickle.dumps(dict_from_cookiejar(ygg_session.cookies)), @@ -32,6 +52,13 @@ def new_session() -> requests.Session: return ygg_session +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a + # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) def init_session() -> None: """ This function initializes a session by checking if session data exists. @@ -45,6 +72,13 @@ def init_session() -> None: new_session() +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a + # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) def get_session() -> requests.Session: """ This function retrieves a session by checking if session data exists. @@ -54,7 +88,6 @@ def get_session() -> requests.Session: Returns: requests.Session: The retrieved or newly created session. """ - check_database_connection() if "session_data" in session: session_data = pickle.loads(session["session_data"]) if "cookies" not in session_data or "headers" not in session_data: @@ -70,6 +103,13 @@ def get_session() -> requests.Session: return new_session() +@retry( + stop=stop_after_attempt(3), + wait=wait_fixed(0.3), + # Re-raise the last error when retries run out: the value returned by a + # retry_error_callback becomes the call's result, masking the failure. + reraise=True, +) def save_session(requests_session: requests.Session) -> None: """ This function saves the session data of a requests.Session object into the Flask session.
@@ -81,7 +121,6 @@ def save_session(requests_session: requests.Session) -> None: Returns: None """ - check_database_connection() session_data = { "cookies": pickle.dumps(dict_from_cookiejar(requests_session.cookies)), "headers": pickle.dumps(dict(requests_session.headers)), diff --git a/ygg_rss_proxy/settings.py b/ygg_rss_proxy/settings.py index c545dcb..7b41710 100644 --- a/ygg_rss_proxy/settings.py +++ b/ygg_rss_proxy/settings.py @@ -41,10 +41,14 @@ class Settings(BaseSettings): # LOGGING log_level: LogLevel = LogLevel.INFO log_path: str = "/app/config/logs/rss-proxy.log" + log_redacted: bool = True # FLASK SESSIONS secret_key: str = "superkey_that_can_be_changed" + + # SQLITE db_path: str = "/app/config/rss-proxy.db" + db_timeout: int = 15 # User-Agent user_agent: str = ( @@ -56,6 +60,9 @@ class Settings(BaseSettings): dev_host: str = "0.0.0.0" dev_port: int = 8080 + # Version + version_path: str = "/app/pyproject.toml" + model_config = SettingsConfigDict( env_file=".env", secrets_dir="/run/secrets", env_file_encoding="utf-8" ) diff --git a/ygg_rss_proxy/version.py b/ygg_rss_proxy/version.py new file mode 100644 index 0000000..eb585e3 --- /dev/null +++ b/ygg_rss_proxy/version.py @@ -0,0 +1,13 @@ +import toml +from ygg_rss_proxy.settings import settings + + +def get_version() -> str: + """Return the version string stored under [tool.poetry] in the file at settings.version_path.""" + with open(settings.version_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data["tool"]["poetry"]["version"] + + +if __name__ == "__main__": + pass