From 9f3e51a20c19be3b598ba1dff36fc1bf493a6725 Mon Sep 17 00:00:00 2001 From: Ravencentric <78981416+Ravencentric@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:29:36 +0530 Subject: [PATCH] fix: get rid of useless caching (#9) * fix: get rid of caching * fix: remove dead code * fix: remove more dead code --- poetry.lock | 40 ++++------------------------------- pyproject.toml | 3 --- src/pynyaa/_clients/_async.py | 33 ++++++----------------------- src/pynyaa/_clients/_sync.py | 33 ++++++----------------------- src/pynyaa/_parser.py | 28 ------------------------ src/pynyaa/_utils.py | 18 ---------------- tests/test_async.py | 3 --- tests/test_sync.py | 3 --- 8 files changed, 18 insertions(+), 143 deletions(-) diff --git a/poetry.lock b/poetry.lock index 98a92c5..53e48e2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -353,27 +353,6 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] -[[package]] -name = "hishel" -version = "0.0.30" -description = "Persistent cache implementation for httpx and httpcore" -optional = false -python-versions = ">=3.8" -files = [ - {file = "hishel-0.0.30-py3-none-any.whl", hash = "sha256:0c73a779a6b554b52dff75e5962057df25764fd798c31b9435ce6398b1b171c8"}, - {file = "hishel-0.0.30.tar.gz", hash = "sha256:656393ee77e9c39a0d6c527c74810e15d96e598dcb9b191f20a788608ceaca99"}, -] - -[package.dependencies] -httpx = ">=0.22.0" -typing-extensions = ">=4.8.0" - -[package.extras] -redis = ["redis (==5.0.1)"] -s3 = ["boto3 (>=1.15.0,<=1.15.3)", "boto3 (>=1.15.3)"] -sqlite = ["anysqlite (>=0.0.5)"] -yaml = ["pyyaml (==6.0.1)"] - [[package]] name = "httpcore" version = "1.0.5" @@ -953,13 +932,13 @@ files = [ [[package]] name = "platformdirs" -version = "4.3.3" +version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.3.3-py3-none-any.whl", hash = "sha256:50a5450e2e84f44539718293cbb1da0a0885c9d14adf21b77bae4e66fc99d9b5"}, - {file = "platformdirs-4.3.3.tar.gz", hash = "sha256:d4e0b7d8ec176b341fb03cb11ca12d0276faa8c485f9cd218f613840463fc2c0"}, + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, ] [package.extras] @@ -1602,18 +1581,7 @@ files = [ [package.extras] watchmedo = ["PyYAML (>=3.10)"] -[[package]] -name = "xmltodict" -version = "0.13.0" -description = "Makes working with XML feel like you are working with JSON" -optional = false -python-versions = ">=3.4" -files = [ - {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, - {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, -] - [metadata] lock-version = "2.0" python-versions = ">=3.10" -content-hash = "f003dcaa020e9c520f9614cde116f43fa21677b1abd96fada7c8182a706b7cb2" +content-hash = "532d680b907cbbe783992963a6106a8d723c158ac918cfb2d733d268c9607239" diff --git a/pyproject.toml b/pyproject.toml index ca2c491..79ae8bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,11 +25,8 @@ httpx = ">=0.27.2" pydantic = ">=2.9.1" beautifulsoup4 = ">=4.12.3" lxml = ">=5.3.0" -hishel = ">=0.0.30" torf = ">=4.2.7" typing-extensions = ">=4.12.2" -platformdirs = ">=4.3.3" -xmltodict = ">=0.13.0" strenum = { version = ">=0.4.15", python = "<3.11" } [tool.poetry.group.dev.dependencies] diff --git a/src/pynyaa/_clients/_async.py b/src/pynyaa/_clients/_async.py index e643e97..6804e6b 100644 --- a/src/pynyaa/_clients/_async.py +++ b/src/pynyaa/_clients/_async.py @@ -1,22 +1,20 @@ from __future__ import annotations from io import BytesIO -from pathlib import Path from typing import Any from urllib.parse import urljoin -from hishel import AsyncCacheClient, AsyncFileStorage +from httpx import AsyncClient from torf import Torrent from typing_extensions import AsyncGenerator from pynyaa._enums import Category, Filter, SortBy from pynyaa._models import NyaaTorrentPage from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page -from pynyaa._utils import get_user_cache_path class AsyncNyaa: - def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None: + def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None: """ Async Nyaa client. @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa base_url : str, optional The base URL of Nyaa. Default is `https://nyaa.si/`. This is used for constructing the full URL from relative URLs. - cache : bool, optional - Whether to enable caching. Default is `True`. - This will cache the page upon it's first request and then use the cached result - for any subsequent requests for the same page. - This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but - do note some fields like seeders, leechers, and completed are constantly changing and thus caching would - mean you won't get the latest data on said fields. 
kwargs : Any, optional Keyword arguments to pass to the underlying [`httpx.AsyncClient`](https://www.python-httpx.org/api/#asyncclient) used to make the GET request. """ self._base_url = base_url - self._cache = cache self._kwargs = kwargs - self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache} - self._storage = AsyncFileStorage(base_path=get_user_cache_path()) @property def base_url(self) -> str: @@ -49,13 +37,6 @@ def base_url(self) -> str: """ return self._base_url - @property - def cache_path(self) -> Path: - """ - Path where cache files are stored. - """ - return get_user_cache_path() - async def get(self, page: int | str) -> NyaaTorrentPage: """ Retrieve information from a Nyaa torrent page. @@ -85,14 +66,14 @@ async def get(self, page: int | str) -> NyaaTorrentPage: url = page nyaaid = page.split("/")[-1] # type: ignore - async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client: - nyaa = await client.get(url, extensions=self._extensions) + async with AsyncClient(**self._kwargs) as client: + nyaa = await client.get(url) nyaa.raise_for_status() parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text) # Get the torrent file and convert it to a torf.Torrent object - response = await client.get(parsed["torrent_file"], extensions=self._extensions) + response = await client.get(parsed["torrent_file"]) response.raise_for_status() torrent = Torrent.read_stream(BytesIO(response.content)) @@ -133,7 +114,7 @@ async def search( NyaaTorrentPage A NyaaTorrentPage object representing the retrieved data. """ - async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client: + async with AsyncClient(**self._kwargs) as client: params: dict[str, Any] = dict( f=filter, c=category.id, @@ -142,7 +123,7 @@ async def search( o="asc" if reverse else "desc", ) - nyaa = await client.get(self._base_url, params=params, extensions=self._extensions) + nyaa = await client.get(self._base_url, params=params) nyaa.raise_for_status() results = parse_nyaa_search_results(nyaa.text) diff --git a/src/pynyaa/_clients/_sync.py b/src/pynyaa/_clients/_sync.py index 082a5e1..7bc4296 100644 --- a/src/pynyaa/_clients/_sync.py +++ b/src/pynyaa/_clients/_sync.py @@ -1,22 +1,20 @@ from __future__ import annotations from io import BytesIO -from pathlib import Path from typing import Any from urllib.parse import urljoin -from hishel import CacheClient, FileStorage +from httpx import Client from torf import Torrent from typing_extensions import Generator from pynyaa._enums import Category, Filter, SortBy from pynyaa._models import NyaaTorrentPage from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page -from pynyaa._utils import get_user_cache_path class Nyaa: - def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None: + def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None: """ Nyaa client. @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa base_url : str, optional The base URL of Nyaa. Default is `https://nyaa.si/`. This is used for constructing the full URL from relative URLs. - cache : bool, optional - Whether to enable caching. Default is `True`. - This will cache the page upon it's first request and then use the cached result - for any subsequent requests for the same page. 
- This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but - do note some fields like seeders, leechers, and completed are constantly changing and thus caching would - mean you won't get the latest data on said fields. kwargs : Any, optional Keyword arguments to pass to the underlying [`httpx.Client`](https://www.python-httpx.org/api/#client) used to make the GET request. """ self._base_url = base_url - self._cache = cache self._kwargs = kwargs - self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache} - self._storage = FileStorage(base_path=get_user_cache_path()) @property def base_url(self) -> str: @@ -49,13 +37,6 @@ def base_url(self) -> str: """ return self._base_url - @property - def cache_path(self) -> Path: - """ - Path where cache files are stored. - """ - return get_user_cache_path() - def get(self, page: int | str) -> NyaaTorrentPage: """ Retrieve information from a Nyaa torrent page. @@ -85,12 +66,12 @@ def get(self, page: int | str) -> NyaaTorrentPage: url = page nyaaid = page.split("/")[-1] # type: ignore - with CacheClient(storage=self._storage, **self._kwargs) as client: - nyaa = client.get(url, extensions=self._extensions).raise_for_status() + with Client(**self._kwargs) as client: + nyaa = client.get(url).raise_for_status() parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text) # Get the torrent file and convert it to a torf.Torrent object - torrent_file = client.get(parsed["torrent_file"], extensions=self._extensions).raise_for_status().content + torrent_file = client.get(parsed["torrent_file"]).raise_for_status().content torrent = Torrent.read_stream(BytesIO(torrent_file)) return NyaaTorrentPage(id=nyaaid, url=url, torrent=torrent, **parsed) # type: ignore @@ -130,7 +111,7 @@ def search( NyaaTorrentPage A NyaaTorrentPage object representing the retrieved data. """ - with CacheClient(storage=self._storage, **self._kwargs) as client: + with Client(**self._kwargs) as client: params: dict[str, Any] = dict( f=filter, c=category.id, @@ -139,7 +120,7 @@ def search( o="asc" if reverse else "desc", ) - nyaa = client.get(self._base_url, params=params, extensions=self._extensions).raise_for_status() + nyaa = client.get(self._base_url, params=params).raise_for_status() results = parse_nyaa_search_results(nyaa.text) for link in results: diff --git a/src/pynyaa/_parser.py b/src/pynyaa/_parser.py index 66a7944..5bee959 100644 --- a/src/pynyaa/_parser.py +++ b/src/pynyaa/_parser.py @@ -6,7 +6,6 @@ from bs4 import BeautifulSoup from typing_extensions import Generator -from xmltodict import parse as xmltodict_parse def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]: @@ -119,33 +118,6 @@ def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]: ) -def parse_nyaa_rss_page(xml: str) -> Generator[str]: - """ - Parse the torrent links out of the RSS page - - Parameters - ---------- - xml : str - Nyaa's RSS XML data as a string - - Yields - ------ - str - The full URL of torrent page, in the order they were present. - """ - try: - items = xmltodict_parse(xml, encoding="utf-8")["rss"]["channel"]["item"] - except KeyError: - yield from () - return - - if isinstance(items, dict): # RSS returns single results as a dict instead of a list - items = [items] - - for item in items: - yield item["guid"]["#text"] - - def parse_nyaa_search_results(html: str, base_url: str = "https://nyaa.si") -> Generator[str]: """ Parses the HTML of a Nyaa search results page to extract torrent links. 
diff --git a/src/pynyaa/_utils.py b/src/pynyaa/_utils.py index 149c2b9..c64d465 100644 --- a/src/pynyaa/_utils.py +++ b/src/pynyaa/_utils.py @@ -1,29 +1,11 @@ from __future__ import annotations -from functools import cache from typing import TYPE_CHECKING -from platformdirs import user_cache_path - if TYPE_CHECKING: # pragma: no cover - from pathlib import Path - from typing import Callable, ParamSpec, TypeVar - from pynyaa._types import CategoryID - P = ParamSpec("P") - T = TypeVar("T") - - # functools.cache destroys the signature of whatever it wraps, so we use this to fix it. - # This is to only "fool" typecheckers and IDEs, this doesn't exist at runtime. - def cache(user_function: Callable[P, T], /) -> Callable[P, T]: ... # type: ignore - - -def get_user_cache_path() -> Path: - return user_cache_path(appname="pynyaa", ensure_exists=True).resolve() - -@cache def _get_category_id_from_name(key: str) -> CategoryID: mapping: dict[str, CategoryID] = { # All, c=0_0 diff --git a/tests/test_async.py b/tests/test_async.py index 3cfe21a..bb113ae 100644 --- a/tests/test_async.py +++ b/tests/test_async.py @@ -1,7 +1,5 @@ from __future__ import annotations -from platformdirs import user_cache_path - from pynyaa import AsyncNyaa, Category headers = { @@ -12,7 +10,6 @@ async def test_properties() -> None: assert client.base_url == "https://nyaa.si/" - assert client.cache_path == user_cache_path(appname="pynyaa").resolve() async def test_nyaa_default() -> None: diff --git a/tests/test_sync.py b/tests/test_sync.py index ee0badc..9d3d7bb 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -1,7 +1,5 @@ from __future__ import annotations -from platformdirs import user_cache_path - from pynyaa import Category, Nyaa headers = { @@ -12,7 +10,6 @@ def test_properties() -> None: assert client.base_url == "https://nyaa.si/" - assert client.cache_path == user_cache_path(appname="pynyaa").resolve() def test_nyaa_default() -> None:
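
Migration note (commentary, not part of the patch): because `**kwargs` is still forwarded verbatim to the underlying `httpx.Client` / `httpx.AsyncClient`, callers who relied on the removed built-in caching can opt back in on their own side by supplying a cached transport. A minimal sketch, assuming hishel is installed in the caller's environment; the cache directory and torrent ID below are illustrative placeholders, not values from this repository:

    from pathlib import Path

    import hishel
    import httpx

    from pynyaa import Nyaa

    # Wrap httpx's stock transport in hishel's CacheTransport and pass it in
    # via **kwargs; the patched Nyaa client forwards it to httpx.Client as-is.
    storage = hishel.FileStorage(base_path=Path(".pynyaa-cache"))  # illustrative location
    transport = hishel.CacheTransport(transport=httpx.HTTPTransport(), storage=storage)

    client = Nyaa(transport=transport)
    page = client.get(123456)  # placeholder torrent ID

The caveat from the removed docstring still applies: cached responses will report stale seeder/leecher/completed counts.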