Skip to content

Commit

Permalink
fix: get rid of useless caching (#9)
Browse files Browse the repository at this point in the history
* fix: get rid of caching

* fix: remove dead code

* fix: remove more dead code
  • Loading branch information
Ravencentric authored Sep 19, 2024
1 parent c0733e0 commit 9f3e51a
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 143 deletions.
40 changes: 4 additions & 36 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@ httpx = ">=0.27.2"
pydantic = ">=2.9.1"
beautifulsoup4 = ">=4.12.3"
lxml = ">=5.3.0"
hishel = ">=0.0.30"
torf = ">=4.2.7"
typing-extensions = ">=4.12.2"
platformdirs = ">=4.3.3"
xmltodict = ">=0.13.0"
strenum = { version = ">=0.4.15", python = "<3.11" }

[tool.poetry.group.dev.dependencies]
Expand Down
33 changes: 7 additions & 26 deletions src/pynyaa/_clients/_async.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from __future__ import annotations

from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import urljoin

from hishel import AsyncCacheClient, AsyncFileStorage
from httpx import AsyncClient
from torf import Torrent
from typing_extensions import AsyncGenerator

from pynyaa._enums import Category, Filter, SortBy
from pynyaa._models import NyaaTorrentPage
from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page
from pynyaa._utils import get_user_cache_path


class AsyncNyaa:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None:
"""
Async Nyaa client.
Expand All @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is used for constructing the full URL from relative URLs.
cache : bool, optional
Whether to enable caching. Default is `True`.
This will cache the page upon its first request and then use the cached result
for any subsequent requests for the same page.
This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but
do note some fields like seeders, leechers, and completed are constantly changing and thus caching would
mean you won't get the latest data on said fields.
kwargs : Any, optional
Keyword arguments to pass to the underlying [`httpx.AsyncClient`](https://www.python-httpx.org/api/#asyncclient)
used to make the GET request.
"""
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
self._storage = AsyncFileStorage(base_path=get_user_cache_path())

@property
def base_url(self) -> str:
Expand All @@ -49,13 +37,6 @@ def base_url(self) -> str:
"""
return self._base_url

@property
def cache_path(self) -> Path:
    """
    Location on disk where the cache files are kept.
    """
    cache_dir = get_user_cache_path()
    return cache_dir

async def get(self, page: int | str) -> NyaaTorrentPage:
"""
Retrieve information from a Nyaa torrent page.
Expand Down Expand Up @@ -85,14 +66,14 @@ async def get(self, page: int | str) -> NyaaTorrentPage:
url = page
nyaaid = page.split("/")[-1] # type: ignore

async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client:
nyaa = await client.get(url, extensions=self._extensions)
async with AsyncClient(**self._kwargs) as client:
nyaa = await client.get(url)
nyaa.raise_for_status()

parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text)

# Get the torrent file and convert it to a torf.Torrent object
response = await client.get(parsed["torrent_file"], extensions=self._extensions)
response = await client.get(parsed["torrent_file"])
response.raise_for_status()
torrent = Torrent.read_stream(BytesIO(response.content))

Expand Down Expand Up @@ -133,7 +114,7 @@ async def search(
NyaaTorrentPage
A NyaaTorrentPage object representing the retrieved data.
"""
async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client:
async with AsyncClient(**self._kwargs) as client:
params: dict[str, Any] = dict(
f=filter,
c=category.id,
Expand All @@ -142,7 +123,7 @@ async def search(
o="asc" if reverse else "desc",
)

nyaa = await client.get(self._base_url, params=params, extensions=self._extensions)
nyaa = await client.get(self._base_url, params=params)
nyaa.raise_for_status()
results = parse_nyaa_search_results(nyaa.text)

Expand Down
33 changes: 7 additions & 26 deletions src/pynyaa/_clients/_sync.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from __future__ import annotations

from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import urljoin

from hishel import CacheClient, FileStorage
from httpx import Client
from torf import Torrent
from typing_extensions import Generator

from pynyaa._enums import Category, Filter, SortBy
from pynyaa._models import NyaaTorrentPage
from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page
from pynyaa._utils import get_user_cache_path


class Nyaa:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None:
"""
Nyaa client.
Expand All @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is used for constructing the full URL from relative URLs.
cache : bool, optional
Whether to enable caching. Default is `True`.
This will cache the page upon its first request and then use the cached result
for any subsequent requests for the same page.
This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but
do note some fields like seeders, leechers, and completed are constantly changing and thus caching would
mean you won't get the latest data on said fields.
kwargs : Any, optional
Keyword arguments to pass to the underlying [`httpx.Client`](https://www.python-httpx.org/api/#client)
used to make the GET request.
"""
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
self._storage = FileStorage(base_path=get_user_cache_path())

@property
def base_url(self) -> str:
Expand All @@ -49,13 +37,6 @@ def base_url(self) -> str:
"""
return self._base_url

@property
def cache_path(self) -> Path:
    """
    Location on disk where the cache files are kept.
    """
    cache_dir = get_user_cache_path()
    return cache_dir

def get(self, page: int | str) -> NyaaTorrentPage:
"""
Retrieve information from a Nyaa torrent page.
Expand Down Expand Up @@ -85,12 +66,12 @@ def get(self, page: int | str) -> NyaaTorrentPage:
url = page
nyaaid = page.split("/")[-1] # type: ignore

with CacheClient(storage=self._storage, **self._kwargs) as client:
nyaa = client.get(url, extensions=self._extensions).raise_for_status()
with Client(**self._kwargs) as client:
nyaa = client.get(url).raise_for_status()
parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text)

# Get the torrent file and convert it to a torf.Torrent object
torrent_file = client.get(parsed["torrent_file"], extensions=self._extensions).raise_for_status().content
torrent_file = client.get(parsed["torrent_file"]).raise_for_status().content
torrent = Torrent.read_stream(BytesIO(torrent_file))

return NyaaTorrentPage(id=nyaaid, url=url, torrent=torrent, **parsed) # type: ignore
Expand Down Expand Up @@ -130,7 +111,7 @@ def search(
NyaaTorrentPage
A NyaaTorrentPage object representing the retrieved data.
"""
with CacheClient(storage=self._storage, **self._kwargs) as client:
with Client(**self._kwargs) as client:
params: dict[str, Any] = dict(
f=filter,
c=category.id,
Expand All @@ -139,7 +120,7 @@ def search(
o="asc" if reverse else "desc",
)

nyaa = client.get(self._base_url, params=params, extensions=self._extensions).raise_for_status()
nyaa = client.get(self._base_url, params=params).raise_for_status()
results = parse_nyaa_search_results(nyaa.text)

for link in results:
Expand Down
28 changes: 0 additions & 28 deletions src/pynyaa/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from bs4 import BeautifulSoup
from typing_extensions import Generator
from xmltodict import parse as xmltodict_parse


def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]:
Expand Down Expand Up @@ -119,33 +118,6 @@ def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]:
)


def parse_nyaa_rss_page(xml: str) -> Generator[str]:
    """
    Parse the torrent links out of the RSS page.

    Parameters
    ----------
    xml : str
        Nyaa's RSS XML data as a string.

    Yields
    ------
    str
        The full URL of torrent page (the text of each item's `guid`),
        in the order they were present. Nothing is yielded when the feed
        contains no `rss -> channel -> item` entries.
    """
    document = xmltodict_parse(xml, encoding="utf-8")

    try:
        items = document["rss"]["channel"]["item"]
    except (KeyError, TypeError):
        # KeyError: one of the levels is missing from the feed.
        # TypeError: xmltodict maps an empty element (e.g. `<channel/>`) to None,
        # which cannot be subscripted. Either way there is nothing to yield.
        return

    if isinstance(items, dict):  # RSS returns single results as a dict instead of a list
        items = [items]

    for item in items:
        yield item["guid"]["#text"]


def parse_nyaa_search_results(html: str, base_url: str = "https://nyaa.si") -> Generator[str]:
"""
Parses the HTML of a Nyaa search results page to extract torrent links.
Expand Down
18 changes: 0 additions & 18 deletions src/pynyaa/_utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,11 @@
from __future__ import annotations

from functools import cache
from typing import TYPE_CHECKING

from platformdirs import user_cache_path

if TYPE_CHECKING: # pragma: no cover
from pathlib import Path
from typing import Callable, ParamSpec, TypeVar

from pynyaa._types import CategoryID

P = ParamSpec("P")
T = TypeVar("T")

# functools.cache destroys the signature of whatever it wraps, so we use this to fix it.
# This is to only "fool" typecheckers and IDEs, this doesn't exist at runtime.
def cache(user_function: Callable[P, T], /) -> Callable[P, T]: ... # type: ignore


def get_user_cache_path() -> Path:
    """Return the resolved user cache path for the `pynyaa` app (requested with `ensure_exists=True`)."""
    cache_dir = user_cache_path(appname="pynyaa", ensure_exists=True)
    return cache_dir.resolve()


@cache
def _get_category_id_from_name(key: str) -> CategoryID:
mapping: dict[str, CategoryID] = {
# All, c=0_0
Expand Down
3 changes: 0 additions & 3 deletions tests/test_async.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from platformdirs import user_cache_path

from pynyaa import AsyncNyaa, Category

headers = {
Expand All @@ -12,7 +10,6 @@

async def test_properties() -> None:
    """The shared client exposes its base URL and cache path as properties."""
    assert client.base_url == "https://nyaa.si/"

    actual_cache_path = client.cache_path
    expected_cache_path = user_cache_path(appname="pynyaa").resolve()
    assert actual_cache_path == expected_cache_path


async def test_nyaa_default() -> None:
Expand Down
3 changes: 0 additions & 3 deletions tests/test_sync.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from platformdirs import user_cache_path

from pynyaa import Category, Nyaa

headers = {
Expand All @@ -12,7 +10,6 @@

def test_properties() -> None:
    """The shared client exposes its base URL and cache path as properties."""
    assert client.base_url == "https://nyaa.si/"

    actual_cache_path = client.cache_path
    expected_cache_path = user_cache_path(appname="pynyaa").resolve()
    assert actual_cache_path == expected_cache_path


def test_nyaa_default() -> None:
Expand Down

0 comments on commit 9f3e51a

Please sign in to comment.