Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: get rid of useless caching #9

Merged
merged 3 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 4 additions & 36 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@ httpx = ">=0.27.2"
pydantic = ">=2.9.1"
beautifulsoup4 = ">=4.12.3"
lxml = ">=5.3.0"
hishel = ">=0.0.30"
torf = ">=4.2.7"
typing-extensions = ">=4.12.2"
platformdirs = ">=4.3.3"
xmltodict = ">=0.13.0"
strenum = { version = ">=0.4.15", python = "<3.11" }

[tool.poetry.group.dev.dependencies]
Expand Down
33 changes: 7 additions & 26 deletions src/pynyaa/_clients/_async.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from __future__ import annotations

from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import urljoin

from hishel import AsyncCacheClient, AsyncFileStorage
from httpx import AsyncClient
from torf import Torrent
from typing_extensions import AsyncGenerator

from pynyaa._enums import Category, Filter, SortBy
from pynyaa._models import NyaaTorrentPage
from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page
from pynyaa._utils import get_user_cache_path


class AsyncNyaa:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None:
"""
Async Nyaa client.

Expand All @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is used for constructing the full URL from relative URLs.
cache : bool, optional
Whether to enable caching. Default is `True`.
This will cache the page upon its first request and then use the cached result
for any subsequent requests for the same page.
This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but
do note some fields like seeders, leechers, and completed are constantly changing and thus caching would
mean you won't get the latest data on said fields.
kwargs : Any, optional
Keyword arguments to pass to the underlying [`httpx.AsyncClient`](https://www.python-httpx.org/api/#asyncclient)
used to make the GET request.
"""
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
self._storage = AsyncFileStorage(base_path=get_user_cache_path())

@property
def base_url(self) -> str:
Expand All @@ -49,13 +37,6 @@ def base_url(self) -> str:
"""
return self._base_url

@property
def cache_path(self) -> Path:
    """
    Directory in which cache files are kept, as reported by `get_user_cache_path()`.
    """
    location = get_user_cache_path()
    return location

async def get(self, page: int | str) -> NyaaTorrentPage:
"""
Retrieve information from a Nyaa torrent page.
Expand Down Expand Up @@ -85,14 +66,14 @@ async def get(self, page: int | str) -> NyaaTorrentPage:
url = page
nyaaid = page.split("/")[-1] # type: ignore

async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client:
nyaa = await client.get(url, extensions=self._extensions)
async with AsyncClient(**self._kwargs) as client:
nyaa = await client.get(url)
nyaa.raise_for_status()

parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text)

# Get the torrent file and convert it to a torf.Torrent object
response = await client.get(parsed["torrent_file"], extensions=self._extensions)
response = await client.get(parsed["torrent_file"])
response.raise_for_status()
torrent = Torrent.read_stream(BytesIO(response.content))

Expand Down Expand Up @@ -133,7 +114,7 @@ async def search(
NyaaTorrentPage
A NyaaTorrentPage object representing the retrieved data.
"""
async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client:
async with AsyncClient(**self._kwargs) as client:
params: dict[str, Any] = dict(
f=filter,
c=category.id,
Expand All @@ -142,7 +123,7 @@ async def search(
o="asc" if reverse else "desc",
)

nyaa = await client.get(self._base_url, params=params, extensions=self._extensions)
nyaa = await client.get(self._base_url, params=params)
nyaa.raise_for_status()
results = parse_nyaa_search_results(nyaa.text)

Expand Down
33 changes: 7 additions & 26 deletions src/pynyaa/_clients/_sync.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from __future__ import annotations

from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import urljoin

from hishel import CacheClient, FileStorage
from httpx import Client
from torf import Torrent
from typing_extensions import Generator

from pynyaa._enums import Category, Filter, SortBy
from pynyaa._models import NyaaTorrentPage
from pynyaa._parser import parse_nyaa_search_results, parse_nyaa_torrent_page
from pynyaa._utils import get_user_cache_path


class Nyaa:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", **kwargs: Any) -> None:
"""
Nyaa client.

Expand All @@ -25,22 +23,12 @@ def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwa
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is used for constructing the full URL from relative URLs.
cache : bool, optional
Whether to enable caching. Default is `True`.
This will cache the page upon its first request and then use the cached result
for any subsequent requests for the same page.
This helps in avoiding [HTTP 429 Error](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) but
do note some fields like seeders, leechers, and completed are constantly changing and thus caching would
mean you won't get the latest data on said fields.
kwargs : Any, optional
Keyword arguments to pass to the underlying [`httpx.Client`](https://www.python-httpx.org/api/#client)
used to make the GET request.
"""
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
self._storage = FileStorage(base_path=get_user_cache_path())

@property
def base_url(self) -> str:
Expand All @@ -49,13 +37,6 @@ def base_url(self) -> str:
"""
return self._base_url

@property
def cache_path(self) -> Path:
    """
    Directory in which cache files are kept, as reported by `get_user_cache_path()`.
    """
    location = get_user_cache_path()
    return location

def get(self, page: int | str) -> NyaaTorrentPage:
"""
Retrieve information from a Nyaa torrent page.
Expand Down Expand Up @@ -85,12 +66,12 @@ def get(self, page: int | str) -> NyaaTorrentPage:
url = page
nyaaid = page.split("/")[-1] # type: ignore

with CacheClient(storage=self._storage, **self._kwargs) as client:
nyaa = client.get(url, extensions=self._extensions).raise_for_status()
with Client(**self._kwargs) as client:
nyaa = client.get(url).raise_for_status()
parsed = parse_nyaa_torrent_page(self._base_url, nyaa.text)

# Get the torrent file and convert it to a torf.Torrent object
torrent_file = client.get(parsed["torrent_file"], extensions=self._extensions).raise_for_status().content
torrent_file = client.get(parsed["torrent_file"]).raise_for_status().content
torrent = Torrent.read_stream(BytesIO(torrent_file))

return NyaaTorrentPage(id=nyaaid, url=url, torrent=torrent, **parsed) # type: ignore
Expand Down Expand Up @@ -130,7 +111,7 @@ def search(
NyaaTorrentPage
A NyaaTorrentPage object representing the retrieved data.
"""
with CacheClient(storage=self._storage, **self._kwargs) as client:
with Client(**self._kwargs) as client:
params: dict[str, Any] = dict(
f=filter,
c=category.id,
Expand All @@ -139,7 +120,7 @@ def search(
o="asc" if reverse else "desc",
)

nyaa = client.get(self._base_url, params=params, extensions=self._extensions).raise_for_status()
nyaa = client.get(self._base_url, params=params).raise_for_status()
results = parse_nyaa_search_results(nyaa.text)

for link in results:
Expand Down
28 changes: 0 additions & 28 deletions src/pynyaa/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from bs4 import BeautifulSoup
from typing_extensions import Generator
from xmltodict import parse as xmltodict_parse


def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]:
Expand Down Expand Up @@ -119,33 +118,6 @@ def parse_nyaa_torrent_page(base_url: str, html: str) -> dict[str, Any]:
)


def parse_nyaa_rss_page(xml: str) -> Generator[str]:
    """
    Extract the torrent page links from Nyaa's RSS feed.

    Parameters
    ----------
    xml : str
        Nyaa's RSS XML document, as a string.

    Yields
    ------
    str
        The full URL of each torrent page (the text of the item's `guid`),
        in the order the items appear in the feed.
    """
    try:
        entries = xmltodict_parse(xml, encoding="utf-8")["rss"]["channel"]["item"]
    except KeyError:
        # One of the `rss` / `channel` / `item` keys is absent: nothing to yield.
        return

    # A feed with a single result is parsed as a dict rather than a list of dicts.
    entries = [entries] if isinstance(entries, dict) else entries

    yield from (entry["guid"]["#text"] for entry in entries)


def parse_nyaa_search_results(html: str, base_url: str = "https://nyaa.si") -> Generator[str]:
"""
Parses the HTML of a Nyaa search results page to extract torrent links.
Expand Down
18 changes: 0 additions & 18 deletions src/pynyaa/_utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,11 @@
from __future__ import annotations

from functools import cache
from typing import TYPE_CHECKING

from platformdirs import user_cache_path

if TYPE_CHECKING: # pragma: no cover
from pathlib import Path
from typing import Callable, ParamSpec, TypeVar

from pynyaa._types import CategoryID

P = ParamSpec("P")
T = TypeVar("T")

# functools.cache destroys the signature of whatever it wraps, so we redeclare it here to restore it.
# This only exists to "fool" type checkers and IDEs; it does not exist at runtime.
def cache(user_function: Callable[P, T], /) -> Callable[P, T]: ... # type: ignore


def get_user_cache_path() -> Path:
    """
    Return the resolved user cache directory for the `pynyaa` application.

    The directory is obtained from `platformdirs.user_cache_path`, called with
    `ensure_exists=True`, and then passed through `Path.resolve()`.
    """
    cache_dir = user_cache_path(appname="pynyaa", ensure_exists=True)
    return cache_dir.resolve()


@cache
def _get_category_id_from_name(key: str) -> CategoryID:
mapping: dict[str, CategoryID] = {
# All, c=0_0
Expand Down
3 changes: 0 additions & 3 deletions tests/test_async.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from platformdirs import user_cache_path

from pynyaa import AsyncNyaa, Category

headers = {
Expand All @@ -12,7 +10,6 @@

async def test_properties() -> None:
    """Check the async client's `base_url` and `cache_path` properties."""
    assert client.base_url == "https://nyaa.si/"

    actual_cache_dir = client.cache_path
    expected_cache_dir = user_cache_path(appname="pynyaa").resolve()
    assert actual_cache_dir == expected_cache_dir


async def test_nyaa_default() -> None:
Expand Down
3 changes: 0 additions & 3 deletions tests/test_sync.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from platformdirs import user_cache_path

from pynyaa import Category, Nyaa

headers = {
Expand All @@ -12,7 +10,6 @@

def test_properties() -> None:
    """Check the sync client's `base_url` and `cache_path` properties."""
    assert client.base_url == "https://nyaa.si/"

    actual_cache_dir = client.cache_path
    expected_cache_dir = user_cache_path(appname="pynyaa").resolve()
    assert actual_cache_dir == expected_cache_dir


def test_nyaa_default() -> None:
Expand Down