Skip to content

Commit

Permalink
fix: use urljoin()
Browse files: browse the repository at this point in the history
  • Loading branch information
Ravencentric committed Jun 29, 2024
1 parent d6edce6 commit 41c0f9c
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
15 changes: 8 additions & 7 deletions src/pynyaa/_clients/_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from io import BytesIO
from typing import Any
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from hishel import AsyncCacheClient, AsyncFileStorage
Expand All @@ -18,14 +19,14 @@

class AsyncNyaa:
@validate_call
def __init__(self, base_url: str = "https://nyaa.si", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
"""
Async Nyaa client.
Parameters
----------
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si`.
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is only used when a Nyaa ID is passed.
If a full URL is passed then this gets ignored and the base_url is parsed from the given URL instead.
cache : bool, optional
Expand All @@ -39,7 +40,7 @@ def __init__(self, base_url: str = "https://nyaa.si", cache: bool = True, **kwar
Keyword arguments to pass to the underlying [httpx.AsyncClient()](https://www.python-httpx.org/api/#asyncclient)
used to make the GET request.
"""
self._base_url = base_url.strip("/")
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
Expand Down Expand Up @@ -91,7 +92,7 @@ async def _parse_nyaa(self, html: str) -> dict[str, Any]:
row_two = rows[1].find_all("div", class_="col-md-5")
submitter = row_two[0].get_text().strip()
if submitter.lower() != "anonymous":
submitter_url = f"{self._base_url}{row_two[0].find('a').get('href', f'/user/{submitter}')}"
submitter_url = urljoin(self._base_url, row_two[0].find("a").get("href", f"/user/{submitter}"))
submitter_status = row_two[0].find("a").get("title", None)

if submitter_status is not None:
Expand Down Expand Up @@ -125,7 +126,7 @@ async def _parse_nyaa(self, html: str) -> dict[str, Any]:

# ROW FOOTER
footer = body.find("div", class_="panel-footer clearfix").find_all("a") # type: ignore
torrent_file = f"{self._base_url}{footer[0]['href']}"
torrent_file = urljoin(self._base_url, footer[0]["href"])
magnet = footer[1]["href"]

# DESCRIPTION
Expand Down Expand Up @@ -178,13 +179,13 @@ async def get(self, page: int | str) -> NyaaTorrentPage:
"""

if isinstance(page, int):
url = f"{self._base_url}/view/{page}"
url = urljoin(self._base_url, f"/view/{page}")
id = page
else:
url = page
id = page.split("/")[-1] # type: ignore
host = Url(page).host
self._base_url = f"https://{host}" if host is not None else "https://nyaa.si"
self._base_url = f"https://{host}/" if host is not None else "https://nyaa.si/"

async with AsyncCacheClient(storage=self._storage, **self._kwargs) as client:

Expand Down
15 changes: 8 additions & 7 deletions src/pynyaa/_clients/_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from io import BytesIO
from typing import Any
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from hishel import CacheClient, FileStorage
Expand All @@ -18,14 +19,14 @@

class Nyaa:
@validate_call
def __init__(self, base_url: str = "https://nyaa.si", cache: bool = True, **kwargs: Any) -> None:
def __init__(self, base_url: str = "https://nyaa.si/", cache: bool = True, **kwargs: Any) -> None:
"""
Nyaa client.
Parameters
----------
base_url : str, optional
The base URL of Nyaa. Default is `https://nyaa.si`.
The base URL of Nyaa. Default is `https://nyaa.si/`.
This is only used when a Nyaa ID is passed.
If a full URL is passed then this gets ignored and the base_url is parsed from the given URL instead.
cache : bool, optional
Expand All @@ -39,7 +40,7 @@ def __init__(self, base_url: str = "https://nyaa.si", cache: bool = True, **kwar
Keyword arguments to pass to the underlying [httpx.Client()](https://www.python-httpx.org/api/#client)
used to make the GET request.
"""
self._base_url = base_url.strip("/")
self._base_url = base_url
self._cache = cache
self._kwargs = kwargs
self._extensions = {"force_cache": self._cache, "cache_disabled": not self._cache}
Expand Down Expand Up @@ -91,7 +92,7 @@ def _parse_nyaa(self, html: str) -> dict[str, Any]:
row_two = rows[1].find_all("div", class_="col-md-5")
submitter = row_two[0].get_text().strip()
if submitter.lower() != "anonymous":
submitter_url = f"{self._base_url}{row_two[0].find('a').get('href', f'/user/{submitter}')}"
submitter_url = urljoin(self._base_url, row_two[0].find('a').get('href', f'/user/{submitter}'))
submitter_status = row_two[0].find("a").get("title", None)

if submitter_status is not None:
Expand Down Expand Up @@ -125,7 +126,7 @@ def _parse_nyaa(self, html: str) -> dict[str, Any]:

# ROW FOOTER
footer = body.find("div", class_="panel-footer clearfix").find_all("a") # type: ignore
torrent_file = f"{self._base_url}{footer[0]['href']}"
torrent_file = urljoin(self._base_url, footer[0]['href'])
magnet = footer[1]["href"]

# DESCRIPTION
Expand Down Expand Up @@ -178,13 +179,13 @@ def get(self, page: int | str) -> NyaaTorrentPage:
"""

if isinstance(page, int):
url = f"{self._base_url}/view/{page}"
url = urljoin(self._base_url, f"/view/{page}")
id = page
else:
url = page
id = page.split("/")[-1] # type: ignore
host = Url(page).host
self._base_url = f"https://{host}" if host is not None else "https://nyaa.si"
self._base_url = f"https://{host}/" if host is not None else "https://nyaa.si/"

with CacheClient(storage=self._storage, **self._kwargs) as client:

Expand Down

0 comments on commit 41c0f9c

Please sign in to comment.