Skip to content

Commit

Permalink
added RedirectSaveError - for failed saves if the URL is a permanent … (
Browse files Browse the repository at this point in the history
#93)

* Added RedirectSaveError, raised when a save fails because the URL is a permanent redirect.

* Check whether the URL was redirected before throwing exceptions; res.url is the redirect URL if any redirect occurred.

* update tests and cli errors
  • Loading branch information
akamhy authored Apr 2, 2021
1 parent db8f902 commit dd1917c
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 3 deletions.
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_save():
get=None,
)
reply = cli.args_handler(args)
assert "could happen because either your waybackpy" in str(reply)
assert "could happen because either your waybackpy" or "cannot be archived by wayback machine as it is a redirect" in str(reply)


def test_json():
Expand Down
2 changes: 2 additions & 0 deletions waybackpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def _save(obj):
version=__version__, header=header
)
)
if "URL cannot be archived by wayback machine as it is a redirect" in e:
return ("URL cannot be archived by wayback machine as it is a redirect")
raise WaybackError(err)


Expand Down
7 changes: 7 additions & 0 deletions waybackpy/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ class WaybackError(Exception):
"""


class RedirectSaveError(WaybackError):
    """
    Signals a save that failed because the requested URL is a redirect:
    the redirect target gets archived, while the original URL does not.
    """


class URLError(Exception):
"""
Raised when malformed URLs are passed as arguments.
Expand Down
23 changes: 21 additions & 2 deletions waybackpy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
from datetime import datetime

from .exceptions import WaybackError, URLError
from .exceptions import WaybackError, URLError, RedirectSaveError
from .__version__ import __version__

from urllib3.util.retry import Retry
Expand Down Expand Up @@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent):
return int((_get_response(total_pages_url, headers=headers).text).strip())


def _archive_url_parser(header, url, latest_version=__version__, instance=None):
def _archive_url_parser(
header, url, latest_version=__version__, instance=None, response=None
):
"""Returns the archive after parsing it from the response header.
Parameters
Expand Down Expand Up @@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
if m:
return m.group(1)

if response:
if response.url:
if "web.archive.org/web" in response.url:
m = re.search(
r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$",
str(response.url).strip(),
)
if m:
return m.group(0)

if instance:
newest_archive = None
try:
Expand All @@ -414,6 +426,13 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
"Wayback Machine is malfunctioning or it refused to archive your URL."
"\nHeader:\n{header}".format(url=url, header=header)
)

if "save redirected" == header.strip():
raise RedirectSaveError(
"URL cannot be archived by wayback machine as it is a redirect.\nHeader:\n{header}".format(
header=header
)
)
else:
exc_message = (
"No archive URL found in the API response. "
Expand Down
1 change: 1 addition & 0 deletions waybackpy/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def save(self):
self.url,
latest_version=self.latest_version,
instance=self,
response=response,
)

m = re.search(
Expand Down

0 comments on commit dd1917c

Please sign in to comment.