Skip to content

Commit

Permalink
Merge branch 'master' into fix_metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
eggplants authored Feb 9, 2022
2 parents d325f47 + cd5c3c6 commit 28e742e
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 154 deletions.
4 changes: 3 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ keywords =
CDX API
savepagenow
classifiers =
Development Status :: 4 - Beta
Development Status :: 5 - Production/Stable
Intended Audience :: Developers
Intended Audience :: End Users/Desktop
Natural Language :: English
Typing :: Typed
License :: OSI Approved :: MIT License
Programming Language :: Python
Programming Language :: Python :: 3
Expand Down
11 changes: 1 addition & 10 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
from waybackpy import __version__
from waybackpy.utils import (
DEFAULT_USER_AGENT,
latest_version_github,
latest_version_pypi,
)
from waybackpy.utils import DEFAULT_USER_AGENT


def test_default_user_agent() -> None:
assert (
DEFAULT_USER_AGENT
== f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
)


def test_latest_version() -> None:
package_name = "waybackpy"
assert latest_version_github(package_name) == latest_version_pypi(package_name)
4 changes: 2 additions & 2 deletions waybackpy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Module initializer and provider of static infomation."""
"""Module initializer and provider of static information."""

__version__ = "3.0.2"
__version__ = "3.0.3"

from .availability_api import WaybackMachineAvailabilityAPI
from .cdx_api import WaybackMachineCDXServerAPI
Expand Down
45 changes: 23 additions & 22 deletions waybackpy/availability_api.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""
This module interfaces the Wayback Machine's availability API.
The interface could be useful for looking up archives and finding archives
The interface is useful for looking up archives and finding archives
that are close to a specific date and time.
It has a class called WaybackMachineAvailabilityAPI, and the class has
methods such as:
It has a class WaybackMachineAvailabilityAPI, and the class has
methods like:
near() for looking up archives close to a specific date and time.
near() for retrieving archives close to a specific date and time.
oldest() for retrieving the first archive URL of the webpage.
newest() for retrieving the latest archive of an URL.
newest() for retrieving the latest archive of the webpage.
The Wayback Machine Availability response should be a valid JSON and
The Wayback Machine Availability API response must be a valid JSON and
if it is not then an exception, InvalidJSONInAvailabilityAPIResponse is raised.
If the Availability API returned valid JSON but archive URL could not be found
Expand All @@ -39,7 +39,7 @@

class WaybackMachineAvailabilityAPI:
"""
Class that interfaces the availability API of the Wayback Machine.
Class that interfaces the Wayback Machine's availability API.
"""

def __init__(
Expand All @@ -61,7 +61,7 @@ def __init__(
@staticmethod
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
"""
Converts Unix time to wayback Machine timestamp and the Wayback Machine
Converts Unix time to a Wayback Machine timestamp; the Wayback Machine
timestamp format is yyyyMMddhhmmss.
"""
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
Expand All @@ -76,10 +76,10 @@ def __str__(self) -> str:
"""
String representation of the class. If at least one API
call was successfully made then return the archive URL
as a string. Else returns "".
as a string. Else returns "" (empty string literal).
"""
# String should not return anything other than a string object
# So, if a string repr is asked for before making any API requests
# __str__ can not return anything other than a string object
# So, if a string repr is asked for even before making an API request
# just return ""
if not self.json:
return ""
Expand Down Expand Up @@ -147,7 +147,7 @@ def timestamp(self) -> datetime:
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)

raise ValueError("Could not get timestamp from result")
raise ValueError("Timestamp not found in the Availability API's JSON response.")

@property
def archive_url(self) -> str:
Expand All @@ -159,8 +159,8 @@ def archive_url(self) -> str:
archive_url = ""
data = self.json

# If the user didn't invoke oldest, newest or near but tries to access the
# archive_url attribute then assume they are fine with any archive
# If the user didn't invoke oldest, newest or near but tries to access
# the archive_url attribute then assume that they are fine with any archive
# and invoke the oldest method.
if not data:
self.oldest()
Expand All @@ -172,10 +172,10 @@ def archive_url(self) -> str:
not data or not data["archived_snapshots"]
):
self.setup_json() # It makes a new API call
data = self.json # json() updated the value of JSON attribute
data = self.json # setup_json() updates value of json attribute

# If we exhausted the max_tries, then we give up and
# raise exception.
# If max_tries is exhausted, then give up and
# raise ArchiveNotInAvailabilityAPIResponse.

if not data or not data["archived_snapshots"]:
raise ArchiveNotInAvailabilityAPIResponse(
Expand All @@ -198,7 +198,7 @@ def archive_url(self) -> str:
def wayback_timestamp(**kwargs: int) -> str:
"""
Prepends zero before the year, month, day, hour and minute so that they
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
"""
return "".join(
str(kwargs[key]).zfill(2)
Expand All @@ -218,7 +218,7 @@ def newest(self) -> "WaybackMachineAvailabilityAPI":
Passes the current UNIX time to near() for retrieving the newest archive
from the availability API.
We assume that wayback machine can not archive the future of a webpage.
Remember UNIX time is UTC and Wayback Machine is also UTC based.
"""
return self.near(unix_timestamp=int(time.time()))

Expand All @@ -232,16 +232,17 @@ def near(
unix_timestamp: Optional[int] = None,
) -> "WaybackMachineAvailabilityAPI":
"""
The main method for the Class, oldest() and newest() are dependent on it.
The most important method of this class; oldest() and newest() are
dependent on it.
It generates the timestamp based on the input either by calling the
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
appropriate arguments for their respective parameters.
Adds the timestamp to the payload dictionary.
And finally invoking the json method to make the API call then returns
the instance.
Finally, it invokes the setup_json method to make the API call and then
returns the instance.
"""
if unix_timestamp:
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
Expand Down
1 change: 0 additions & 1 deletion waybackpy/cdx_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def add_payload(self, payload: Dict[str, str]) -> None:
for i, collapse in enumerate(self.collapses):
payload["collapse" + str(i)] = collapse

# Don't need to return anything as it's dictionary.
payload["url"] = self.url

def snapshots(self) -> Generator[CDXSnapshot, None, None]:
Expand Down
2 changes: 1 addition & 1 deletion waybackpy/cdx_snapshot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Module that contains the CDXSnapshot class, CDX records are casted
Module that contains the CDXSnapshot class; CDX records/lines are cast
to CDXSnapshot objects for easier access.
The CDX index format is plain text data. Each line ('record') indicates a
Expand Down
4 changes: 2 additions & 2 deletions waybackpy/cdx_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Utility functions required for accessing the CDX server API.
These are here in this module so that we don’t make any module too
big.
long.
"""

import re
Expand Down Expand Up @@ -63,7 +63,7 @@ def get_response(
backoff_factor: float = 0.5,
) -> Union[requests.Response, Exception]:
"""
Make get request to the CDX server and return the response.
Makes a GET request to the CDX server and returns the response.
"""
session = requests.Session()

Expand Down
Loading

0 comments on commit 28e742e

Please sign in to comment.