From 7b2cf0f0957c71ce82b51d59bd1c1533d0c94751 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 18 Feb 2022 14:10:48 +0530 Subject: [PATCH 1/5] Update README.md --- README.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 69b2c3e..ad16a39 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,25 @@ False datetime.datetime(2022, 1, 18, 12, 52, 49) ``` +#### CDX API aka CDXServerAPI + +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://pypi.org" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> cdx = WaybackMachineCDXServerAPI(url, user_agent, start_timestamp=2016, end_timestamp=2017) +>>> for item in cdx.snapshots(): +... print(item.archive_url) +... +https://web.archive.org/web/20160110011047/http://pypi.org/ +https://web.archive.org/web/20160305104847/http://pypi.org/ +. +. # URLS REDACTED FOR READABILITY +. +https://web.archive.org/web/20171127171549/https://pypi.org/ +https://web.archive.org/web/20171206002737/http://pypi.org:80/ +``` + #### Availability API ```python @@ -104,25 +123,6 @@ https://web.archive.org/web/20220118150444/https://www.google.com/ https://web.archive.org/web/20101010101708/http://www.google.com/ ``` -#### CDX API aka CDXServerAPI - -```python ->>> from waybackpy import WaybackMachineCDXServerAPI ->>> url = "https://pypi.org" ->>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" ->>> cdx = WaybackMachineCDXServerAPI(url, user_agent, start_timestamp=2016, end_timestamp=2017) ->>> for item in cdx.snapshots(): -... print(item.archive_url) -... -https://web.archive.org/web/20160110011047/http://pypi.org/ -https://web.archive.org/web/20160305104847/http://pypi.org/ -. -. # URLS REDACTED FOR READABILITY -. 
-https://web.archive.org/web/20171127171549/https://pypi.org/ -https://web.archive.org/web/20171206002737/http://pypi.org:80/ -``` - > Documentation is at <https://github.com/akamhy/waybackpy/wiki/Python-package-docs>. ### As a CLI tool From 67dac04f9d8c46fdf898c28fc18ddebd21407044 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 18 Feb 2022 14:33:17 +0530 Subject: [PATCH 2/5] Update README.md --- README.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ad16a39..3644726 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,66 @@ datetime.datetime(2022, 1, 18, 12, 52, 49) #### CDX API aka CDXServerAPI +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://google.com" +>>> user_agent = "my new app's user agent" +>>> cdx_api = WaybackMachineCDXServerAPI(url, user_agent) +``` +##### oldest +```python +>>> cdx_api.oldest() +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest = cdx_api.oldest() +>>> oldest +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest.archive_url +'https://web.archive.org/web/19981111184551/http://google.com:80/' +>>> oldest.original +'http://google.com:80/' +>>> oldest.urlkey +'com,google)/' +>>> oldest.timestamp +'19981111184551' +>>> oldest.datetime_timestamp +datetime.datetime(1998, 11, 11, 18, 45, 51) +>>> oldest.statuscode +'200' +>>> oldest.mimetype +'text/html' +``` +##### newest +```python +>>> newest = cdx_api.newest() +>>> newest +com,google)/ 20220217234427 http://@google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 563 +>>> newest.archive_url +'https://web.archive.org/web/20220217234427/http://@google.com/' +>>> newest.timestamp +'20220217234427' +``` +##### near +```python +>>> near = cdx_api.near(year=2010, month=10, day=10, hour=10, minute=10) +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' 
+>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.timestamp +'20101010101435' +>>> near.timestamp +'20101010101435' +>>> near = cdx_api.near(wayback_machine_timestamp=2008080808) +>>> near.archive_url +'https://web.archive.org/web/20080808051143/http://google.com/' +>>> near = cdx_api.near(unix_timestamp=1286705410) +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> +``` +##### snapshots ```python >>> from waybackpy import WaybackMachineCDXServerAPI >>> url = "https://pypi.org" @@ -105,6 +165,8 @@ https://web.archive.org/web/20171206002737/http://pypi.org:80/ #### Availability API +It is recommended to not use the availability API due to performance issues. All the methods of availability API interface class, `WaybackMachineAvailabilityAPI`, are also implemented in the CDX server API interface class, `WaybackMachineCDXServerAPI`. 
+ ```python >>> from waybackpy import WaybackMachineAvailabilityAPI >>> @@ -112,13 +174,19 @@ https://web.archive.org/web/20171206002737/http://pypi.org:80/ >>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" >>> >>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent) ->>> +``` +##### oldest +```python >>> availability_api.oldest() https://web.archive.org/web/19981111184551/http://google.com:80/ ->>> +``` +##### newest +```python >>> availability_api.newest() https://web.archive.org/web/20220118150444/https://www.google.com/ ->>> +``` +##### near +```python >>> availability_api.near(year=2010, month=10, day=10, hour=10) https://web.archive.org/web/20101010101708/http://www.google.com/ ``` From df380984eceb0c0fcdd8a27aa975a2f2994975b2 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 18 Feb 2022 15:52:55 +0530 Subject: [PATCH 3/5] update asciinema link --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3644726..b60a541 100644 --- a/README.md +++ b/README.md @@ -195,9 +195,9 @@ https://web.archive.org/web/20101010101708/http://www.google.com/ ### As a CLI tool -Demo video on [asciinema.org](https://asciinema.org), you can copy the text from video: +Demo video on [asciinema.org](https://asciinema.org/a/469890), you can copy the text from video: -[![asciicast](https://asciinema.org/a/469890.svg)](https://asciinema.org/a/469890) +[![asciicast](https://asciinema.org/a/469890.svg)](https://asciinema.org/a/469890) > CLI documentation is at <https://github.com/akamhy/waybackpy/wiki/CLI-docs>. 
From 4b5092d2b55795dda10943cbd9fd41d8fc533c99 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 18 Feb 2022 15:57:53 +0530 Subject: [PATCH 4/5] v3.0.4 --- waybackpy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/waybackpy/__init__.py b/waybackpy/__init__.py index 98cba3d..1f4661a 100644 --- a/waybackpy/__init__.py +++ b/waybackpy/__init__.py @@ -1,6 +1,6 @@ """Module initializer and provider of static information.""" -__version__ = "3.0.3" +__version__ = "3.0.4" from .availability_api import WaybackMachineAvailabilityAPI from .cdx_api import WaybackMachineCDXServerAPI From 46f3c715098a3f82e04deeb47f9df484d87d0ee1 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 18 Feb 2022 16:04:06 +0530 Subject: [PATCH 5/5] update video link --- waybackpy/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/waybackpy/cli.py b/waybackpy/cli.py index 7342fd4..96d6445 100644 --- a/waybackpy/cli.py +++ b/waybackpy/cli.py @@ -379,7 +379,7 @@ def main( # pylint: disable=no-value-for-parameter Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs - waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367 + waybackpy - CLI usage(Demo video): https://asciinema.org/a/469890 Released under the MIT License. Use the flag --license for license.