From 281828b58b86e4893cf58eb26f4dfeff91f7e74a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 24 Dec 2021 03:42:28 +0100 Subject: [PATCH] [tumblrgallery] improve search pagination (fixes #2132) --- gallery_dl/extractor/tumblrgallery.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py index cd2d0f5837..e790613fe3 100644 --- a/gallery_dl/extractor/tumblrgallery.py +++ b/gallery_dl/extractor/tumblrgallery.py @@ -98,7 +98,10 @@ class TumblrgallerySearchExtractor(TumblrgalleryExtractor): filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}" directory_fmt = ("{category}", "{search_term}") pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))" - test = ("https://tumblrgallery.xyz/s.php?q=everyday-life",) + test = ("https://tumblrgallery.xyz/s.php?q=everyday-life", { + "pattern": r"https://\d+\.media\.tumblr\.com/.+", + "count": "< 1000", + }) def __init__(self, match): TumblrgalleryExtractor.__init__(self, match) @@ -110,17 +113,9 @@ def metadata(self, page): } def images(self, _): - page_num = 1 + page_url = "s.php?q=" + self.search_term while True: - url = "{}/s.php?q={}&page={}".format( - self.root, self.search_term, page_num) - response = self.request(url, allow_redirects=False, fatal=False) - - if response.status_code >= 300: - return - - page = response.text - page_num += 1 + page = self.request(self.root + "/" + page_url).text for gallery_id in text.extract_iter( page, '
", "")[0] )).replace("_", "-") yield url, data + + next_url = text.extract( + page, '