From 1e2713b8957ff26fc6634891ae240c4e890a6cab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 25 Dec 2019 17:25:50 +0100
Subject: [PATCH] [artstation] fix search result pagination (closes #537)

---
 gallery_dl/extractor/artstation.py | 59 ++++++++++++++++++------------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 2892bd4880..ceda29c201 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -83,14 +83,20 @@ def get_user_info(self, username):
         response = self.request(url, notfound="user")
         return response.json()
 
-    def _pagination(self, url, params=None):
-        if not params:
-            params = {}
+    def _pagination(self, url, params=None, json=None):
+        if json:
+            params = json
+            kwargs = {"json": json}
+        else:
+            if not params:
+                params = {}
+            kwargs = {"params": params}
+
         params["page"] = 1
         total = 0
 
         while True:
-            data = self.request(url, params=params).json()
+            data = self.request(url, **kwargs).json()
             yield from data["data"]
 
             total += len(data["data"])
@@ -268,34 +274,38 @@ def _id_from_url(url):
 class ArtstationSearchExtractor(ArtstationExtractor):
     """Extractor for artstation search results"""
     subcategory = "search"
-    directory_fmt = ("{category}", "Searches", "{search[searchterm]}")
-    archive_fmt = "s_{search[searchterm]}_{asset[id]}"
+    directory_fmt = ("{category}", "Searches", "{search[query]}")
+    archive_fmt = "s_{search[query]}_{asset[id]}"
     pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
                r"/search/?\?([^#]+)")
-    test = ("https://www.artstation.com/search?sorting=recent&q=ancient",)
+    test = ("https://www.artstation.com/search?q=ancient&sort_by=rank", {
+        "range": "1-20",
+        "count": 20,
+    })
 
     def __init__(self, match):
         ArtstationExtractor.__init__(self, match)
         query = text.parse_query(match.group(1))
-        self.searchterm = query.get("q", "")
-        self.order = query.get("sorting", "recent").lower()
+        self.query = query.get("q", "")
+        self.sorting = query.get("sort_by", "rank").lower()
 
     def metadata(self):
         return {"search": {
-            "searchterm": self.searchterm,
-            "order": self.order,
+            "query"  : self.query,
+            "sorting": self.sorting,
         }}
 
     def projects(self):
-        order = "likes_count" if self.order == "likes" else "published_at"
-        url = "{}/search/projects.json".format(self.root)
-        params = {
-            "direction": "desc",
-            "order": order,
-            "q": self.searchterm,
-            # "show_pro_first": "true",
-        }
-        return self._pagination(url, params)
+        url = "{}/api/v2/search/projects.json".format(self.root)
+        return self._pagination(url, json={
+            "additional_fields": "[]",
+            "filters"          : "[]",
+            "page"             : None,
+            "per_page"         : "50",
+            "pro_first"        : "1",
+            "query"            : self.query,
+            "sorting"          : self.sorting,
+        })
 
 
 class ArtstationArtworkExtractor(ArtstationExtractor):
@@ -305,7 +315,10 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
     archive_fmt = "A_{asset[id]}"
     pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
                r"/artwork/?\?([^#]+)")
-    test = ("https://www.artstation.com/artwork?sorting=latest",)
+    test = ("https://www.artstation.com/artwork?sorting=latest", {
+        "range": "1-20",
+        "count": 20,
+    })
 
     def __init__(self, match):
         ArtstationExtractor.__init__(self, match)
@@ -316,9 +329,7 @@ def metadata(self):
 
     def projects(self):
         url = "{}/projects.json".format(self.root)
-        params = self.query.copy()
-        params["page"] = 1
-        return self._pagination(url, params)
+        return self._pagination(url, self.query.copy())
 
 
 class ArtstationImageExtractor(ArtstationExtractor):