From 7958ab1946c00fea832432817dda38c8da85f4fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 21 Oct 2023 13:22:55 +0200 Subject: [PATCH] [newgrounds] support 'imageData' files (#4642) --- gallery_dl/extractor/newgrounds.py | 67 +++++++++++++++++++++--------- test/results/newgrounds.py | 11 +++++ 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index b119c96638..a6971e84c2 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -54,27 +54,30 @@ def items(self): if metadata: post.update(metadata) yield Message.Directory, post + post["num"] = 0 yield Message.Url, url, text.nameext_from_url(url, post) - ext = post["extension"] - for num, url in enumerate(text.extract_iter( - post["_images"] + post["_comment"], - 'data-smartload-src="', '"'), 1): - post["num"] = num - post["_index"] = "{}_{:>02}".format(post["index"], num) + if "_multi" in post: + for data in post["_multi"]: + post["num"] += 1 + post["_index"] = "{}_{:>02}".format( + post["index"], post["num"]) + post.update(data) + url = data["image"] + + text.nameext_from_url(url, post) + yield Message.Url, url, post + + if "_fallback" in post: + del post["_fallback"] + + for url in text.extract_iter( + post["_comment"], 'data-smartload-src="', '"'): + post["num"] += 1 + post["_index"] = "{}_{:>02}".format( + post["index"], post["num"]) url = text.ensure_http_scheme(url) text.nameext_from_url(url, post) - - if "_fallback" in post: - del post["_fallback"] - - if "/comments/" not in url: - url = url.replace("/medium_views/", "/images/", 1) - if post["extension"] == "webp": - post["_fallback"] = (url,) - post["extension"] = ext - url = url.replace(".webp", "." + ext) - yield Message.Url, url, post else: self.log.warning( @@ -149,7 +152,6 @@ def extract_post(self, post_url): extr = text.extract_from(page) data = extract_data(extr, post_url) - data["_images"] = extr('
').partition(">")[2] data["comment"] = text.unescape(text.remove_html( @@ -168,8 +170,7 @@ def extract_post(self, post_url): data["post_url"] = post_url return data - @staticmethod - def _extract_image_data(extr, url): + def _extract_image_data(self, extr, url): full = text.extract_from(util.json_loads(extr( '"full_image_text":', '});'))) data = { @@ -187,8 +188,34 @@ def _extract_image_data(extr, url): index = data["url"].rpartition("/")[2].partition("_")[0] data["index"] = text.parse_int(index) data["_index"] = index + + image_data = extr("let imageData =", "\n];") + if image_data: + data["_multi"] = self._extract_images_multi(image_data) + else: + art_images = extr('