Skip to content

Commit

Permalink
[newgrounds] support 'imageData' files (#4642)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Oct 21, 2023
1 parent b52fd91 commit 7958ab1
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 20 deletions.
67 changes: 47 additions & 20 deletions gallery_dl/extractor/newgrounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,30 @@ def items(self):
if metadata:
post.update(metadata)
yield Message.Directory, post
post["num"] = 0
yield Message.Url, url, text.nameext_from_url(url, post)

ext = post["extension"]
for num, url in enumerate(text.extract_iter(
post["_images"] + post["_comment"],
'data-smartload-src="', '"'), 1):
post["num"] = num
post["_index"] = "{}_{:>02}".format(post["index"], num)
if "_multi" in post:
for data in post["_multi"]:
post["num"] += 1
post["_index"] = "{}_{:>02}".format(
post["index"], post["num"])
post.update(data)
url = data["image"]

text.nameext_from_url(url, post)
yield Message.Url, url, post

if "_fallback" in post:
del post["_fallback"]

for url in text.extract_iter(
post["_comment"], 'data-smartload-src="', '"'):
post["num"] += 1
post["_index"] = "{}_{:>02}".format(
post["index"], post["num"])
url = text.ensure_http_scheme(url)
text.nameext_from_url(url, post)

if "_fallback" in post:
del post["_fallback"]

if "/comments/" not in url:
url = url.replace("/medium_views/", "/images/", 1)
if post["extension"] == "webp":
post["_fallback"] = (url,)
post["extension"] = ext
url = url.replace(".webp", "." + ext)

yield Message.Url, url, post
else:
self.log.warning(
Expand Down Expand Up @@ -149,7 +152,6 @@ def extract_post(self, post_url):
extr = text.extract_from(page)
data = extract_data(extr, post_url)

data["_images"] = extr('<div class="art-images', '\n</div>')
data["_comment"] = extr(
'id="author_comments"', '</div>').partition(">")[2]
data["comment"] = text.unescape(text.remove_html(
Expand All @@ -168,8 +170,7 @@ def extract_post(self, post_url):
data["post_url"] = post_url
return data

@staticmethod
def _extract_image_data(extr, url):
def _extract_image_data(self, extr, url):
full = text.extract_from(util.json_loads(extr(
'"full_image_text":', '});')))
data = {
Expand All @@ -187,8 +188,34 @@ def _extract_image_data(extr, url):
index = data["url"].rpartition("/")[2].partition("_")[0]
data["index"] = text.parse_int(index)
data["_index"] = index

image_data = extr("let imageData =", "\n];")
if image_data:
data["_multi"] = self._extract_images_multi(image_data)
else:
art_images = extr('<div class="art-images', '\n</div>')
if art_images:
data["_multi"] = self._extract_images_art(art_images, data)

return data

def _extract_images_multi(self, html):
    """Yield the extra image entries found in an 'imageData' block.

    'html' is the text captured after "let imageData =" up to, but not
    including, the closing "];" marker, so the "]" lost by that capture
    is appended again before decoding the array as JSON.
    """
    entries = util.json_loads(html + "]")
    # The first array element is skipped on purpose
    # (presumably it is the main image the caller already has - confirm).
    for entry in entries[1:]:
        yield entry

def _extract_images_art(self, html, data):
    """Yield one image entry per 'data-smartload-src' URL inside 'html'.

    Each entry is a dict with an "image" URL. WebP sources are rewritten
    to use the file extension of data["url"], and the unmodified WebP URL
    is kept as "_fallback".
    """
    target_ext = text.ext_from_url(data["url"])
    sources = text.extract_iter(html, 'data-smartload-src="', '"')

    for src in sources:
        # Point at the '/images/' path instead of '/medium_views/'.
        full = text.ensure_http_scheme(src).replace(
            "/medium_views/", "/images/", 1)

        if text.ext_from_url(full) != "webp":
            yield {"image": full}
            continue

        # Swap '.webp' for the main file's extension; keep the WebP URL
        # as a fallback.
        yield {
            "image"    : full.replace(".webp", "." + target_ext),
            "_fallback": (full,),
        }

@staticmethod
def _extract_audio_data(extr, url):
index = url.split("/")[5]
Expand Down
11 changes: 11 additions & 0 deletions test/results/newgrounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@
),
},

{
"#url" : "https://www.newgrounds.com/art/view/bacun/kill-la-kill-10th-anniversary",
"#comment" : "extra files in 'imageData' block (#4642)",
"#category": ("", "newgrounds", "image"),
"#class" : newgrounds.NewgroundsImageExtractor,
"#urls" : (
"https://art.ngfiles.com/images/5127000/5127150_93307_bacun_kill-la-kill-10th-anniversary.61adfe309bec342f9db55fd44397235b.png?f1697310027",
"https://art.ngfiles.com/images/5127000/5127150_94250_bacun_kill-la-kill-10th-anniversary.64fdf525fa38c1ab34defac4b354bc7a.png?f1697332109",
),
},

{
"#url" : "https://www.newgrounds.com/art/view/kekiiro/red",
"#comment" : "'adult' rated (#2456)",
Expand Down

0 comments on commit 7958ab1

Please sign in to comment.