From da7297c0b9d9d37e3955851bdabf662cb2b4095f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 23 Jul 2021 02:01:51 +0200 Subject: [PATCH] [comicvine] add extractor (closes #1712) --- docs/supportedsites.md | 6 +++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/comicvine.py | 78 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + 4 files changed, 86 insertions(+) create mode 100644 gallery_dl/extractor/comicvine.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2e2c4292ef..230901e216 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -97,6 +97,12 @@ Consider all sites to be NSFW unless otherwise known. Blogs, Posts, Search Results + + Comic Vine + https://comicvine.gamespot.com/ + Tag Searches + + Cyberdrop https://cyberdrop.me/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 740fb7d7c9..1a6a899bc9 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ "bcy", "behance", "blogger", + "comicvine", "cyberdrop", "danbooru", "deviantart", diff --git a/gallery_dl/extractor/comicvine.py b/gallery_dl/extractor/comicvine.py new file mode 100644 index 0000000000..3a57886408 --- /dev/null +++ b/gallery_dl/extractor/comicvine.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://comicvine.gamespot.com/""" + +from .booru import BooruExtractor +from .. 
import text  # NOTE(review): completes the "from .. " fragment ending the previous line
import operator


class ComicvineTagExtractor(BooruExtractor):
    """Extractor for a gallery on comicvine.gamespot.com"""
    # Identification and site constants.
    category = "comicvine"
    subcategory = "tag"
    basecategory = ""
    root = "https://comicvine.gamespot.com"

    # Number of images requested per API call; sent as "count" in posts()
    # and also used there to detect the last (short) batch.
    per_page = 1000

    # Output naming templates.
    directory_fmt = ("{category}", "{tag}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{id}"

    # Capture groups, in the order unpacked by __init__():
    #   1. the full path starting at the first "/" (query string included)
    #   2. the object name (single path segment)
    #   3. the object ID, of the form "<digits>-<digits>"
    pattern = (
        r"(?:https?://)?comicvine\.gamespot\.com"
        r"(/([^/?#]+)/(\d+-\d+)/images/.*)"
    )

    test = (
        (
            "https://comicvine.gamespot.com/jock/4040-5653/images/",
            {
                "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
                           r"/original/\d+/\d+/\d+-.+\.(jpe?g|png)",
                "count": ">= 140",
            },
        ),
        (
            ("https://comicvine.gamespot.com/batman/4005-1699"
             "/images/?tag=Fan%20Art%20%26%20Cosplay"),
            {
                "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
                           r"/original/\d+/\d+/\d+-.+",
                "count": ">= 450",
            },
        ),
    )

    def __init__(self, match):
        """Store the three URL components captured by 'pattern'."""
        BooruExtractor.__init__(self, match)
        path, object_name, object_id = match.groups()
        self.path = path
        self.object_name = object_name
        self.object_id = object_id

    def metadata(self):
        """Return gallery-level metadata: the unquoted object name as 'tag'."""
        tag = text.unquote(self.object_name)
        return {"tag": tag}

    def posts(self):
        """Yield every image object of the gallery, one API batch at a time.

        The gallery page is requested once to obtain its gallery ID, then
        '/js/image-data.json' is queried repeatedly with an increasing
        'start' offset until a batch shorter than 'per_page' comes back.
        """
        endpoint = self.root + "/js/image-data.json"

        # The gallery ID is read from the 'data-gallery-id' attribute in the
        # page markup. NOTE(review): presumably text.extract() returns the
        # text between the two markers as its first element -- confirm.
        page = self.request(self.root + self.path).text
        gallery_id = text.extract(page, 'data-gallery-id="', '"')[0]

        query = {
            "images": gallery_id,
            "start": self.page_start,
            "count": self.per_page,
            "object": self.object_id,
        }

        while True:
            batch = self.request(endpoint, params=query).json()["images"]
            yield from batch

            # A short batch means there is nothing left to fetch.
            if len(batch) < self.per_page:
                break
            query["start"] += self.per_page

    def skip(self, num):
        """Start at image offset 'num' and report all 'num' as skipped.

        'page_start' is sent as the 'start' parameter by posts(), so it is
        an image offset here rather than a page index.
        """
        self.page_start = num
        return num

    # Looks up the "original" entry of an image object. Kept as an
    # itemgetter (not a function) so that accessing it through an instance
    # does not bind 'self'.
    _file_url = operator.itemgetter("original")

    @staticmethod
    def _prepare(post):
        """Add a parsed 'date' and flatten 'tags' to a list of names, in place."""
        created = post["dateCreated"]
        # Format matches strings such as "Fri, Jul 23 2021".
        post["date"] = text.parse_datetime(created, "%a, %b %d %Y")

        # Keep only non-empty tag names.
        names = [tag["name"] for tag in post["tags"]]
        post["tags"] = [name for name in names if name]


# NOTE(review): the remainder of this line in the patch was the header of the
# next file diff, preserved here verbatim:
# diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 50868fa7b0..e2c5bd2a74 100755 --- a/scripts/supportedsites.py +++
b/scripts/supportedsites.py @@ -24,6 +24,7 @@ "baraag" : "baraag", "bbc" : "BBC", "bcy" : "半次元", + "comicvine" : "Comic Vine", "deviantart" : "DeviantArt", "dokireader" : "Doki Reader", "drawfriends" : "Draw Friends",