diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index a65cce38a3..e545be7abb 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -8,21 +8,21 @@ Site URL Capabilities 35PHOTO https://35photo.pro/ |35photo-C| 3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag Searches 4chan https://www.4chan.org/ Boards, Threads -4plebs https://archive.4plebs.org/ Threads +4plebs https://archive.4plebs.org/ Boards, Threads 500px https://500px.com/ Galleries, individual Images, User Profiles 8kun https://8kun.top/ Boards, Threads 8muses https://comics.8muses.com/ Albums Adobe Portfolio https://www.myportfolio.com/ Galleries Adult Empire https://www.adultempire.com/ Galleries -arch.b4k.co https://arch.b4k.co/ Threads -Archive of Sins https://archiveofsins.com/ Threads -Archived.Moe https://archived.moe/ Threads +arch.b4k.co https://arch.b4k.co/ Boards, Threads +Archive of Sins https://archiveofsins.com/ Boards, Threads +Archived.Moe https://archived.moe/ Boards, Threads ArtStation https://www.artstation.com/ |artstation-C| baraag https://baraag.net/ Images from Statuses, User Profiles `OAuth `__ Behance https://www.behance.net/ Collections, Galleries, User Profiles Blogger https://www.blogger.com/ Blogs, Posts, Search Results Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag Searches Supported -Desuarchive https://desuarchive.org/ Threads +Desuarchive https://desuarchive.org/ Boards, Threads DeviantArt https://www.deviantart.com/ |deviantart-C| `OAuth `__ Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga Dynasty Reader https://dynasty-scans.com/ Chapters, individual Images, Search Results @@ -32,7 +32,7 @@ Eka's Portal https://aryion.com/ Galleries, Posts ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Supported Fallen Angels Scans https://www.fascans.com/ Chapters, Manga Fashion Nova https://www.fashionnova.com/ Collections, Products -Fireden https://boards.fireden.net/ Threads +Fireden https://boards.fireden.net/ Boards, Threads Flickr https://www.flickr.com/ |flickr-C| `OAuth `__ Fur Affinity https://www.furaffinity.net/ |furaffinity-C| `Cookies `__ Fuskator https://fuskator.com/ Galleries, Search Results @@ -89,7 +89,7 @@ Niconico Seiga https://seiga.nicovideo.jp/ individual Images, User nijie https://nijie.info/ |nijie-C| Required Nozomi.la https://nozomi.la/ Posts, Search Results, Tag Searches NSFWalbum.com https://nsfwalbum.com/ Albums -Nyafuu Archive https://archive.nyafuu.org/ Threads +Nyafuu Archive https://archive.nyafuu.org/ Boards, Threads Patreon https://www.patreon.com/ Creators, Posts, User Profiles `Cookies `__ Pawoo https://pawoo.net/ Images from Statuses, User Profiles `OAuth `__ Photobucket https://photobucket.com/ Albums, individual Images @@ -104,7 +104,7 @@ PowerManga https://read.powermanga.org/ Chapters, Manga Pururin https://pururin.io/ Galleries Read Comic Online https://readcomiconline.to/ Comic Issues, Comics Realbooru https://realbooru.com/ Pools, Posts, Tag Searches -RebeccaBlackTech https://rbt.asia/ Threads +RebeccaBlackTech https://rbt.asia/ Boards, Threads Reddit https://www.reddit.com/ |reddit-C| `OAuth `__ RedGIFs https://redgifs.com/ individual Images, Search Results, User Profiles rule #34 https://rule34.paheal.net/ Posts, Tag Searches @@ -122,7 +122,7 @@ SlideShare https://www.slideshare.net/ Presentations SmugMug https://www.smugmug.com/ |smugmug-C| `OAuth `__ Speaker Deck https://speakerdeck.com/ Presentations SubscribeStar https://www.subscribestar.com/ Posts, User Profiles Supported -The /b/ Archive https://thebarchive.com/ Threads +The /b/ Archive https://thebarchive.com/ Boards, Threads Tsumino https://www.tsumino.com/ Galleries, Search Results Supported Tumblr https://www.tumblr.com/ Likes, Posts, Tag Searches, User Profiles `OAuth `__ Twitter https://twitter.com/ |twitter-C| Supported diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 8a03dc996a..65aba29e8e 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -14,19 +14,13 @@ import operator -class FoolfuukaThreadExtractor(Extractor): +class FoolfuukaExtractor(Extractor): """Base extractor for FoolFuuka based boards/archives""" basecategory = "foolfuuka" - subcategory = "thread" - directory_fmt = ("{category}", "{board[shortname]}", - "{thread_num}{title:? - //}") - archive_fmt = "{board[shortname]}_{num}_{timestamp}" - pattern_fmt = r"/([^/]+)/thread/(\d+)" external = "default" def __init__(self, match): Extractor.__init__(self, match) - self.board, self.thread = match.groups() self.session.headers["Referer"] = self.root if self.external == "direct": self.remote = self._remote_direct @@ -54,16 +48,7 @@ def items(self): yield Message.Url, url, post def posts(self): - """Return an iterable with all posts in this thread""" - url = self.root + "/_/api/chan/thread/" - params = {"board": self.board, "num": self.thread} - data = self.request(url, params=params).json()[self.thread] - - # sort post-objects by key - posts = sorted(data.get("posts", {}).items()) - posts = map(operator.itemgetter(1), posts) - - return itertools.chain((data["op"],), posts) + """Return an iterable with all relevant posts""" def remote(self, media): """Resolve a remote media link""" @@ -76,6 +61,60 @@ def _remote_direct(media): return media["remote_media_link"] +class FoolfuukaThreadExtractor(FoolfuukaExtractor): + """Base extractor for threads on FoolFuuka based boards/archives""" + subcategory = "thread" + directory_fmt = ("{category}", "{board[shortname]}", + "{thread_num}{title:? - //}") + archive_fmt = "{board[shortname]}_{num}_{timestamp}" + pattern_fmt = r"/([^/?#]+)/thread/(\d+)" + + def __init__(self, match): + FoolfuukaExtractor.__init__(self, match) + self.board, self.thread = match.groups() + + def posts(self): + url = self.root + "/_/api/chan/thread/" + params = {"board": self.board, "num": self.thread} + data = self.request(url, params=params).json()[self.thread] + + # sort post objects by key + posts = sorted(data.get("posts", {}).items()) + posts = map(operator.itemgetter(1), posts) + + return itertools.chain((data["op"],), posts) + + +class FoolfuukaBoardExtractor(FoolfuukaExtractor): + """Base extractor for FoolFuuka based boards/archives""" + subcategory = "board" + pattern_fmt = r"/([^/?#]+)/\d*$" + + def __init__(self, match): + FoolfuukaExtractor.__init__(self, match) + self.board = match.group(1) + + def items(self): + index_base = "{}/_/api/chan/index/?board={}&page=".format( + self.root, self.board) + thread_base = "{}/{}/thread/".format(self.root, self.board) + + for page in itertools.count(1): + with self.request(index_base + format(page)) as response: + try: + threads = response.json() + except ValueError: + threads = None + + if not threads: + return + + for num, thread in threads.items(): + thread["url"] = thread_base + format(num) + thread["_extractor"] = self.childclass + yield Message.Queue, thread["url"], thread + + EXTRACTORS = { "4plebs": { "name": "_4plebs", @@ -84,6 +123,7 @@ def _remote_direct(media): "test-thread": ("https://archive.4plebs.org/tg/thread/54059290", { "url": "07452944164b602502b02b24521f8cee5c484d2a", }), + "test-board": ("https://archive.4plebs.org/tg/",), }, "archivedmoe": { "root": "https://archived.moe", @@ -96,6 +136,7 @@ def _remote_direct(media): "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87", }), ), + "test-board": ("https://archived.moe/gd/",), }, "archiveofsins": { "root": "https://archiveofsins.com", @@ -104,6 +145,7 @@ def _remote_direct(media): "url": "f612d287087e10a228ef69517cf811539db9a102", "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4", }), + "test-board": ("https://archiveofsins.com/h/",), }, "b4k": { "root": "https://arch.b4k.co", @@ -111,18 +153,21 @@ def _remote_direct(media): "test-thread": ("https://arch.b4k.co/meta/thread/196/", { "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", }), + "test-board": ("https://arch.b4k.co/meta/",), }, "desuarchive": { "root": "https://desuarchive.org", "test-thread": ("https://desuarchive.org/a/thread/159542679/", { "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", }), + "test-board": ("https://desuarchive.org/a/",), }, "fireden": { "root": "https://boards.fireden.net", "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", { "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", }), + "test-board": ("https://boards.fireden.net/sci/",), }, "nyafuu": { "root": "https://archive.nyafuu.org", @@ -130,6 +175,7 @@ def _remote_direct(media): "test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", { "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", }), + "test-board": ("https://archive.nyafuu.org/c/",), }, "rbt": { "root": "https://rbt.asia", @@ -142,6 +188,7 @@ def _remote_direct(media): "url": "61896d9d9a2edb556b619000a308a984307b6d30", }), ), + "test-board": ("https://rbt.asia/g/",), }, "thebarchive": { "root": "https://thebarchive.com", @@ -149,9 +196,12 @@ def _remote_direct(media): "test-thread": ("https://thebarchive.com/b/thread/739772332/", { "url": "e8b18001307d130d67db31740ce57c8561b5d80c", }), + "test-board": ("https://thebarchive.com/b/",), }, + "_ckey": "childclass", } generate_extractors(EXTRACTORS, globals(), ( FoolfuukaThreadExtractor, + FoolfuukaBoardExtractor, ))