add extractors for fantia and fanbox #1459

Merged
merged 24 commits on Apr 25, 2021
Changes from 10 commits
Commits
24 commits
1413e10
add extractors for fantia and fanbox
thatfuckingbird Apr 10, 2021
3502643
appease linter
thatfuckingbird Apr 10, 2021
9fac87a
make docstrings unique
thatfuckingbird Apr 10, 2021
7750441
[fantia] refactor post extraction
thatfuckingbird Apr 13, 2021
bf6d6d9
[fantia] capitalize
thatfuckingbird Apr 13, 2021
e0fa068
[fantia] improve regex pattern
thatfuckingbird Apr 13, 2021
2694c1e
code style
thatfuckingbird Apr 13, 2021
916a7f8
capitalize
thatfuckingbird Apr 13, 2021
a4c8cc8
[fanbox] use BASE_PATTERN for url regexes
thatfuckingbird Apr 13, 2021
0f56a5e
[fanbox] refactor metadata and post extraction
thatfuckingbird Apr 13, 2021
abdcea7
[fanbox] improve url base pattern
thatfuckingbird Apr 21, 2021
aa4bdb8
[fanbox] accept creator page links ending with /posts
thatfuckingbird Apr 21, 2021
4d6b94a
[fanbox] more tests
thatfuckingbird Apr 21, 2021
f6006f0
[fantia] improved pagination #1
thatfuckingbird Apr 21, 2021
d23835c
[fantia] improved pagination #2
thatfuckingbird Apr 21, 2021
7d45676
[fantia] improved pagination #3
thatfuckingbird Apr 21, 2021
afd0ee4
[fanbox] misc. code logic improvements
thatfuckingbird Apr 21, 2021
b5c81ed
[fantia] finish restructuring pagination code
thatfuckingbird Apr 21, 2021
e21c3a4
[fanbox] avoid making a request for each individual post when process…
thatfuckingbird Apr 21, 2021
de0fd08
[fanbox] support embedded videos
thatfuckingbird Apr 21, 2021
f149242
[fanbox] fix errors
thatfuckingbird Apr 21, 2021
cac1316
[fanbox] document extractor.fanbox.videos
thatfuckingbird Apr 21, 2021
599a790
[fanbox] handle "article" and "entry" post types, all embeds
thatfuckingbird Apr 23, 2021
7c9e000
[fanbox] fix downloading of embedded fanbox posts
thatfuckingbird Apr 23, 2021
12 changes: 12 additions & 0 deletions docs/supportedsites.md
@@ -151,6 +151,18 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Chapters, Manga</td>
    <td></td>
</tr>
<tr>
    <td>Fanbox</td>
    <td>https://www.fanbox.cc/</td>
    <td>Creators, Posts</td>
    <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
    <td>Fantia</td>
    <td>https://fantia.jp/</td>
    <td>Creators, Posts</td>
    <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
    <td>Flickr</td>
    <td>https://www.flickr.com/</td>
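As the table notes, both new extractors require login cookies (FANBOXSESSID for Fanbox, _session_id for Fantia, matching the warnings in the extractor code below); the linked README section covers how to pass them to gallery-dl. Purely as a hypothetical illustration of what the extractors expect from the session, the same cookies could be attached to a plain requests session like this:

# Hypothetical illustration only, not part of this PR: the cookie names are
# taken from the extractor warnings below; the values must come from a
# logged-in browser session.
import requests

session = requests.Session()
session.cookies.set("FANBOXSESSID", "<fanbox session cookie>", domain=".fanbox.cc")
session.cookies.set("_session_id", "<fantia session cookie>", domain="fantia.jp")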
2 changes: 2 additions & 0 deletions gallery_dl/extractor/__init__.py
@@ -31,6 +31,8 @@
"erome",
"exhentai",
"fallenangels",
"fanbox",
"fantia",
"flickr",
"furaffinity",
"fuskator",
157 changes: 157 additions & 0 deletions gallery_dl/extractor/fanbox.py
@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.fanbox.cc/"""

from .common import Extractor, Message
from .. import text


BASE_PATTERN = (
r"(?:https?://)?(?:([a-zA-Z0-9_-]+)\.fanbox\.cc|"
r"(?:www\.)?fanbox\.cc/@([^/?#]+))"
)


class FanboxExtractor(Extractor):
    """Base class for Fanbox extractors"""
    category = "fanbox"
    root = "https://www.fanbox.cc"
    directory_fmt = ("{category}", "{creatorId}")
    filename_fmt = "{id}_{num}.{extension}"
    archive_fmt = "{id}_{num}"
    _warning = True

    def items(self):
        yield Message.Version, 1

        if self._warning:
            if "FANBOXSESSID" not in self.session.cookies:
                self.log.warning("no 'FANBOXSESSID' cookie set")
            FanboxExtractor._warning = False

        for content_body, post in self.posts():
            yield Message.Directory, post
            yield from self._get_urls_from_post(content_body, post)

    def posts(self):
        """Return all relevant post objects"""

    def _pagination(self, url):
        headers = {"Origin": self.root}

        while url:
            url = text.ensure_http_scheme(url)
            body = self.request(url, headers=headers).json()["body"]
            for item in body["items"]:
                yield self._get_post_data(item["id"])

            url = body["nextUrl"]
Owner:

The items returned from https://api.fanbox.cc/post.listCreator?creatorId=USER&limit=10 appear to be, at least for xub.fanbox.cc, more or less the same as the single-item results from https://api.fanbox.cc/post.info?postId=ID (*) It's only missing comments, imageForShare, and the entries about next/previous posts.

If that's true in general (posts with videos, embeds, etc), we don't need to fetch data from /post.info for every post and can use the items returned from /post.listCreator directly.

(*) Diff "/post.listCreator" - "/post.info" for post 2059366
   "creatorId": "xub",
-  "hasAdultContent": true,
-  "commentList": {
-    "items": [],
-    "nextUrl": null
-  },
-  "nextPost": {
-    "id": "2085876",
-    "title": "Skeb Commission",
-    "publishedDatetime": "2021-04-03 04:56:12"
-  },
-  "prevPost": {
-    "id": "2009099",
-    "title": "メスガキ〇〇★〇〇〇〇〇をわからせる絵",
-    "publishedDatetime": "2021-03-12 19:01:08"
-  },
-  "imageForShare": "https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/2059366/cover/JuTXNtvo1BRN93cLW371vVd6.jpeg"
+  "hasAdultContent": true
 }

Contributor Author:

Seems like you are right; at least, I couldn't find any posts where listCreator didn't have all the content. I updated the code to use that directly; it's easy enough to change it back if it turns out to be needed.
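To make the discussion above concrete, here is a minimal sketch (not part of the diff shown, which is still the 10-commit state) of a _pagination that consumes the post.listCreator items directly instead of requesting post.info per post. It mirrors the per-post processing in _get_post_data below and assumes, as discussed, that each item already carries the full post body; the actual change appears to land in commit e21c3a4 above.

    def _pagination(self, url):
        # Sketch only: assumes every entry in body["items"] already contains
        # the complete post body, so no extra post.info request is needed.
        headers = {"Origin": self.root}

        while url:
            url = text.ensure_http_scheme(url)
            body = self.request(url, headers=headers).json()["body"]

            for item in body["items"]:
                content_body = item.pop("body", None)
                item["date"] = text.parse_datetime(item["publishedDatetime"])
                item["text"] = (content_body.get("text")
                                if content_body else None)
                item["isCoverImage"] = False
                yield content_body, item

            url = body["nextUrl"]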


    def _get_post_data(self, post_id):
        """Fetch and process post data"""
        headers = {"Origin": self.root}
        url = "https://api.fanbox.cc/post.info?postId="+post_id
        post = self.request(url, headers=headers).json()["body"]

        content_body = post.pop("body", None)
        post["date"] = text.parse_datetime(post["publishedDatetime"])
        post["text"] = content_body.get("text") if content_body else None
        post["isCoverImage"] = False

        return content_body, post

    def _get_urls_from_post(self, content_body, post):
        num = 0
        cover_image = post.get("coverImageUrl")
        if cover_image:
            final_post = post.copy()
            final_post["isCoverImage"] = True
            final_post["fileUrl"] = cover_image
            text.nameext_from_url(cover_image, final_post)
            final_post["num"] = num
            num += 1
            yield Message.Url, cover_image, final_post

for group in ("images", "imageMap"):
if group in (content_body or []):
                for item in content_body[group]:
                    final_post = post.copy()
                    final_post["fileUrl"] = item["originalUrl"]
                    text.nameext_from_url(item["originalUrl"], final_post)
                    if "extension" in item:
                        final_post["extension"] = item["extension"]
                    final_post["fileId"] = item.get("id")
                    final_post["width"] = item.get("width")
                    final_post["height"] = item.get("height")
                    final_post["num"] = num
                    num += 1
                    yield Message.Url, item["originalUrl"], final_post

for group in ("files", "fileMap"):
if group in (content_body or []):
                for item in content_body[group]:
                    final_post = post.copy()
                    final_post["fileUrl"] = item["url"]
                    text.nameext_from_url(item["url"], final_post)
                    if "extension" in item:
                        final_post["extension"] = item["extension"]
                    if "name" in item:
                        final_post["filename"] = item["name"]
                    final_post["fileId"] = item.get("id")
                    final_post["num"] = num
                    num += 1
                    yield Message.Url, item["url"], final_post


class FanboxCreatorExtractor(FanboxExtractor):
    """Extractor for a Fanbox creator's works"""
    subcategory = "creator"
    pattern = BASE_PATTERN + r"/?$"
    test = (
        ("https://xub.fanbox.cc", {
            "range": "1-15",
            "count": ">= 15",
            "keyword": {
                "creatorId" : "xub",
                "tags" : list,
                "title" : str,
            },
        }),
    )

    def __init__(self, match):
        FanboxExtractor.__init__(self, match)
        self.creator_id = match.group(1) or match.group(2)

    def posts(self):
        url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10"

        return self._pagination(url.format(self.creator_id))


class FanboxPostExtractor(FanboxExtractor):
    """Extractor for media from a single Fanbox post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/posts/(\d+)"
    test = (
        ("https://www.fanbox.cc/@xub/posts/1910054", {
            "count": 3,
            "keyword": {
                "title": "えま★おうがすと",
                "tags": list,
                "hasAdultContent": True,
                "isCoverImage": False
            },
        }),
    )

    def __init__(self, match):
        FanboxExtractor.__init__(self, match)
        self.post_id = match.group(3)

    def posts(self):
        yield self._get_post_data(self.post_id)
149 changes: 149 additions & 0 deletions gallery_dl/extractor/fantia.py
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://fantia.jp/"""

from .common import Extractor, Message
from .. import text


class FantiaExtractor(Extractor):
    """Base class for Fantia extractors"""
    category = "fantia"
    root = "https://fantia.jp"
    directory_fmt = ("{category}", "{fanclub_id}")
    filename_fmt = "{post_id}_{file_id}.{extension}"
    archive_fmt = "{post_id}_{file_id}"
    _warning = True

    def items(self):
        yield Message.Version, 1

        if self._warning:
            if "_session_id" not in self.session.cookies:
                self.log.warning("no '_session_id' cookie set")
            FantiaExtractor._warning = False

        for full_response, post in self.posts():
            yield Message.Directory, post
            for url, url_data in self._get_urls_from_post(full_response, post):
                fname = url_data["content_filename"] or url
                text.nameext_from_url(fname, url_data)
                url_data["file_url"] = url
                yield Message.Url, url, url_data

    def posts(self):
        """Return all relevant post objects"""

    def _pagination(self, base_url):
        headers = {"Referer": self.root}
        page = 1
        posts_found = True

        while posts_found:
            url = base_url+str(page)
            url = text.ensure_http_scheme(url)
            gallery_page_html = self.request(url, headers=headers).text
            posts_found = False
            for post_id in text.extract_iter(
                    gallery_page_html, 'class="link-block" href="/posts/', '"'
            ):
                posts_found = True
                yield self._get_post_data(post_id)

            page += 1

    def _get_post_data(self, post_id):
        """Fetch and process post data"""
        headers = {"Referer": self.root}
        url = self.root+"/api/v1/posts/"+post_id
        resp = self.request(url, headers=headers).json()["post"]
        post = {
            "post_id": resp["id"],
            "post_url": self.root + "/posts/" + str(resp["id"]),
            "post_title": resp["title"],
            "comment": resp["comment"],
            "rating": resp["rating"],
            "posted_at": resp["posted_at"],
            "fanclub_id": resp["fanclub"]["id"],
            "fanclub_user_id": resp["fanclub"]["user"]["id"],
            "fanclub_user_name": resp["fanclub"]["user"]["name"],
            "fanclub_name": resp["fanclub"]["name"],
            "fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
            "tags": resp["tags"]
        }
        return resp, post

    def _get_urls_from_post(self, resp, post):
        """Extract individual URL data from the response"""
        if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
            post["content_filename"] = ""
            post["content_category"] = "thumb"
            post["file_id"] = "thumb"
            yield resp["thumb"]["original"], post

        for content in resp["post_contents"]:
            post["content_category"] = content["category"]
            post["content_title"] = content["title"]
            post["content_filename"] = content.get("filename", "")
            post["content_id"] = content["id"]
            if "post_content_photos" in content:
                for photo in content["post_content_photos"]:
                    post["file_id"] = photo["id"]
                    yield photo["url"]["original"], post
            if "download_uri" in content:
                post["file_id"] = content["id"]
                yield self.root+"/"+content["download_uri"], post


class FantiaCreatorExtractor(FantiaExtractor):
    """Extractor for a Fantia creator's works"""
    subcategory = "creator"
    pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
    test = (
        ("https://fantia.jp/fanclubs/6939", {
            "range": "1-25",
            "count": ">= 25",
            "keyword": {
                "fanclub_user_id" : 52152,
                "tags" : list,
                "title" : str,
            },
        }),
    )

    def __init__(self, match):
        FantiaExtractor.__init__(self, match)
        self.creator_id = match.group(1)

    def posts(self):
        base_url = self.root+"/fanclubs/"+self.creator_id+"/posts?page="

        return self._pagination(base_url)


class FantiaPostExtractor(FantiaExtractor):
    """Extractor for media from a single Fantia post"""
    subcategory = "post"
    pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
    test = (
        ("https://fantia.jp/posts/508363", {
            "count": 6,
            "keyword": {
                "post_title": "zunda逆バニーでおしりコッショリ",
                "tags": list,
                "rating": "adult",
                "post_id": 508363
            },
        }),
    )

    def __init__(self, match):
        FantiaExtractor.__init__(self, match)
        self.post_id = match.group(1)

    def posts(self):
        yield self._get_post_data(self.post_id)
2 changes: 2 additions & 0 deletions scripts/supportedsites.py
@@ -204,6 +204,8 @@
"e621" : "Supported",
"e-hentai" : "Supported",
"exhentai" : "Supported",
"fanbox" : _COOKIES,
"fantia" : _COOKIES,
"flickr" : _OAUTH,
"furaffinity" : _COOKIES,
"idolcomplex" : "Supported",