# -*- coding: utf-8 -*-

# Copyright 2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://xhamster.com/"""

import json

from .common import Extractor, Message
from .. import text


# Optional scheme, optional single subdomain label, and one of the
# recognized top-level domains.
BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?xhamster\.(?:com|one|desi)"


class XhamsterExtractor(Extractor):
    """Base class for xhamster extractors"""
    category = "xhamster"
    # All requests are built on this root, whichever domain was matched.
    root = "https://xhamster.com"


class XhamsterGalleryExtractor(XhamsterExtractor):
    """Extractor for image galleries on xhamster.com"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{user[name]}",
                     "{gallery[id]} {gallery[title]}")
    filename_fmt = "{num:>03}_{id}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"(/photos/gallery/[^/?&#]+)"
    test = (
        ("https://xhamster.com/photos/gallery/11748968", {
            "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
            "count": 143,
            "keyword": {
                "comments": int,
                "count": 143,
                "favorite": bool,
                "id": int,
                "num": int,
                "height": int,
                "width": int,
                "imageURL": str,
                "pageURL": str,
                "thumbURL": str,
                "gallery": {
                    "date": "type:datetime",
                    "description": "",
                    "dislikes": int,
                    "id": 11748968,
                    "likes": int,
                    "tags": ["NON-Porn"],
                    "thumbnail": str,
                    "title": "Make the world better.",
                    "views": int,
                },
                "user": {
                    "id": 16874672,
                    "name": "Anonymousrants",
                    "retired": bool,
                    "subscribers": int,
                    "url": "https://xhamster.com/users/anonymousrants",
                    "verified": bool,
                },
            },
        }),
        ("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
        ("https://xhamster.com/photos/gallery/11748968"),
        ("https://xhamster.one/photos/gallery/11748968"),
        ("https://xhamster.desi/photos/gallery/11748968"),
        ("https://en.xhamster.com/photos/gallery/11748968"),
    )

    def __init__(self, match):
        """Store the gallery path captured by 'pattern'."""
        XhamsterExtractor.__init__(self, match)
        self.path = match.group(1)
        # Parsed data of the first gallery page; set by metadata() and
        # consumed (then cleared) by images().
        self.data = None

    def items(self):
        """Yield the version, directory and one URL message per image."""
        data = self.metadata()
        yield Message.Version, 1
        yield Message.Directory, data

        for num, image in enumerate(self.images(), 1):
            url = image["imageURL"]
            # Every image carries the shared user/gallery metadata plus
            # its 1-based position within the gallery.
            image.update(data)
            image["num"] = num
            yield Message.Url, url, text.nameext_from_url(url, image)

    def metadata(self):
        """Load the first gallery page and return the shared metadata.

        The parsed page data is kept in 'self.data' so that images() can
        reuse it instead of requesting the same page again.
        """
        self.data = self._data(self.root + self.path)
        user = self.data["authorModel"]
        imgs = self.data["photosGalleryModel"]
        rating = imgs["rating"]

        user_info = {
            "id": text.parse_int(user["id"]),
            "url": user["pageURL"],
            "name": user["name"],
            "retired": user["retired"],
            "verified": user["verified"],
            "subscribers": user["subscribers"],
        }
        gallery_info = {
            "id": text.parse_int(imgs["id"]),
            "tags": [c["name"] for c in imgs["categories"]],
            "date": text.parse_timestamp(imgs["created"]),
            "views": text.parse_int(imgs["views"]),
            "likes": text.parse_int(rating["likes"]),
            "dislikes": text.parse_int(rating["dislikes"]),
            "title": imgs["title"],
            "description": imgs["description"],
            "thumbnail": imgs["thumbURL"],
        }

        return {
            "user": user_info,
            "gallery": gallery_info,
            "count": text.parse_int(imgs["quantity"]),
        }

    def images(self):
        """Yield the photo dicts of the gallery, following its pagination.

        Must be called after metadata(), which provides the data of the
        first page.
        """
        data = self.data
        # Drop the instance reference; the page data is not needed again.
        self.data = None

        while True:
            for image in data["photosGalleryModel"]["photos"]:
                # Remove the entry if present; a photo without it must
                # not abort the whole gallery with a KeyError.
                image.pop("modelName", None)
                yield image

            pgntn = data["pagination"]
            if pgntn["active"] == pgntn["maxPage"]:
                return
            # The template's last three characters are replaced by the
            # next page number (presumably a '{#}' placeholder).
            url = pgntn["pageLinkTemplate"][:-3] + str(pgntn["next"])
            data = self._data(url)

    def _data(self, url):
        """Request 'url' and return its decoded 'window.initials' data."""
        page = self.request(url).text
        # An empty end marker cannot delimit the payload; the JavaScript
        # assignment is bounded by the closing tag of its script element.
        payload = text.extract(page, "window.initials =", "</script>")[0]
        # Strip the statement terminator and trailing line breaks so that
        # only the JSON document itself remains.
        return json.loads(payload.rstrip("\n\r;"))


class XhamsterUserExtractor(XhamsterExtractor):
    """Extractor for all galleries of an xhamster user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])"
    test = (
        ("https://xhamster.com/users/nickname68/photos", {
            "pattern": XhamsterGalleryExtractor.pattern,
            "count": 50,
            "range": "1-50",
        }),
        ("https://xhamster.com/users/nickname68"),
    )

    def __init__(self, match):
        """Store the user name captured by 'pattern'."""
        XhamsterExtractor.__init__(self, match)
        self.user = match.group(1)

    def items(self):
        """Yield a queue message for every gallery listed for the user."""
        yield Message.Version, 1
        url = "{}/users/{}/photos".format(self.root, self.user)
        data = {"_extractor": XhamsterGalleryExtractor}

        while url:
            extr = text.extract_from(self.request(url).text)

            # Queue each gallery link found on the current listing page.
            while True:
                gallery_url = extr('thumb-image-container" href="', '"')
                if not gallery_url:
                    break
                yield Message.Queue, gallery_url, data

            # A falsy result (no "next" link) ends the outer loop.
            url = extr('data-page="next" href="', '"')