Skip to content

Commit

Permalink
[xhamster] add gallery & user extractor (#281)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jun 5, 2019
1 parent 208202b commit 0960093
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/supportedsites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Warosu https://warosu.org/ Threads
Weibo https://www.weibo.com/ Images from Users, Images from Statuses
WikiArt.org https://www.wikiart.org/ Artists, Artworks
World Three http://www.slide.world-three.org/ Chapters, Manga
xHamster https://xhamster.com/ Images from Users, Galleries
XVideos https://www.xvideos.com/ Images from Users, Galleries
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches
yaplog! https://yaplog.jp/ Blogs, Posts
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
"warosu",
"weibo",
"wikiart",
"xhamster",
"xvideos",
"yandere",
"yaplog",
Expand Down
171 changes: 171 additions & 0 deletions gallery_dl/extractor/xhamster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-

# Copyright 2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://xhamster.com/"""

from .common import Extractor, Message
from .. import text
import json


# Shared URL prefix for every xhamster extractor pattern; the individual
# extractors append their own path expression to it.
BASE_PATTERN = (
    r"(?:https?://)?"                # optional scheme
    r"(?:[^.]+\.)?"                  # optional single subdomain label
    r"xhamster\.(?:com|one|desi)"    # domain with one of the known TLDs
)


class XhamsterExtractor(Extractor):
    """Common parent of all xhamster extractors

    Only provides the attributes shared by every subclass:
    the category name and the site's root URL.
    """
    # category name shared by all xhamster extractors
    category = "xhamster"
    # URL prefix that subclasses combine with site-relative paths
    root = "https://xhamster.com"


class XhamsterGalleryExtractor(XhamsterExtractor):
    """Extractor for the images of a single xhamster.com gallery"""
    subcategory = "gallery"
    directory_fmt = (
        "{category}",
        "{user[name]}",
        "{gallery[id]} {gallery[title]}",
    )
    filename_fmt = "{num:>03}_{id}.{extension}"
    archive_fmt = "{id}"
    # group 1 captures the site-relative gallery path
    pattern = BASE_PATTERN + r"(/photos/gallery/[^/?&#]+)"
    test = (
        ("https://xhamster.com/photos/gallery/11748968", {
            "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
            "count": 143,
            "keyword": {
                "comments": int,
                "count": 143,
                "favorite": bool,
                "id": int,
                "num": int,
                "height": int,
                "width": int,
                "imageURL": str,
                "pageURL": str,
                "thumbURL": str,
                "gallery": {
                    "date": "type:datetime",
                    "description": "",
                    "dislikes": int,
                    "id": 11748968,
                    "likes": int,
                    "tags": ["NON-Porn"],
                    "thumbnail": str,
                    "title": "Make the world better.",
                    "views": int,
                },
                "user": {
                    "id": 16874672,
                    "name": "Anonymousrants",
                    "retired": bool,
                    "subscribers": int,
                    "url": "https://xhamster.com/users/anonymousrants",
                    "verified": bool,
                },
            },
        }),
        ("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
        ("https://xhamster.com/photos/gallery/11748968"),
        ("https://xhamster.one/photos/gallery/11748968"),
        ("https://xhamster.desi/photos/gallery/11748968"),
        ("https://en.xhamster.com/photos/gallery/11748968"),
    )

    def __init__(self, match):
        """Remember the gallery path captured by 'pattern'"""
        super().__init__(match)
        self.path = match.group(1)
        # first page's parsed JSON; filled by metadata(), used by images()
        self.data = None

    def items(self):
        """Yield the version, directory, and one URL message per image"""
        metadata = self.metadata()

        yield Message.Version, 1
        yield Message.Directory, metadata

        for index, image in enumerate(self.images(), start=1):
            # grab the download URL before merging in gallery-level metadata
            image_url = image["imageURL"]
            image.update(metadata)
            image["num"] = index
            yield (Message.Url, image_url,
                   text.nameext_from_url(image_url, image))

    def metadata(self):
        """Load the first gallery page and return user/gallery metadata

        The parsed page data is kept in 'self.data' so that images()
        does not have to request the first page a second time.
        """
        self.data = self._data(self.root + self.path)
        author = self.data["authorModel"]
        gallery = self.data["photosGalleryModel"]

        user_info = {
            "id"         : text.parse_int(author["id"]),
            "url"        : author["pageURL"],
            "name"       : author["name"],
            "retired"    : author["retired"],
            "verified"   : author["verified"],
            "subscribers": author["subscribers"],
        }
        rating = gallery["rating"]
        gallery_info = {
            "id"         : text.parse_int(gallery["id"]),
            "tags"       : [cat["name"] for cat in gallery["categories"]],
            "date"       : text.parse_timestamp(gallery["created"]),
            "views"      : text.parse_int(gallery["views"]),
            "likes"      : text.parse_int(rating["likes"]),
            "dislikes"   : text.parse_int(rating["dislikes"]),
            "title"      : gallery["title"],
            "description": gallery["description"],
            "thumbnail"  : gallery["thumbURL"],
        }

        return {
            "user"   : user_info,
            "gallery": gallery_info,
            "count"  : text.parse_int(gallery["quantity"]),
        }

    def images(self):
        """Yield the photo objects of every page of the gallery

        Starts with the page data stored by metadata() and keeps
        requesting the next page until the active page is the last one.
        """
        page = self.data
        self.data = None  # drop the reference once it has been picked up

        while True:
            for photo in page["photosGalleryModel"]["photos"]:
                del photo["modelName"]
                yield photo

            pagination = page["pagination"]
            if pagination["active"] == pagination["maxPage"]:
                break

            # the template's last three characters get replaced by the
            # next page number (presumably a placeholder - TODO confirm)
            template = pagination["pageLinkTemplate"]
            next_url = template[:-3] + str(pagination["next"])
            page = self._data(next_url)

    def _data(self, url):
        """Request 'url' and return its 'window.initials' value as object"""
        html = self.request(url).text
        found = text.extract(html, "window.initials =", "</script>")
        # strip trailing line breaks and the statement's semicolon
        raw_json = found[0].rstrip("\n\r;")
        return json.loads(raw_json)


class XhamsterUserExtractor(XhamsterExtractor):
    """Extractor that queues every photo gallery of an xhamster user"""
    subcategory = "user"
    # group 1 captures the user name; a trailing '/photos' is optional
    pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])"
    test = (
        ("https://xhamster.com/users/nickname68/photos", {
            "pattern": XhamsterGalleryExtractor.pattern,
            "count": 50,
            "range": "1-50",
        }),
        ("https://xhamster.com/users/nickname68"),
    )

    def __init__(self, match):
        """Remember the user name captured by 'pattern'"""
        super().__init__(match)
        self.user = match.group(1)

    def items(self):
        """Yield a queue message for each gallery link on each listing page"""
        yield Message.Version, 1

        page_url = "{}/users/{}/photos".format(self.root, self.user)
        # the same dict is attached to every queued gallery URL
        data = {"_extractor": XhamsterGalleryExtractor}

        while page_url:
            extr = text.extract_from(self.request(page_url).text)

            # collect gallery links until the extractor finds no more
            gallery_url = extr('thumb-image-container" href="', '"')
            while gallery_url:
                yield Message.Queue, gallery_url, data
                gallery_url = extr('thumb-image-container" href="', '"')

            # a falsy result here means there is no next page and ends the loop
            page_url = extr('data-page="next" href="', '"')
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"thebarchive" : "The /b/ Archive",
"wikiart" : "WikiArt.org",
"worldthree" : "World Three",
"xhamster" : "xHamster",
"xvideos" : "XVideos",
"yaplog" : "yaplog!",
"yuki" : "yuki.la 4chan archive",
Expand Down

0 comments on commit 0960093

Please sign in to comment.