Skip to content

Commit

Permalink
[simplyhentai] add video extractor (#89)
Browse files Browse the repository at this point in the history
All videos hosted on their own servers seem to be dead,
but myhentai.tv embeds, which are most of the videos, work fine.
  • Loading branch information
mikf committed May 30, 2018
1 parent f9a6a19 commit cdcc342
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/supportedsites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searc
Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga
Sen Manga http://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/ Chapters, Manga
Simply Hentai https://www.simply-hentai.com/ Galleries
Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos
SlideShare https://www.slideshare.net/ Presentations
SmugMug https://www.smugmug.com/ |Albums, individ-5| Optional (OAuth)
Subapics https://subapics.com/ Chapters, Manga
Expand Down
62 changes: 61 additions & 1 deletion gallery_dl/extractor/simplyhentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
archive_fmt = "{image_id}"
pattern = [r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
r"(?:/(?!page|series|album|all-pages|image|gif)[^/?&#]+)+)"]
r"(?!/(?:album|gif|image|series)/)"
r"(?:/(?!(?:page|all-pages)/)[^/?&#]+)+)"]
test = [
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
Expand Down Expand Up @@ -119,3 +120,62 @@ def items(self):
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data


class SimplyhentaiVideoExtractor(Extractor):
    """Extractor for hentai videos from simply-hentai.com"""
    category = "simplyhentai"
    subcategory = "video"
    directory_fmt = ["{category}", "{type}s"]
    filename_fmt = "{title}{episode:?_//>02}.{extension}"
    archive_fmt = "{title}_{episode}"
    # group 1 captures host + first path segment; the scheme is optional
    pattern = [r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"]
    test = [
        ("https://videos.simply-hentai.com/creamy-pie-episode-02", {
            "pattern": r"https://www\.googleapis\.com/drive/v3/files"
                       r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
            "keyword": "315201bd4f3ce6bff57f4fbc631788c004d0eb7d",
            "count": 1,
        }),
        (("https://videos.simply-hentai.com"
          "/1715-tifa-in-hentai-gang-bang-3d-movie"), {
            "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
            "keyword": "fef03513d5e1a9958d63e45a1d583e2f658b1168",
        }),
    ]

    def __init__(self, match):
        """Store the normalized page URL taken from the pattern match.

        'match' is the regex match produced by 'pattern'; its first group
        is the URL without scheme, so "https://" is always prepended.
        """
        Extractor.__init__(self)
        self.url = "https://" + match.group(1)

    def items(self):
        """Yield the version, directory and URL messages for one video.

        The video page is requested and scanned for title, tags and upload
        date. The media URL is taken either from a '<source>' element when
        the page contains a '<video' tag, or from a second request to the
        embedded player page otherwise.
        """
        page = self.request(self.url).text

        # the three extractions share 'pos', so they must stay in this order
        title, pos = text.extract(page, "<title>", "</title>")
        tags, pos = text.extract(page, ">Tags</div>", "</div>", pos)
        date, pos = text.extract(page, ">Upload Date</div>", "</div>", pos)
        # keep only the part in front of the last " - " of the <title> text
        title = title.rpartition(" - ")[0]

        # default used whenever no episode number can be derived
        episode = 0

        if "<video" in page:
            video_url = text.extract(page, '<source src="', '"', pos)[0]
        else:
            # video url from myhentai.tv embed
            pos = page.index('<div class="video-frame-container">', pos)
            embed_url = text.extract(page, 'src="', '"', pos)[0].replace(
                "embedplayer.php?link=", "embed.php?name=")
            embed_page = self.request(embed_url).text
            video_url = text.extract(embed_page, '"file":"', '"')[0]

            # Split "<name> Episode <n>" only if the separator is present.
            # str.rpartition returns ("", "", title) when it is missing,
            # which would otherwise blank the title and feed the entire
            # title string into the episode field.
            name, sep, number = title.rpartition(" Episode ")
            if sep:
                title, episode = name, number

        data = text.nameext_from_url(video_url, {
            "title": text.unescape(title),
            "episode": text.parse_int(episode),
            "tags": "".join(text.split_html(tags)),
            "date": text.remove_html(date),
            "type": "video",
        })

        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, video_url, data

0 comments on commit cdcc342

Please sign in to comment.