# -*- coding: utf-8 -*-

# Copyright 2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

# Provenance: contents of gallery_dl/extractor/hiperdex.py as added by
# commit cc5079c8441b3072bb934a36ed159b41c4a99f12 (Mike Fährmann,
# Fri, 21 Feb 2020 23:40:32 +0100),
# "[hiperdex] add chapter and manga extractors (closes #606)".
# The same commit also
#   - adds "hiperdex" to the module list in
#     gallery_dl/extractor/__init__.py (between "hentainexus" and "hitomi")
#   - adds the row "Hiperdex | https://hiperdex.com/ | Chapters, Manga"
#     to docs/supportedsites.rst (between Hentainexus and Hitomi.la)

"""Extractors for https://hiperdex.com/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text
from ..cache import memcache
import re


class HiperdexBase():
    """Base class for hiperdex extractors"""
    category = "hiperdex"
    root = "https://hiperdex.com"

    @memcache(keyarg=1)
    def manga_data(self, manga, page=None):
        """Return a dict of manga-level metadata.

        'manga' is the manga's URL slug; it is the argument selected as
        cache key by 'memcache(keyarg=1)'.
        'page' is the HTML of the manga's overview page. If it is empty
        or None, the page is requested from '<root>/manga/<manga>/'.
        """
        if not page:
            url = "{}/manga/{}/".format(self.root, manga)
            page = self.request(url).text
        extr = text.extract_from(page)

        # NOTE(review): 'type', 'release' and 'status' are all read with
        # the same 'class="summary-content">' marker, so the order of the
        # extr() calls in this dict literal is significant (extr appears
        # to scan forward through the page - confirm in text.extract_from)
        return {
            # keep only the part of <title> before its last "&"
            # (presumably "<manga name> &...; <site name>" - confirm)
            "manga"  : text.unescape(extr(
                "<title>", "<").rpartition("&")[0].strip()),
            "score"  : text.parse_float(extr(
                'id="averagerate">', '<')),
            "author" : text.remove_html(extr(
                'class="author-content">', '</div>')),
            "artist" : text.remove_html(extr(
                'class="artist-content">', '</div>')),
            # every second element only
            "genre"  : text.split_html(extr(
                'class="genres-content">', '</div>'))[::2],
            "type"   : extr(
                'class="summary-content">', '<').strip(),
            "release": text.parse_int(text.remove_html(extr(
                'class="summary-content">', '</div>'))),
            "status" : extr(
                'class="summary-content">', '<').strip(),
            "description": text.remove_html(text.unescape(extr(
                'class="description-summary">', '</div>'))),
            "language": "English",
            "lang"    : "en",
        }

    def chapter_data(self, chapter):
        """Return metadata for the chapter with URL slug 'chapter'.

        The slug is split at its first "-": the part before it becomes
        the integer 'chapter', the part after it becomes 'chapter-minor'
        with a leading "." (e.g. "154-5" -> 154 and ".5"). A missing
        suffix or the suffix "end" yields an empty 'chapter-minor'.
        The result is merged with manga_data() for the lowercased
        'self.manga' slug.
        """
        chapter, _, minor = chapter.partition("-")
        data = {
            "chapter"      : text.parse_int(chapter),
            "chapter-minor": "." + minor if minor and minor != "end" else "",
        }
        data.update(self.manga_data(self.manga.lower()))
        return data


class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
    """Extractor for manga chapters from hiperdex.com"""
    archive_fmt = "{manga}_{chapter}_{page}"
    # groups: (1) URL path, (2) manga slug, (3) chapter slug
    pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com"
               r"(/manga/([^/?&#]+)/([^/?&#]+))")
    test = ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
        "url": "111bc3ee14ce91d78c275770ef63b56c9ac15d8d",
        "keyword": {
            "artist" : "Sasuga Kei",
            "author" : "Sasuga Kei",
            "chapter": 154,
            "chapter-minor": ".5",
            "description": "re:Natsuo Fujii is in love with his teacher, Hina",
            "genre"  : list,
            "manga"  : "Domestic na Kanojo",
            "release": 2014,
            "score"  : float,
            "type"   : "Manga",
        },
    })

    def __init__(self, match):
        path, self.manga, self.chapter = match.groups()
        ChapterExtractor.__init__(self, match, self.root + path + "/")

    def metadata(self, _):
        """Return the chapter metadata; the page argument is not used"""
        return self.chapter_data(self.chapter)

    def images(self, page):
        """Return a list of (image URL, None) tuples found in 'page'"""
        return [
            (url.strip(), None)
            for url in re.findall(r'id="image-\d+"\s+src="([^"]+)', page)
        ]


class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
    """Extractor for manga from hiperdex.com"""
    chapterclass = HiperdexChapterExtractor
    # groups: (1) URL path, (2) manga slug
    pattern = r"(?:https?://)?(?:www\.)?hiperdex\.com(/manga/([^/?&#]+))/?$"
    test = ("https://hiperdex.com/manga/youre-not-that-special/", {
        "count": 51,
        "pattern": HiperdexChapterExtractor.pattern,
        "keyword": {
            "artist" : "Bolp",
            "author" : "Abyo4",
            "chapter": int,
            "chapter-minor": "",
            "description": "re:I didn’t think much of the creepy girl in ",
            "genre"  : list,
            "manga"  : "You're Not That Special!",
            "release": 2019,
            "score"  : float,
            "status" : "Completed",
            "type"   : "Manhwa",
        },
    })

    def __init__(self, match):
        path, self.manga = match.groups()
        MangaExtractor.__init__(self, match, self.root + path + "/")

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples.

        Chapter links are collected from the part of 'page' between
        'class="page-content-listing' and the next '</ul>'.
        """
        # Prime the manga_data() cache with the already downloaded page.
        # chapter_data() looks the entry up under the lowercased slug, so
        # the same key has to be used here to avoid a second request.
        self.manga_data(self.manga.lower(), page)
        results = []
        last = None

        page = text.extract(
            page, 'class="page-content-listing', '</ul>')[0]
        # NOTE(review): 'pattern' is defined as a str in this module, so
        # .finditer() relies on it having been replaced by a compiled
        # regex before this method runs (presumably when the extractor
        # package registers its classes) - confirm
        for match in HiperdexChapterExtractor.pattern.finditer(page):
            path = match.group(1)
            # skip directly repeated links to the same chapter
            if last != path:
                last = path
                results.append((
                    self.root + path,
                    # last path segment is the chapter slug
                    self.chapter_data(path.rpartition("/")[2]),
                ))

        return results