[hitomi] add extractor for tag searches (closes #697)

mikf · Apr 20, 2020 · a3de234 · a3de234
1 parent 456f6e8
commit a3de234
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 2 deletions.
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
@@ -49,7 +49,7 @@ HentaiHand           https://hentaihand.com/             Galleries, Search Resul
 HentaiHere           https://hentaihere.com/             Chapters, Manga
 Hentainexus          https://hentainexus.com/            Galleries, Search Results
 Hiperdex             https://hiperdex.net/               Artists, Chapters, Manga
-Hitomi.la            https://hitomi.la/                  Galleries
+Hitomi.la            https://hitomi.la/                  Galleries, Tag Searches
 Hypnohub             https://hypnohub.net/               Pools, Popular Images, Posts, Tag Searches
 Idol Complex         https://idol.sankakucomplex.com/    Pools, Posts, Tag Searches                         Optional
 ImageBam             http://www.imagebam.com/            Galleries, individual Images

diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
@@ -8,7 +8,8 @@
 
 """Extractors for https://hitomi.la/"""
 
-from .common import GalleryExtractor
+from .common import GalleryExtractor, Extractor, Message
+from .nozomi import decode_nozomi
 from .. import text, util
 import string
 import json
@@ -149,3 +150,35 @@ def images(self, _):
             )
             result.append((url, idata))
         return result
+
+
+class HitomiTagExtractor(Extractor):
+    """Extractor for galleries from tag, artist, etc. searches on hitomi.la"""
+    category = "hitomi"
+    subcategory = "tag"
+    pattern = (r"(?:https?://)?hitomi\.la/"
+               r"(tag|artist|group|series|type|character)/"  # listing kind
+               r"([^/?&#]+)-\d+\.html")  # trailing -N matched, not captured
+    test = (
+        ("https://hitomi.la/tag/screenshots-japanese-1.html", {
+            "pattern": HitomiGalleryExtractor.pattern,
+            "count": ">= 35",
+        }),
+        ("https://hitomi.la/artist/a1-all-1.html"),
+        ("https://hitomi.la/group/initial%2Dg-all-1.html"),
+        ("https://hitomi.la/series/amnesia-all-1.html"),
+        ("https://hitomi.la/type/doujinshi-all-1.html"),
+        ("https://hitomi.la/character/a2-all-1.html"),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.type, self.tag = match.groups()  # the two pattern groups
+
+    def items(self):
+        url = "https://ltn.hitomi.la/{}/{}.nozomi".format(self.type, self.tag)
+        data = {"_extractor": HitomiGalleryExtractor}
+
+        for gallery_id in decode_nozomi(self.request(url).content):
+            url = "https://hitomi.la/galleries/{}.html".format(gallery_id)
+            yield Message.Queue, url, data  # one queue message per gallery ID