[lineblog] add blog and post extractors (closes #404)

mikf · Sep 6, 2019 · 6fe9a13 · 6fe9a13
1 parent 4e8a548
commit 6fe9a13
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 0 deletions.
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
@@ -62,6 +62,7 @@ Kirei Cake           https://reader.kireicake.com/       Chapters, Manga
 KissManga            https://kissmanga.com/              Chapters, Manga
 Komikcast            https://komikcast.com/              Chapters, Manga
 Konachan             https://konachan.com/               Pools, Popular Images, Posts, Tag-Searches
+LINE BLOG            https://www.lineblog.me/            Blogs, Posts
 livedoor Blog        http://blog.livedoor.jp/            Blogs, Posts
 Luscious             https://luscious.net/               Albums, Search Results                             Optional
 Manga Fox            https://fanfox.net/                 Chapters

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -54,6 +54,7 @@
     "kissmanga",
     "komikcast",
     "konachan",
+    "lineblog",
     "livedoor",
     "luscious",
     "mangadex",

diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.lineblog.me/"""
+
+from .livedoor import LivedoorBlogExtractor, LivedoorPostExtractor
+from .. import text
+
+
+class LineblogBase():
+    """Base class for lineblog extractors
+
+    Provides the category name, the root URL and the '_images()'
+    helper shared by the lineblog extractor classes in this module.
+    """
+    category = "lineblog"
+    root = "https://lineblog.me"
+
+    def _images(self, post):
+        """Collect one metadata dict per usable <img> tag in a post
+
+        The "body" entry is removed from 'post' and scanned for <img>
+        tags. Every tag for which a 'src' value was extracted produces
+        a dict with the keys "url", "num", "hash" and "post"; tags
+        without one are skipped. Returns the list of these dicts.
+        """
+        imgs = []
+        # pop() removes the body from 'post' itself, so the "post" value
+        # attached to each image below no longer contains it
+        body = post.pop("body")
+
+        # 'num' counts every <img> tag starting at 1, including tags
+        # that are skipped below
+        for num, img in enumerate(text.extract_iter(body, "<img ", ">"), 1):
+            src = text.extract(img, 'src="', '"')[0]
+            alt = text.extract(img, 'alt="', '"')[0]
+
+            if not src:
+                continue
+            # obs.line-scdn URLs with more than three slashes have something
+            # after their first path segment: cut off the last segment
+            # NOTE(review): presumably a size/variant suffix - confirm
+            if src.startswith("https://obs.line-scdn.") and src.count("/") > 3:
+                src = src.rpartition("/")[0]
+
+            # the 'alt' text is preferred over the URL as input for
+            # nameext_from_url(); presumably that helper adds the
+            # filename/extension fields - verify in the 'text' module
+            imgs.append(text.nameext_from_url(alt or src, {
+                "url" : src,
+                "num" : num,
+                # last path segment of the (possibly shortened) URL
+                "hash": src.rpartition("/")[2],
+                "post": post,
+            }))
+
+        return imgs
+
+
+# LineblogBase is listed first, so its 'category', 'root' and '_images()'
+# take precedence over the ones inherited from LivedoorBlogExtractor
+class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor):
+    """Extractor for a user's blog on lineblog.me"""
+    # optional scheme, then 'lineblog.me/<name>' with an optional trailing
+    # slash, followed by the end of the string or one of '?', '&', '#';
+    # <name> is the only capture group, and URLs with further path
+    # segments (e.g. '/archives/...') do not match
+    pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])"
+    # test case data: a URL plus the expected results for it
+    # NOTE(review): how "range", "count", "pattern" and "keyword" are
+    # evaluated is defined by the project's test runner, not visible here
+    test = ("https://lineblog.me/mamoru_miyano/", {
+        "range": "1-20",
+        "count": 20,
+        "pattern": r"https://obs.line-scdn.net/[\w-]+$",
+        "keyword": {
+            "post": {
+                "categories" : tuple,
+                "date"       : "type:datetime",
+                "description": str,
+                "id"         : int,
+                "tags"       : list,
+                "title"      : str,
+                "user"       : "mamoru_miyano"
+            },
+            "filename": str,
+            "hash"    : r"re:\w{32,}",
+            "num"     : int,
+        },
+    })
+
+
+# LineblogBase is listed first, so its 'category', 'root' and '_images()'
+# take precedence over the ones inherited from LivedoorPostExtractor
+class LineblogPostExtractor(LineblogBase, LivedoorPostExtractor):
+    """Extractor for blog posts on lineblog.me"""
+    # optional scheme, then 'lineblog.me/<name>/archives/<digits>';
+    # captures <name> and the digits, and is not anchored at the end,
+    # so a trailing '.html' is accepted
+    pattern = r"(?:https?://)?lineblog\.me/(\w+)/archives/(\d+)"
+    # test case data: a URL plus the expected results for it
+    # NOTE(review): the "url" and "keyword" values look like hex digests
+    # of the expected output - confirm against the project's test runner
+    test = ("https://lineblog.me/mamoru_miyano/archives/1919150.html", {
+        "url": "24afeb4044c554f80c374b52bf8109c6f1c0c757",
+        "keyword": "76a38e2c0074926bd3362f66f9fc0e6c41591dcb",
+    })
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
@@ -45,6 +45,7 @@
     "jaiminisbox"    : "Jaimini's Box",
     "kireicake"      : "Kirei Cake",
     "kissmanga"      : "KissManga",
+    "lineblog"       : "LINE BLOG",
     "livedoor"       : "livedoor Blog",
     "mangadex"       : "MangaDex",
     "mangafox"       : "Manga Fox",