diff --git a/docs/configuration.rst b/docs/configuration.rst index 01218754192..96329f0df44 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -969,6 +969,19 @@ Description ``"l"``, ...) to use as an upper limit. +extractor.furaffinity.descriptions +---------------------------------- +Type + ``string`` +Default + ``"text"`` +Description + Controls the format of ``description`` metadata fields. + + * ``"text"``: Plain text with HTML tags removed + * ``"html"``: Raw HTML content + + extractor.furaffinity.include ----------------------------- Type diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 752cd627acf..df5a73e26fd 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -29,6 +29,9 @@ def __init__(self, match): self.user = match.group(1) self.offset = 0 + if self.config("descriptions") == "html": + self._process_description = lambda x: x.strip() + def items(self): metadata = self.metadata() for post_id in util.advance(self.posts(), self.offset): @@ -83,8 +86,8 @@ def _parse_post(self, post_id): if tags: # new site layout data["tags"] = text.split_html(tags) - data["description"] = text.unescape(rh(extr( - 'class="section-body">', ''), "", "")) + data["description"] = self._process_description(extr( + 'class="section-body">', '')) data["views"] = pi(rh(extr('class="views">', ''))) data["favorites"] = pi(rh(extr('class="favorites">', ''))) data["comments"] = pi(rh(extr('class="comments">', ''))) @@ -109,12 +112,16 @@ def _parse_post(self, post_id): data["tags"] = text.split_html(extr( 'id="keywords">', ''))[::2] data["rating"] = extr('', ' ')
-            data[", ""), "", "")) + data["description"] = self._process_description(extr( + "", "")) data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) return data + @staticmethod + def _process_description(description): + return text.unescape(text.remove_html(description, "", "")) + def _pagination(self): num = 1