Skip to content

Commit

Permalink
[furaffinity] add 'descriptions' option (#1231)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 19, 2021
1 parent 36f2813 commit 89a2bcb
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
13 changes: 13 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,19 @@ Description
``"l"``, ...) to use as an upper limit.


extractor.furaffinity.descriptions
----------------------------------
Type
``string``
Default
``"text"``
Description
Controls the format of ``description`` metadata fields.

* ``"text"``: Plain text with HTML tags removed and HTML entities unescaped
* ``"html"``: Raw HTML content with leading and trailing whitespace stripped


extractor.furaffinity.include
-----------------------------
Type
Expand Down
15 changes: 11 additions & 4 deletions gallery_dl/extractor/furaffinity.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def __init__(self, match):
self.user = match.group(1)
self.offset = 0

if self.config("descriptions") == "html":
self._process_description = lambda x: x.strip()

def items(self):
metadata = self.metadata()
for post_id in util.advance(self.posts(), self.offset):
Expand Down Expand Up @@ -83,8 +86,8 @@ def _parse_post(self, post_id):
if tags:
# new site layout
data["tags"] = text.split_html(tags)
data["description"] = text.unescape(rh(extr(
'class="section-body">', '</div>'), "", ""))
data["description"] = self._process_description(extr(
'class="section-body">', '</div>'))
data["views"] = pi(rh(extr('class="views">', '</span>')))
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
Expand All @@ -109,12 +112,16 @@ def _parse_post(self, post_id):
data["tags"] = text.split_html(extr(
'id="keywords">', '</div>'))[::2]
data["rating"] = extr('<img alt="', ' ')
data["description"] = text.unescape(text.remove_html(extr(
"</table>", "</table>"), "", ""))
data["description"] = self._process_description(extr(
"</table>", "</table>"))
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])

return data

@staticmethod
def _process_description(description):
    """Convert a raw HTML description into plain text.

    This is the default description handler: HTML tags are removed with
    ``text.remove_html`` (called with two empty-string arguments) and the
    result is passed through ``text.unescape``.  ``__init__`` overrides
    this attribute with a plain ``strip()`` when the ``descriptions``
    option is set to ``"html"``.
    """
    without_tags = text.remove_html(description, "", "")
    return text.unescape(without_tags)

def _pagination(self):
num = 1

Expand Down

0 comments on commit 89a2bcb

Please sign in to comment.