Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: abstract limit #268

Merged
merged 5 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,28 +240,26 @@ Output:

### `abstract_chars_count`: item description length

To fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
Used, in combination with `abstract_delimiter`, to determine each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):

- If this value is set to `-1`, then the articles' full HTML content will be filled into the description element.
- be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description)
- Otherwise, the plugin first tries to retrieve the value of the keyword `description` from the [page metadata].
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML.
- If that fails and `abstract_delimiter` is found in the page, the article content up to (but not including) the delimiter is used.
- If the above has failed, then the plugin retrieves the first N characters of the page content, where N is the value of this setting. Retrieved content is the raw markdown converted roughly into HTML.

Be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description).

`abstract_chars_count`: number of characters to use as item description.

Default: `150`

----

#### `abstract_delimiter`: abstract delimiter

Used to fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
### `abstract_delimiter`: abstract delimiter

- If this value is set to `-1`, then the full HTML content will be filled into the description element.
- Otherwise, the plugin first tries to retrieve the value of the key `description` from the page metadata.
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML (i.e. without extension, etc.).
Please see `abstract_chars_count` for how this setting is used. A value of `""` (the empty string) disables this step.

`abstract_delimiter`: string to mark the end of the abstract.
`abstract_delimiter`: string to mark where the description ends.

Default: `<!-- more -->`

Expand Down
50 changes: 22 additions & 28 deletions mkdocs_rss_plugin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,8 @@ def get_description_or_abstract(
self, in_page: Page, chars_count: int = 160, abstract_delimiter: str = None
) -> str:
"""Returns description from page meta. If it doesn't exist, use the \
{chars_count} first characters from page content (in markdown).
page content up to {abstract_delimiter} or the {chars_count} first \
characters from page content (in markdown).

:param Page in_page: page to look at
:param int chars_count: if page.meta.description is not set, number of chars \
Expand All @@ -468,22 +469,16 @@ def get_description_or_abstract(

description = in_page.meta.get("description")

# Set chars_count to None if it is set to be unlimited, for slicing.
if chars_count < 0:
chars_count = None

# If the abstract chars is not unlimited and the description exists,
# return the description.
if description and chars_count is not None:
# If the full page is wanted (unlimited chars count)
if chars_count == -1 and (in_page.content or in_page.markdown):
if in_page.content:
return in_page.content
else:
return markdown.markdown(in_page.markdown, output_format="html5")
# If the description is explicitly given
elif description:
return description
# If no description and chars_count set to 0, return empty string
elif not description and chars_count == 0:
logger.warning(
f"No description set for page {in_page.file.src_uri} "
"and 'abstract_chars_count' set to 0. The feed won't be compliant, "
"because an item must have a description."
)
return ""
# If the abstract is cut by the delimiter
elif (
abstract_delimiter
and (
Expand All @@ -495,24 +490,23 @@ def get_description_or_abstract(
in_page.markdown[:excerpt_separator_position],
output_format="html5",
)
# If chars count is unlimited, use the html content
elif in_page.content and chars_count == -1:
if chars_count is None or len(in_page.content) < chars_count:
return in_page.content[:chars_count]
# Use markdown
elif in_page.markdown:
if chars_count is None or len(in_page.markdown) < chars_count:
return markdown.markdown(
in_page.markdown[:chars_count], output_format="html5"
)
# Use first chars_count from the markdown
elif chars_count > 0 and in_page.markdown:
if len(in_page.markdown) <= chars_count:
return markdown.markdown(in_page.markdown, output_format="html5")
else:
return markdown.markdown(
f"{in_page.markdown[: chars_count - 3]}...",
output_format="html5",
)
# Unlimited chars_count but no content is found, then return the description.
# No explicit description and no (or empty) abstract found
else:
return description if description else ""
logger.warning(
f"No description generated from metadata or content of the page {in_page.file.src_uri}, "
"therefore the feed won't be compliant, "
"because an item must have a description."
)
return ""

def get_image(self, in_page: Page, base_url: str) -> Optional[Tuple[str, str, int]]:
"""Get page's image from page meta or social cards and returns properties.
Expand Down
5 changes: 5 additions & 0 deletions tests/fixtures/docs/page_without_meta_early_delimiter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Page without meta with early delimiter

<!-- more -->

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
23 changes: 23 additions & 0 deletions tests/fixtures/mkdocs_item_delimiter_empty.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Project information
site_name: MkDocs RSS Plugin - TEST
site_description: Basic setup to test against MkDocs RSS plugin
site_author: Julien Moura (Guts)
site_url: https://guts.github.io/mkdocs-rss-plugin
copyright: "Guts - In Geo Veritas"

# Repository
repo_name: "guts/mkdocs-rss-plugin"
repo_url: "https://github.com/guts/mkdocs-rss-plugin"

use_directory_urls: true

plugins:
- rss:
abstract_delimiter: ""

theme:
name: readthedocs

# Extensions to enhance markdown
markdown_extensions:
- meta
52 changes: 49 additions & 3 deletions tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,55 @@ def test_simple_build_item_length_unlimited(self):
"Page without meta with short text",
"Blog sample",
):
self.assertGreaterEqual(
len(feed_item.description), 150, feed_item.title
)
self.assertGreater(len(feed_item.description), 150, feed_item.title)

def test_simple_build_item_delimiter(self):
    """Check that an early abstract delimiter yields a short description.

    Builds the ``docs`` test project with
    ``tests/fixtures/mkdocs_minimal.yml`` in strict mode, expects the build
    to succeed and the created-items feed to parse cleanly, then asserts
    that the item titled "Page without meta with early delimiter" has a
    description shorter than 50 characters.

    NOTE(review): presumably the minimal config leaves ``abstract_delimiter``
    at the plugin default -- confirm against the fixture.
    """
    with tempfile.TemporaryDirectory() as build_dir:
        run = self.build_docs_setup(
            testproject_path="docs",
            mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"),
            output_path=build_dir,
            strict=True,
        )
        # Log the traceback first so a failing build is diagnosable before
        # the assertions below abort the test.
        if run.exception is not None:
            failure = run.exception
            logger.debug(
                format_exception(type(failure), failure, failure.__traceback__)
            )

        self.assertEqual(run.exit_code, 0)
        self.assertIsNone(run.exception)

        # created items: must be read while the temporary directory exists
        created_feed = feedparser.parse(Path(build_dir) / OUTPUT_RSS_FEED_CREATED)
        self.assertEqual(created_feed.bozo, 0)

        for entry in created_feed.entries:
            if entry.title not in ("Page without meta with early delimiter",):
                continue
            self.assertLess(len(entry.description), 50, entry.title)

def test_simple_build_item_delimiter_empty(self):
    """Check that an empty ``abstract_delimiter`` does not cut the abstract.

    Builds the ``docs`` test project with
    ``tests/fixtures/mkdocs_item_delimiter_empty.yml`` in strict mode,
    expects the build to succeed and the created-items feed to parse
    cleanly, then asserts that the item titled
    "Page without meta with early delimiter" has a description longer than
    150 characters.
    """
    with tempfile.TemporaryDirectory() as build_dir:
        config_path = Path("tests/fixtures/mkdocs_item_delimiter_empty.yml")
        run = self.build_docs_setup(
            testproject_path="docs",
            mkdocs_yml_filepath=config_path,
            output_path=build_dir,
            strict=True,
        )
        # Log the traceback first so a failing build is diagnosable before
        # the assertions below abort the test.
        if run.exception is not None:
            failure = run.exception
            logger.debug(
                format_exception(type(failure), failure, failure.__traceback__)
            )

        self.assertEqual(run.exit_code, 0)
        self.assertIsNone(run.exception)

        # created items: must be read while the temporary directory exists
        created_feed = feedparser.parse(Path(build_dir) / OUTPUT_RSS_FEED_CREATED)
        self.assertEqual(created_feed.bozo, 0)

        for entry in created_feed.entries:
            if entry.title not in ("Page without meta with early delimiter",):
                continue
            self.assertGreater(len(entry.description), 150, entry.title)

def test_simple_build_locale_with_territory(self):
with tempfile.TemporaryDirectory() as tmpdirname:
Expand Down