Skip to content

Commit

Permalink
Add code to strip markdown before readability
Browse files Browse the repository at this point in the history
  • Loading branch information
vankesteren committed Dec 18, 2023
1 parent 3fba138 commit c85d1d0
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 5 deletions.
6 changes: 4 additions & 2 deletions ghrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from base64 import b64decode
import textstat
import httpx
from unmarkdown import unmark

# Severity label: a string restricted to exactly the three values listed here.
Severity = Literal["ok", "low", "high"]
# A (str, Severity) pair; presumably a message paired with its severity — confirm against usage.
Criteria = tuple[str, Severity]
Expand Down Expand Up @@ -126,8 +127,9 @@ async def get_readme(full_name: str, token: str | None = None) -> str | None:

def compute_readability(readme_txt: str) -> float:
    """Compute the Flesch reading-ease score of a README.

    The markdown is converted to plain text first so that markup is not
    handed to the readability scorer along with the prose.

    Args:
        readme_txt: README contents as markdown-formatted text.

    Returns:
        The value returned by ``textstat``'s ``flesch_reading_ease`` for the
        plain-text version of the README.
    """
    # Strip markdown before computing readability.
    readme_plain = unmark(readme_txt)
    return textstat.textstat.flesch_reading_ease(readme_plain)


async def get_org_repos(org: str, token: str | None = None) -> list[GitHubRepo]:
Expand Down
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@ httpx = "^0.24.1"
jinja2 = "^3.1.2"
toml = "^0.10.2"
textstat = "^0.7.3"
markdown = "^3.5.1"

[tool.ruff]
line-length = 100

[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
ignore = []

Expand Down
26 changes: 26 additions & 0 deletions unmarkdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Module to convert markdown to plain text. Code based on https://stackoverflow.com/a/54923798"""
from io import StringIO
from markdown import Markdown


def unmark_element(element, stream=None):
    """Serialize an element tree to plain text (custom markdown output format).

    Walks ``element`` depth-first and writes, in document order, each node's
    ``text``, then its children, then its ``tail``. Tags and attributes are
    never written.

    Args:
        element: Root node to serialize. It must expose ``text`` and ``tail``
            attributes and be iterable over its child nodes (the
            ``xml.etree.ElementTree.Element`` interface).
        stream: Optional text buffer to append to. It must support ``write``
            and ``getvalue``. A fresh ``StringIO`` is created when omitted.

    Returns:
        The full contents of ``stream`` after writing, which includes anything
        the caller had already written to a supplied stream.
    """
    if stream is None:
        stream = StringIO()

    def _write_node(node):
        """Append the text of ``node`` and its descendants to ``stream``."""
        if node.text:
            stream.write(node.text)
        for child in node:
            _write_node(child)
        if node.tail:
            stream.write(node.tail)

    # Recurse through the writer rather than through this function so that the
    # buffer is materialized once, not copied and discarded for every node.
    _write_node(element)
    return stream.getvalue()


# Register the plain-text serializer on the Markdown class under the "plain"
# key, so that Markdown(output_format="plain") selects unmark_element.
# This assignment runs at import time and mutates the class-level mapping.
Markdown.output_formats["plain"] = unmark_element


def unmark(text):
    """Return the plain-text rendering of markdown-formatted ``text``.

    A new ``Markdown`` converter is built on every call, configured to use
    the "plain" output format.
    """
    converter = Markdown(output_format="plain")
    # Presumably needed because the plain output has no wrapper tag for
    # convert() to strip — confirm against the Python-Markdown docs.
    converter.stripTopLevelTags = False
    plain_text = converter.convert(text)
    return plain_text

0 comments on commit c85d1d0

Please sign in to comment.