diff --git a/docs/conf.py b/docs/conf.py index ba96912d..7c9716b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,9 +15,8 @@ external_projects_current_project = "rocm-docs-core" setting_all_article_info = True - -all_article_info_os = ["linux", "windows"] - +all_article_info_os = [] +all_article_info_author = "" # specific settings override any general settings (eg: all_article_info_) article_pages = [ { @@ -27,7 +26,16 @@ "date": "2024-07-03", "read-time": "2 min read", }, - {"file": "developer_guide/commitizen"}, + { + "file": "user_guide/article_info", + "os": [], + "author": "", + "date": "", + "read-time": "", + }, + { + "file": "developer_guide/commitizen", + }, ] html_theme = "rocm_docs_theme" diff --git a/src/rocm_docs/article_info.py b/src/rocm_docs/article_info.py new file mode 100644 index 00000000..06941f97 --- /dev/null +++ b/src/rocm_docs/article_info.py @@ -0,0 +1,216 @@ +"""Logic to add article info to a page. + +For all options see the user guide: +https://rocm.docs.amd.com/projects/rocm-docs-core/en/latest/user_guide/article_info.html +""" + +from typing import Any, cast + +import importlib.resources +import os +from pathlib import Path + +import bs4 +import git.repo +from sphinx.application import Sphinx +from sphinx.config import Config + + +def set_article_info(app: Sphinx, _: Config) -> None: + """Add article info headers to HTML pages.""" + if ( + app.config.setting_all_article_info is False + and len(app.config.article_pages) == 0 + ): + return + + article_info = ( + importlib.resources.files("rocm_docs") + .joinpath("rocm_docs_theme/components/article-info.html") + .read_text(encoding="utf-8") + ) + + specific_pages: list[str] = [] + + _set_page_article_info(app, article_info, specific_pages) + + if app.config.setting_all_article_info is True: + _set_all_article_info(app, article_info, specific_pages) + + +def _set_page_article_info( + app: Sphinx, article_info: str, specific_pages: list[str] +) -> None: + """Add article info headers to the configured HTML pages. + + The pages can be set in "article_pages" of the Sphinx configuration. + """ + repo = git.repo.Repo(app.srcdir, search_parent_directories=True) + for page in app.config.article_pages: + path_rel = app.project.doc2path(page["file"], False) + path_html = Path(app.outdir, path_rel).with_suffix(".html") + path_source = Path(app.srcdir, path_rel) + + # FIXME: This will silently skip all files when not building the default + # `html` format (e.g `htmlzip`, `epub` or `pdf`) + if not path_html.is_file(): + continue + + os_list = [] + page.setdefault("os", app.config.all_article_info_os) + if "linux" in page["os"]: + os_list.append("Linux") + if "windows" in page["os"]: + os_list.append("Windows") + article_os_info = " and ".join(os_list) + if os_list: + article_os_info = f"Applies to {article_os_info}" + modified_info = article_info.replace("", article_os_info) + + author = app.config.all_article_info_author + if "author" in page: + author = page["author"] + modified_info = modified_info.replace("", author) + + date_info: str | None = None + if "date" in page: + date_info = page["date"] + else: + date_info = _get_time_last_modified(repo, path_source) + + if date_info == "": + soup = bs4.BeautifulSoup(modified_info, "html.parser") + svg_to_remove = soup.find("span", class_="article-info-date-svg") + if svg_to_remove and isinstance(svg_to_remove, bs4.Tag): + svg_to_remove.decompose() + modified_info = str(soup) + + if date_info is not None: + modified_info = modified_info.replace("", date_info) + + if "read-time" in page: + read_time = page["read-time"] + else: + read_time = _estimate_read_time(path_html) + + if read_time == "": + soup = bs4.BeautifulSoup(modified_info, "html.parser") + svg_to_remove = soup.find( + "span", class_="article-info-read-time-svg" + ) + if svg_to_remove and isinstance(svg_to_remove, bs4.Tag): + svg_to_remove.decompose() + modified_info = str(soup) + + if read_time is not None: + modified_info = modified_info.replace("", read_time) + + specific_pages.append(page["file"]) + _write_article_info(path_html, modified_info) + + +def _set_all_article_info( + app: Sphinx, article_info: str, specific_pages: list[str] +) -> None: + """Add article info headers with general settings to all HTML pages. + + Pages that have specific settings (configured by "article_pages") are + skipped. + """ + repo = git.repo.Repo(app.srcdir, search_parent_directories=True) + for docname in app.project.docnames: + # skip pages with specific settings + if docname in specific_pages: + continue + + page_rel = app.project.doc2path(docname, False) + page = Path(app.outdir, page_rel).with_suffix(".html") + + # FIXME: This will silently skip all files when not building the default + # `html` format (e.g `htmlzip`, `epub` or `pdf`) + if not page.is_file(): + continue + + os_list = [] + if "linux" in app.config.all_article_info_os: + os_list.append("Linux") + if "windows" in app.config.all_article_info_os: + os_list.append("Windows") + article_os_info = " and ".join(os_list) + if os_list: + article_os_info = f"Applies to {article_os_info}" + + date_info = _get_time_last_modified(repo, Path(app.srcdir, page_rel)) + if not date_info: + date_info = cast(str, app.config.all_article_info_date) + + modified_info = article_info.replace("", article_os_info) + modified_info = modified_info.replace( + "", app.config.all_article_info_author + ) + modified_info = modified_info.replace("", date_info) + modified_info = modified_info.replace( + "", _estimate_read_time(page) + ) + + _write_article_info(page, modified_info) + + +def _get_time_last_modified(repo: git.repo.Repo, path: Path) -> str | None: + try: + time = next( + repo.iter_commits(paths=path, max_count=1) + ).committed_datetime + return time.strftime("%Y-%m-%d") + except StopIteration: + return None + + +def _estimate_read_time(file_name: Path) -> str: + def is_visible(element): + if element.parent.name in [ + "style", + "script", + "[document]", + "head", + "title", + ]: + return False + if isinstance(element, bs4.element.Comment): + return False + return element.string != "\n" + + words_per_minute = 200 + average_word_length = 5 + + with open(file_name, encoding="utf-8") as file: + html = file.read() + soup = bs4.BeautifulSoup(html, "html.parser") + page_text = soup.findAll(text=True) + visible_page_text = filter(is_visible, page_text) + average_word_count = ( + sum(len(line) for line in visible_page_text) / average_word_length + ) + time_minutes = int(max(1, round(average_word_count / words_per_minute))) + return f"{time_minutes} min read time" + + +def _write_article_info(path: os.PathLike[Any], article_info: str) -> None: + with open(path, "r+", encoding="utf8") as file: + page_html = file.read() + soup = bs4.BeautifulSoup(page_html, "html.parser") + + has_article_info = soup.find("div", id="rocm-docs-core-article-info") + if ( + has_article_info is not None + or soup.article is None + or soup.article.h1 is None + ): + return + + soup.article.h1.insert_after( + bs4.BeautifulSoup(article_info, "html.parser") + ) + file.seek(0) + file.truncate(0) + file.write(str(soup)) diff --git a/src/rocm_docs/core.py b/src/rocm_docs/core.py index 85a281e4..233cb2e7 100644 --- a/src/rocm_docs/core.py +++ b/src/rocm_docs/core.py @@ -7,23 +7,21 @@ from __future__ import annotations -from typing import Any, Generic, TypeVar, cast +from typing import Any, Generic, TypeVar -import importlib.resources import inspect import os import urllib.parse from abc import ABC, abstractmethod -from pathlib import Path -import bs4 -import git.repo from pydata_sphinx_theme.utils import ( # type: ignore[import-untyped] config_provided_by_user, ) from sphinx.application import Sphinx from sphinx.config import Config +from rocm_docs import article_info + T = TypeVar("T") @@ -114,189 +112,6 @@ def _force_notfound_prefix(app: Sphinx, _: Config) -> None: app.config.notfound_urls_prefix = components.path -def _set_article_info(app: Sphinx, _: Config) -> None: - """Add article info headers to HTML pages.""" - if ( - app.config.setting_all_article_info is False - and len(app.config.article_pages) == 0 - ): - return - - article_info = ( - importlib.resources.files("rocm_docs") - .joinpath("rocm_docs_theme/components/article-info.html") - .read_text(encoding="utf-8") - ) - - specific_pages: list[str] = [] - - _set_page_article_info(app, article_info, specific_pages) - - if app.config.setting_all_article_info is True: - _set_all_article_info(app, article_info, specific_pages) - - -def _set_page_article_info( - app: Sphinx, article_info: str, specific_pages: list[str] -) -> None: - """Add article info headers to the configured HTML pages. - - The pages can be set in "article_pages" of the Sphinx configuration. - """ - repo = git.repo.Repo(app.srcdir, search_parent_directories=True) - for page in app.config.article_pages: - path_rel = app.project.doc2path(page["file"], False) - path_html = Path(app.outdir, path_rel).with_suffix(".html") - path_source = Path(app.srcdir, path_rel) - - # FIXME: This will silently skip all files when not building the default - # `html` format (e.g `htmlzip`, `epub` or `pdf`) - if not path_html.is_file(): - continue - - article_os_info = "" - if "os" not in page: - page["os"] = app.config.all_article_info_os - if "linux" in page["os"]: - article_os_info += "Linux" - if "windows" in page["os"]: - if len(article_os_info) > 0: - article_os_info += " and " - article_os_info += "Windows" - modified_info = article_info.replace("", article_os_info) - - author = app.config.all_article_info_author - if "author" in page: - author = page["author"] - modified_info = modified_info.replace("AMD", author) - - date_info: str | None = None - if "date" in page: - date_info = page["date"] - else: - date_info = _get_time_last_modified(repo, path_source) - - if not date_info: - date_info = cast(str, app.config.all_article_info_date) - - modified_info = modified_info.replace("2023", date_info) - - if "read-time" in page: - read_time = page["read-time"] - else: - read_time = _estimate_read_time(path_html) - modified_info = modified_info.replace("5 min read", read_time) - - specific_pages.append(page["file"]) - _write_article_info(path_html, modified_info) - - -def _set_all_article_info( - app: Sphinx, article_info: str, specific_pages: list[str] -) -> None: - """Add article info headers with general settings to all HTML pages. - - Pages that have specific settings (configured by "article_pages") are - skipped. - """ - repo = git.repo.Repo(app.srcdir, search_parent_directories=True) - for docname in app.project.docnames: - # skip pages with specific settings - if docname in specific_pages: - continue - - page_rel = app.project.doc2path(docname, False) - page = Path(app.outdir, page_rel).with_suffix(".html") - - # FIXME: This will silently skip all files when not building the default - # `html` format (e.g `htmlzip`, `epub` or `pdf`) - if not page.is_file(): - continue - - article_os_info = "" - if "linux" in app.config.all_article_info_os: - article_os_info += "Linux" - if "windows" in app.config.all_article_info_os: - if len(article_os_info) > 0: - article_os_info += " and " - article_os_info += "Windows" - - date_info = _get_time_last_modified(repo, Path(app.srcdir, page_rel)) - if not date_info: - date_info = cast(str, app.config.all_article_info_date) - - modified_info = article_info.replace("", article_os_info) - modified_info = modified_info.replace( - "AMD", app.config.all_article_info_author - ) - modified_info = modified_info.replace("2023", date_info) - modified_info = modified_info.replace( - "5 min read", _estimate_read_time(page) - ) - - _write_article_info(page, modified_info) - - -def _get_time_last_modified(repo: git.repo.Repo, path: Path) -> str | None: - try: - time = next( - repo.iter_commits(paths=path, max_count=1) - ).committed_datetime - return time.strftime("%Y-%m-%d") - except StopIteration: - return None - - -def _estimate_read_time(file_name: Path) -> str: - def is_visible(element): - if element.parent.name in [ - "style", - "script", - "[document]", - "head", - "title", - ]: - return False - if isinstance(element, bs4.element.Comment): - return False - return element.string != "\n" - - words_per_minute = 200 - average_word_length = 5 - - with open(file_name, encoding="utf-8") as file: - html = file.read() - soup = bs4.BeautifulSoup(html, "html.parser") - page_text = soup.findAll(text=True) - visible_page_text = filter(is_visible, page_text) - average_word_count = ( - sum(len(line) for line in visible_page_text) / average_word_length - ) - time_minutes = int(max(1, round(average_word_count / words_per_minute))) - return f"{time_minutes} min read time" - - -def _write_article_info(path: os.PathLike[Any], article_info: str) -> None: - with open(path, "r+", encoding="utf8") as file: - page_html = file.read() - soup = bs4.BeautifulSoup(page_html, "html.parser") - - has_article_info = soup.find("div", id="rocm-docs-core-article-info") - if ( - has_article_info is not None - or soup.article is None - or soup.article.h1 is None - ): - return - - soup.article.h1.insert_after( - bs4.BeautifulSoup(article_info, "html.parser") - ) - file.seek(0) - file.truncate(0) - file.write(str(soup)) - - def setup(app: Sphinx) -> dict[str, Any]: """Set up rocm_docs.core as a Sphinx extension.""" required_extensions = [ @@ -318,7 +133,7 @@ def setup(app: Sphinx) -> dict[str, Any]: ) app.add_config_value( "all_article_info_os", - default=["linux", "windows"], + default=[], rebuild="html", types=str, ) @@ -326,7 +141,7 @@ def setup(app: Sphinx) -> dict[str, Any]: "all_article_info_author", default="", rebuild="html", types=str ) app.add_config_value( - "all_article_info_date", default="2023", rebuild="html", types=str + "all_article_info_date", default="", rebuild="html", types=str ) app.add_config_value( "all_article_info_read_time", default="", rebuild="html", types=str @@ -338,5 +153,5 @@ def setup(app: Sphinx) -> dict[str, Any]: # Run before notfound.extension sees the config (default priority(=500)) app.connect("config-inited", _force_notfound_prefix, priority=400) app.connect("config-inited", _DefaultSettings.update_config) - app.connect("build-finished", _set_article_info, priority=1000) + app.connect("build-finished", article_info.set_article_info, priority=1000) return {"parallel_read_safe": True, "parallel_write_safe": True} diff --git a/src/rocm_docs/rocm_docs_theme/components/article-info.html b/src/rocm_docs/rocm_docs_theme/components/article-info.html index 86da0747..15d41247 100644 --- a/src/rocm_docs/rocm_docs_theme/components/article-info.html +++ b/src/rocm_docs/rocm_docs_theme/components/article-info.html @@ -1,22 +1,36 @@
-
-
- Applies to -
-
-
-
-
-

AMD

-
-
-

2023

-
-
-

5 min read

-
-
-
-
-
+
+
+
+
+
+

+ + +

+
+
+

+ + +

+
+
+ +
+
+

+
+
+
+
+