diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml index 6988fa4..0f48bc1 100644 --- a/.github/workflows/greetings.yml +++ b/.github/workflows/greetings.yml @@ -9,5 +9,5 @@ jobs: - uses: actions/first-interaction@v1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - issue-message: 'Welcome to this project and thank you!'' first issue' - pr-message: 'Thank you for submitting a PR, this is appreciated. Please do not forget to submit a corresponding issue, and to reference its number in the PR'' first pr' + issue-message: 'Welcome to this project and thank you!' + pr-message: 'Thank you for submitting a PR, this is appreciated. Please do not forget to submit a corresponding issue, and to reference its number in the PR'' diff --git a/.gitignore b/.gitignore index a78f96e..ee0fb85 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,9 @@ MANIFEST # MkDocs site/ +# Mkdocs-Macros +__*/ + # Other files (generated by mkdocs-macros or others) cache* diff --git a/mkdocs_macros/context.py b/mkdocs_macros/context.py index e35a8d9..8f5e6e7 100644 --- a/mkdocs_macros/context.py +++ b/mkdocs_macros/context.py @@ -322,8 +322,13 @@ def render_file(filename): return env.render(s, force_rendering=True) @env.macro - def context(obj=env.variables): - "*Default Mkdocs-Macro*: List the defined variables" + def context(obj:dict=None): + """ + *Default Mkdocs-Macro*: List an object + (by default the variables) + """ + if not obj: + obj = env.variables try: return [(var, type(value).__name__, format_value(value)) for var, value in list_items(obj)] diff --git a/mkdocs_macros/plugin.py b/mkdocs_macros/plugin.py index bf964a1..33b20fb 100644 --- a/mkdocs_macros/plugin.py +++ b/mkdocs_macros/plugin.py @@ -9,13 +9,14 @@ import importlib import os from copy import copy +import pathspec +import json +from datetime import datetime import yaml from jinja2 import ( Environment, FileSystemLoader, Undefined, DebugUndefined, StrictUndefined, ) -import pathspec - from 
mkdocs.config import config_options from mkdocs.config.config_options import Type as PluginType from mkdocs.plugins import BasePlugin @@ -25,7 +26,8 @@ from mkdocs_macros.context import define_env from mkdocs_macros.util import ( install_package, parse_package, trace, debug, - update, SuperDict, import_local_module, format_chatter, LOG, + update, SuperDict, import_local_module, format_chatter, LOG, get_log_level, + setup_directory, CustomEncoder ) # ------------------------------------------ @@ -38,9 +40,26 @@ # The default name of the Python module: DEFAULT_MODULE_NAME = 'main' # main.py -# Possible behavior in case of ignored variables or macros (first is default) +# the directory where the rendered macros must go +RENDERED_MACROS_DIRNAME = '__docs_macros_rendered' + + + +# ------------------------------------------ +# Debug +# ------------------------------------------ +# message for the front matter of markdown pages saved after rendering: +YAML_HEADER_WARNING = ( + "# IMPORTANT NOTE:" + "\n# This page was automatically generated by MkDocs-Macros " + "for debug purposes," + "\n# after rendering the macros as plain text." + f"\n# ({datetime.now():%Y-%m-%d %H:%M:%S})" + ) + +# Possible behavior in case of ignored variables or macros (first is default) class LaxUndefined(Undefined): "Pass anything wrong as blank" @@ -271,6 +290,27 @@ def reverse(x): self.filters[name] = v return v + + + @property + def rendered_macros_dir(self): + """ + The directory, beside the docs_dir, that contains + the rendered pages from the macros. 
+ """ + try: + r = self._rendered_macros_dir + except AttributeError: + raise AttributeError("Rendered macros directory is undefined") + if not os.path.isdir(self._rendered_macros_dir): + raise FileNotFoundError("Rendered macros directory is defined " + "but does not exists") + return r + + + # ------------------------------------------------ + # Property of the current page for on_page_markdown() + # ------------------------------------------------ @property def page(self) -> Page: """ @@ -296,7 +336,10 @@ def markdown(self) -> str: @markdown.setter def markdown(self, value): """ - Used to set the raw markdown of the current page + Used to set the raw markdown of the current page. + + [Especially used in the `on_pre_page_macros()` and + `on_ost_page_macros()` hooks.] """ if not isinstance(value, str): raise ValueError("Value provided to attribute markdown " @@ -561,7 +604,12 @@ def _load_modules(self): "module in '%s'." % (local_module_name, self.project_dir)) - def render(self, markdown: str, force_rendering:bool=False): + + # ---------------------------------- + # output elements + # ---------------------------------- + + def render(self, markdown: str, force_rendering:bool=False) -> str: """ Render a page through jinja2: it reads the code and executes the macros. 
@@ -605,11 +653,14 @@ def render(self, markdown: str, force_rendering:bool=False): # this is a premature rendering, no meta variables in the page meta_variables = {} - # Warning this is ternary logic(True, False, None: nothing said) render_macros = None if meta_variables: + # file_path = self.variables.page.file.src_path + file_path = self.page.file.src_path + debug(f"Metadata in page '{file_path}'", + payload=meta_variables) # determine whether the page will be rendered or not # the two formulations are accepted render_macros = meta_variables.get('render_macros') @@ -658,6 +709,44 @@ def render(self, markdown: str, force_rendering:bool=False): else: return error_message + def _save_debug_file(self, page:Page, + rendered_markdown:str) -> str: + """ + Saves a page to disk for debug/testing purposes, + with a reconstituted YAML front matter. + + Argument: + - page: the Page (page.markdown contains the old markdown) + - rendered_mardkown (the new markdown) + + Returns the saved document. + """ + dest_file = os.path.join(self.rendered_macros_dir, + page.file.src_path) + debug(f"Saving page '{page.title}' in destination file:", + dest_file) + # Create the subdirectory hierarchy if necessary + os.makedirs(os.path.dirname(dest_file), exist_ok=True) + if page.meta: + # recreate the YAML header: + yaml_values = yaml.dump(dict(page.meta), + default_flow_style=False, sort_keys=False) + document = '\n'.join([ '---', + YAML_HEADER_WARNING, + yaml_values.strip(), + '---', + rendered_markdown + ]) + else: + # re-generate the document with YAML header + document = rendered_markdown + # write on file: + debug("Saved ") + with open(dest_file, 'w') as f: + f.write(document) + return document + + # ---------------------------------- # Standard Hooks for a mkdocs plugin # ---------------------------------- @@ -669,7 +758,7 @@ def on_config(self, config): with variables, functions and filters. 
""" # WARNING: this is not the config argument: - trace("Macros arguments:", self.config) + trace("Macros arguments\n", self.config) # define the variables and macros as dictionaries # (for update function to work): self._variables = SuperDict() @@ -716,12 +805,20 @@ def on_config(self, config): register_items('filter' , self.filters , self._add_filters ) # Provide information: - debug("Variables:", list(self.variables.keys())) - if len(extra): - trace("Extra variables (config file):", list(extra.keys())) - debug("Content of extra variables (config file):", extra) + trace("Config variables:", list(self.variables.keys())) + debug("Config variables:\n", payload=json.dumps(self.variables, + cls=CustomEncoder)) + if self.macros: + trace("Config macros:", list(self.macros.keys())) + debug("Config macros:", payload=json.dumps(self.macros, + cls=CustomEncoder)) if self.filters: - trace("Extra filters (module):", list(self.filters.keys())) + trace("Config filters:", list(self.filters.keys())) + debug("Config filters:", payload=json.dumps(self.filters, + cls=CustomEncoder)) + # if len(extra): + # trace("Extra variables (config file):", list(extra.keys())) + # debug("Content of extra variables (config file):\n", dict(extra)) # Define the spec for the file paths whose rendering must be forced. @@ -793,6 +890,17 @@ def on_config(self, config): # update environment with the custom filters: self.env.filters.update(self.filters) + # ------------------- + # Setup the markdown (rendered) directory + # ------------------- + docs_dir = config['docs_dir'] + abs_docs_dir = os.path.abspath(docs_dir) + # recreate only if debug (otherewise delete): + recreate = get_log_level('DEBUG') + self._rendered_macros_dir = setup_directory(abs_docs_dir, + RENDERED_MACROS_DIRNAME, + recreate=recreate) + def on_nav(self, nav, config, files): """ Called after the site navigation is created. 
@@ -840,14 +948,11 @@ def on_page_markdown(self, markdown, page:Page, It uses the jinja2 directives, together with variables, macros and filters, to create pure markdown code. """ - # the site_navigation argument has been made optional - # (deleted in post-1.0 mkdocs, but maintained here - # for backward compatibility) - # We REALLY want the same object self._page = page if not self.variables: return markdown else: + trace("Rendering source page:", page.file.src_path) # Update the page info in the document # page is an object with a number of properties (title, url, ...) # see: https://github.com/mkdocs/mkdocs/blob/master/mkdocs/structure/pages.py @@ -880,6 +985,12 @@ def on_page_markdown(self, markdown, page:Page, # execute the post-macro functions in the various modules for func in self.post_macro_functions: func(self) + + # save the rendered page, with its YAML header + if get_log_level('DEBUG'): + self._save_debug_file(page, + rendered_markdown=self.markdown) + return self.markdown def on_post_build(self, config: config_options.Config): diff --git a/mkdocs_macros/util.py b/mkdocs_macros/util.py index c5ec4c4..203abba 100644 --- a/mkdocs_macros/util.py +++ b/mkdocs_macros/util.py @@ -6,12 +6,21 @@ import subprocess from copy import deepcopy -import os, sys, importlib.util +import os, sys, importlib.util, shutil from typing import Literal from packaging.version import Version +import json +import inspect +from datetime import datetime +from typing import Any + + from termcolor import colored import mkdocs +import hjson + + # ------------------------------------------ # Trace and debug @@ -29,14 +38,18 @@ LOG.addFilter(warning_filter) -def format_trace(*args): +def format_trace(*args, payload:str=''): """ General purpose print function, as trace, for the mkdocs-macros framework; it will appear if --verbose option is activated + + The payload is simply some text that will be added after a newline. 
""" first = args[0] rest = [str(el) for el in args[1:]] + if payload: + rest.append(f"\n{payload}") text = "[%s] - %s" % (TRACE_PREFIX, first) emphasized = colored(text, TRACE_COLOR) return ' '.join([emphasized] + rest) @@ -50,15 +63,18 @@ def format_trace(*args): 'critical': logging.CRITICAL } -def trace(*args, level:str='info'): +def trace(*args, payload:str='', level:str='info'): """ General purpose print function, as trace, for the mkdocs-macros framework; it will appear unless --quiet option is activated. + Payload is an information that goes to the next lines + (typically a json dump) + The level is 'debug', 'info', 'warning', 'error' or 'critical'. """ - msg = format_trace(*args) + msg = format_trace(*args, payload=payload) try: LOG.log(TRACE_LEVELS[level], msg) except KeyError: @@ -71,16 +87,22 @@ def trace(*args, level:str='info'): -def debug(*args): +def debug(*args, payload:str=''): """ General purpose print function, as trace, for the mkdocs-macros framework; it will appear if --verbose option is activated """ - msg = format_trace(*args) + msg = format_trace(*args, payload=payload) LOG.debug(msg) +def get_log_level(level_name:str) -> bool: + "Get the log level (INFO, DEBUG, etc.)" + level = getattr(logging, level_name.upper(), None) + return LOG.isEnabledFor(level) + + def format_chatter(*args, prefix:str, color:str=TRACE_COLOR): """ Format information for env.chatter() in macros. @@ -94,6 +116,31 @@ def format_chatter(*args, prefix:str, color:str=TRACE_COLOR): +from collections import UserDict + +class CustomEncoder(json.JSONEncoder): + """ + Custom encoder for JSON serialization. + Used for debugging purposes. 
+ """ + def default(self, obj: Any) -> Any: + if isinstance(obj, datetime): + return obj.isoformat() + if isinstance(obj, UserDict): + # for objects used by MkDocs (config, plugin, etc.s) + return dict(obj) + + elif inspect.isfunction(obj): + return f"Function: %s %s" % (inspect.signature(obj), + obj.__doc__) + try: + return super().default(obj) + except TypeError: + print(f"CANNOT INTERPRET {obj.__class__}") + return str(obj) + + + # ------------------------------------------ @@ -162,7 +209,7 @@ def import_local_module(project_dir, module_name): # ------------------------------------------ -# Utilities +# Arithmetic # ------------------------------------------ def update(d1, d2): """ @@ -198,17 +245,113 @@ class SuperDict(dict): except for standard methods """ - def __getattr__(self, name): + def __init__(self, *args, **kwargs): + # Call the superclass's __init__ method + super().__init__(*args, **kwargs) + self.__post_init__() + + def __post_init__(self): + "Recursively transform sub-dictionary" + for key, value in self.items(): + if isinstance(value, dict): + self[key] = SuperDict(value) + + def __getattr__(self, name:str): "Allow dot notation on reading" + ERR_MSG = "Cannot find attribute '%s'" % name + # if name.startswith('_'): + # raise AttributeError(ERR_MSG) try: return self[name] except KeyError: - raise AttributeError("Cannot find attribute '%s" % name) + raise AttributeError(ERR_MSG) def __setattr__(self, name, value): "Allow dot notation on writing" + # ERR_MSG = "Cannot assign an attribute starting with _ ('%s')" % name + # if name.startswith('_'): + # raise AttributeError(ERR_MSG) self[name] = value + @property + def _attributes(self): + "Make a list of the valid attributes" + return list(self.keys()) + + def _codewords(self): + "Make a list of the codewords" + return + + def __dir__(self): + "List all attributes (for autocompletion, etc.)" + return super().__dir__() + self._attributes + + + + # ------------------------------------- + # Output + # 
------------------------------------- + + def to_json(self): + "Convert to json" + return json.dumps(self, cls=CustomEncoder) + + def to_hjson(self): + """ + Convert to hjson + """ + python_dict = json.loads(self.to_json()) + return hjson.dumps(python_dict) + + + def __str__(self): + "Print a superdict" + return self.to_hjson() + return self.to_yaml() + # r = [f"{self.__class__.__name__}:"] + # r.extend([f" - {key}: {value}" for key, value in self.items()]) + # return("\n".join(r)) + + def __rich__(self): + "Print a superdict (for rich)" + r = [f"[bold red]{self.__class__.__name__}:[/]"] + r.append(self.to_hjson()) + return("\n".join(r)) + + + +# ------------------------------------------ +# File system +# ------------------------------------------ + + +def setup_directory(reference_dir: str, dir_name: str, + recreate:bool=True) -> str: + """ + Create a new directory beside the specified one. + + Parameters: + - reference_dir (str): The path of the current (reference) directory. + - dir_name (str): The name of the new directory to be created beside the current directory. 
+ + Returns + - the directory + """ + # Find the parent directory and define new path: + parent_dir = os.path.dirname(reference_dir) + new_dir = os.path.join(parent_dir, dir_name) + # Safety: prevent deletion of current_dir + if new_dir == parent_dir: + raise FileExistsError("Cannot recreate the current dir!") + # Safety: check if the new directory exists + if os.path.exists(new_dir): + # If it exists, empty its contents + shutil.rmtree(new_dir) + # Recreate the new directory + if recreate: + os.makedirs(new_dir) + return new_dir + if __name__ == '__main__': # test merging of dictionaries a = {'foo': 4, 'bar': 5} diff --git a/setup.py b/setup.py index c96e680..6051780 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # Initialization # -------------------- -VERSION_NUMBER = '1.2.0' +VERSION_NUMBER = '1.3.0' # required if you want to run document/test # pip install 'mkdocs-macros-plugin[test]' @@ -45,7 +45,7 @@ def read_file(fname): 'mkdocs>=0.17', 'jinja2', 'termcolor', - 'pyyaml', + 'pyyaml', 'hjson', 'python-dateutil', 'packaging' ], diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/fixture.py b/test/fixture.py new file mode 100644 index 0000000..629c206 --- /dev/null +++ b/test/fixture.py @@ -0,0 +1,816 @@ +""" +Fixtures for the testing of Mkdocs-Macros (pytest) +This program must be in the test directory. 
+ +(C) Laurent Franceschetti 2024 +""" + +import os +from io import StringIO +import yaml +import subprocess +import re +from dataclasses import dataclass, field +from typing import List +import json +from typing import Any, List +import difflib + + +# from rich import print +import markdown +from bs4 import BeautifulSoup +import pandas as pd +import rich +from rich.table import Table + + +"A dictionary where the keys are also accessible with the dot notation" +from mkdocs_macros.util import SuperDict + +# --------------------------- +# Initialization +# --------------------------- + +"Command for build" +MKDOCS_BUILD = ['mkdocs', 'build'] + +"The directory of this file" +REF_DIR = os.path.dirname(os.path.abspath(__file__)) + +def list_doc_projects(directory:str): + "Make the list of projects" + # Specify the directory to start the search + start_dir = directory + mkdocs_dirs = [] + for entry in os.scandir(start_dir): + if entry.is_dir(): + files_in_dir = os.listdir(entry.path) + if 'mkdocs.yml' in files_in_dir or 'mkdocs.yaml' in files_in_dir: + mkdocs_dirs.append(entry.name) + return mkdocs_dirs + + +"All subdirectories containing mkdocs.yml" +PROJECTS = list_doc_projects(REF_DIR) + +"The default docs directory" +DOCS_DEFAULT_DIRNAME = 'docs' + +"The directory containing the macros rendered" +RENDERED_MACROS_DIRNAME = '__docs_macros_rendered' + +"The error string" +MACRO_ERROR_STRING = '# _Macro Rendering Error_' + + +# --------------------------- +# Print functions +# --------------------------- +std_print = print +from rich import print +from rich.panel import Panel + +TITLE_COLOR = 'green' +def h1(s:str, color:str=TITLE_COLOR): + "Color print a 1st level title to the console" + print() + print(Panel(f"[{color} bold]{s}", style=color, width=80)) + +def h2(s:str, color:str=TITLE_COLOR): + "Color print a 2nd level title to the consule" + print() + print(f"[green bold underline]{s}") + +def h3(s:str, color:str=TITLE_COLOR): + "Color print a 2nd level title to the 
consule" + print() + print(f"[green underline]{s}") + +# --------------------------- +# Low-level functions +# --------------------------- + +def find_after(s:str, word:str, pattern:str): + """ + Find the the first occurence of a pattern after a word + (Both word and pattern can be regex, and the matching + is case insensitive.) + """ + word_pattern = re.compile(word, re.IGNORECASE) + parts = word_pattern.split(s, maxsplit=1) + # parts = s.split(word, 1) + + if len(parts) > 1: + # Strip the remainder and search for the pattern + remainder = parts[1].strip() + match = re.search(pattern, remainder, flags=re.IGNORECASE) + return match.group(0) if match else None + else: + return None + +def list_markdown_files(directory:str): + """ + Makes a list of markdown files in a directory + """ + markdown_files = [] + for root, dirs, files in os.walk(directory): + for file in files: + if file.endswith('.md') or file.endswith('.markdown'): + relative_path = os.path.relpath(os.path.join(root, file), directory) + markdown_files.append(relative_path) + return markdown_files + + +def markdown_to_html(markdown_text): + """Convert markdown text to HTML.""" + html = markdown.markdown(markdown_text, extensions=["tables"]) + # print("HTML:") + # print(html) + return html + + +def style_dataframe(df:pd.DataFrame): + """ + Apply beautiful and colorful styling to any dataframe + (patches the dataframe). 
+ """ + def _rich_str(self): + table = Table(show_header=True, header_style="bold magenta") + + # Add columns + for col in self.columns: + table.add_column(col, style="dim", width=12) + + # Add rows + for row in self.itertuples(index=False): + table.add_row(*map(str, row)) + + return table + + # reassign str to rich (to avoid messing up when rich.print is used) + df.__rich__ = _rich_str.__get__(df) + +def extract_tables_from_html(html:str, formatter:callable=None): + """ + Extract tables from a HTML source and convert them into dataframes + """ + soup = BeautifulSoup(html, 'html.parser') + tables = soup.find_all('table') + + dataframes = {} + unnamed_table_count = 0 + for table in tables: + print("Found a table") + # Find the nearest header + header = table.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) + if header: + header_text = header.get_text() + else: + unnamed_table_count += 1 + header_text = f"Unnamed Table {unnamed_table_count}" + + # Convert HTML table to DataFrame + df = pd.read_html(StringIO(str(table)))[0] + if formatter: + formatter(df) + # Add DataFrame to dictionary with header as key + dataframes[header_text] = df + + return dataframes + + +def get_frontmatter(text:str) -> tuple[str, dict]: + "Get the front matter from a markdown file" + # Split the content to extract the YAML front matter + parts = text.split('---',maxsplit=2) + if len(parts) > 1: + frontmatter = parts[1] + metadata = SuperDict(yaml.safe_load(frontmatter)) + try: + markdown = parts[2] + except IndexError: + markdown = '' + return (markdown, frontmatter, metadata) + else: + return (text, '', {}) + +def find_in_html(html: str, + pattern: str, + header: str = None, header_level: int = None) -> str | None: + """ + Find a text or regex pattern in a HTML document (case-insensitive) + + Arguments + --------- + - html: the html string + - pattern: the text or regex + - header (text or regex): if specified, it finds it first, + and then looks for the text between that header and the 
next one + (any level). + - header_level: you can speciy it, if there is a risk of ambiguity. + + Returns + ------- + The line where the pattern was found, or None + """ + soup = BeautifulSoup(html, 'html.parser') + + # Compile regex patterns with case-insensitive flag + pattern_regex = re.compile(pattern, re.IGNORECASE) + + if header: + header_regex = re.compile(header, re.IGNORECASE) + + # Find all headers (h1 to h6) + headers = soup.find_all(re.compile('^h[1-6]$', re.IGNORECASE)) + + for hdr in headers: + if header_regex.search(hdr.text): + # Check if header level is specified and matches + if header_level and hdr.name != f'h{header_level}': + continue + + # Extract text until the next header + text = [] + for sibling in hdr.find_next_siblings(): + if sibling.name and re.match('^h[1-6]$', sibling.name, re.IGNORECASE): + break + text.append(sibling.get_text(separator='\n', strip=True)) + + full_text = '\n'.join(text) + + # Search for the pattern in the extracted text + match = pattern_regex.search(full_text) + if match: + # Find the full line containing the match + lines = full_text.split('\n') + for line in lines: + if pattern_regex.search(line): + return line + else: + # Extract all text from the document + full_text = soup.get_text(separator='\n', strip=True) + + # Search for the pattern in the full text + match = pattern_regex.search(full_text) + if match: + # Find the full line containing the match + lines = full_text.split('\n') + for line in lines: + if pattern_regex.search(line): + return line + + return None + + + + + + +def get_first_h1(markdown_text: str): + """ + Get the first h1 in a markdown file, + ignoring YAML frontmatter and comments. 
+ """ + # Remove YAML frontmatter + yaml_frontmatter_pattern = re.compile(r'^---\s*\n(.*?\n)?---\s*\n', + re.DOTALL) + markdown_text = yaml_frontmatter_pattern.sub('', markdown_text) + # Regular expression to match both syntaxes for level 1 headers + h1_pattern = re.compile(r'^(# .+|.+\n=+)', re.MULTILINE) + match = h1_pattern.search(markdown_text) + if match: + header = match.group(0) + # Remove formatting + if header.startswith('#'): + return header.lstrip('# ').strip() + else: + return header.split('\n')[0].strip() + return None + + + +def get_tables(markdown_text:str) -> dict[pd.DataFrame]: + """ + Convert markdown text to HTML, extract tables, + and convert them to dataframes. + """ + html = markdown_to_html(markdown_text) + dataframes = extract_tables_from_html(html, + formatter=style_dataframe) + return dataframes + + + + +def run_command(command, *args) -> subprocess.CompletedProcess: + "Execute a command" + full_command = [command] + list(args) + return subprocess.run(full_command, capture_output=True, text=True) + + +# --------------------------- +# Log parsing +# --------------------------- + +@dataclass +class LogEntry(object): + """ + Represents a log entry + """ + + "Severity (DEBUG, INFO, WARNING)" + severity: str + + "Source, if available (e.g. [macros])" + source: str = None + + "Title, first line" + title: str = None + + "Payload, following lines" + payload: str = None + + + +def parse_log(mkdocs_log: str) -> list[LogEntry]: + """ + Parse the log entries, e.g.: + + DEBUG - Running 1 `page_markdown` events + INFO - [macros] - Rendering source page: index.md + DEBUG - [macros] - Page title: Home + WARNING - [macros] - ERROR # _Macro Rendering Error_ + + _File_: `second.md` + + _UndefinedError_: 'foo' is undefined + + ``` + Traceback (most recent call last): + File "snip/site-packages/mkdocs_macros/plugin.py", line 665, in render + DEBUG - Copying static assets. + + RULES: + 1. Every entry starts with a severity code (Uppercase). + 2. 
The message is then divided into: + - source: between brackets, e.g. [macros] + - title: the remnant of the first line, e.g. "Page title: Home" + - payload: the rest of the message + """ + log_entries = [] + current_entry = None + mkdocs_log = mkdocs_log.strip() + + for line in mkdocs_log.split('\n'): + match = re.match(r'^([A-Z]+)\s+-\s+(.*)', line) + if match: + if current_entry: + log_entries.append(current_entry) + severity = match.group(1) + message = match.group(2) + source_match = re.match(r'^\[(.*?)\]\s+-\s+(.*)', message) + if source_match: + source = source_match.group(1) + title = source_match.group(2) + else: + source = '' + title = message + current_entry = {'severity': severity, + 'source': source, + 'title': title, + 'payload': []} + elif current_entry: + # current_entry['payload'] += '\n' + line + current_entry['payload'].append(line) + if current_entry: + log_entries.append(current_entry) + + # Transform the payloads into str: + for entry in log_entries: + entry['payload'] = '\n'.join(entry['payload']).strip() + return [SuperDict(item) for item in log_entries] + +# --------------------------- +# Target file +# --------------------------- +@dataclass +class MarkdownPage(object): + "A markdown page (rendered)" + + "The destination filename in the doc hierarchy" + filename: str + + "Full path of the project directory" + project_dir: str + + "Reference directory (name of directory where the page)" + doc_dir: str + + "The full pathname of the file" + full_filename: str = field(init=False) + + "The full content of the file" + text: str = field(init=False) + + "The content of the file" + markdown: str = field(init=False) + + "The front matter" + frontmatter: str = field(init=False) + + "The metadata (the front-matter, interpreted)" + metadata: SuperDict = field(init=False) + + "The HTML code (rendered, without frills)" + html: str = field(init=False) + + "THe page rendered in plain text" + plain_text: str = field(init=False) + + "First h1" + h1: str| None 
= field(init=False) + + + def __post_init__(self): + self.full_filename = os.path.join(self.project_dir, + self.doc_dir, self.filename) + with open(self.full_filename, "r") as f: + self.text = f.read() + # Parse + (self.markdown, + self.frontmatter, + self.metadata) = get_frontmatter(self.text) + self.html = markdown_to_html(self.markdown) + soup = BeautifulSoup(self.html, "html.parser") + self.plain_text = soup.get_text() + self.h1 = get_first_h1(self.markdown) + + + def find(self, pattern: str, + header: str = None, header_level: int = None) -> str | None: + """ + Find a text or regex pattern in the markdown page (case-insensitive). + + Arguments + --------- + - html: the html string + - pattern: the text or regex + - header (text or regex): if specified, it finds it first, + and then looks for the text between that header and the next one + (any level). + - header_level: you can speciy it, if there is a risk of ambiguity. + + Returns + ------- + The line where the pattern was found, or None + """ + # it operates on the html + return find_in_html(self.html, + pattern=pattern, + header=header, header_level=header_level) + + +@dataclass +class TestMarkdownPage(MarkdownPage): + "A subclass of markdown page, for MkDocs-Macros purposes" + + "The source markdown page (before the rendering of macros)" + source_page: MarkdownPage = field(init=False) + + "The source doc dir (normally the docs dir)" + source_doc_dir: str = DOCS_DEFAULT_DIRNAME + + # "Difference of the source" + # diff_markdown: str = field(init=False) + + + def __post_init__(self): + "Additional actions after the rest" + super().__post_init__() + self.source_page = MarkdownPage(self.filename, + project_dir=self.project_dir, + doc_dir=self.source_doc_dir) + # this should be the case, always, or something is wrong + assert self.filename == self.source_page.filename + assert self.metadata == self.source_page.metadata + + + @property + def has_error(self) -> bool: + "Checks whether there is an error" + return 
class DocProject(object):
    """
    An object that describes the current MkDocs project being tested.

    It locates the project's config file, runs the build, and exposes
    the build log (parsed) and the rendered pages as properties.
    Properties that depend on the build fail if called before `build()`.
    """

    # Accepted names for the MkDocs config file, by order of preference
    CONFIG_CANDIDATES = ('mkdocs.yaml', 'mkdocs.yml')

    def __init__(self, directory: str = ''):
        """
        Initialize

        Arguments:
        - directory: the project's directory, relative to REF_DIR

        Raises FileNotFoundError if no config file is found there.
        """
        self._project_dir = os.path.join(REF_DIR, directory)
        # test existence of YAML file or fail
        self.config_file

    @property
    def project_dir(self) -> str:
        "The source directory of the MkDocs project (abs or relative path)"
        return self._project_dir

    @property
    def config_file(self) -> str:
        """
        The config file (full path); the result is cached.

        Raises FileNotFoundError if the project directory contains
        none of the accepted filenames.
        """
        try:
            return self._config_file
        except AttributeError:
            pass
        # Candidates are tried in a fixed order, so that the result does
        # not depend on the order of a directory listing.
        for filename in self.CONFIG_CANDIDATES:
            candidate = os.path.join(self.project_dir, filename)
            if os.path.isfile(candidate):
                self._config_file = candidate
                return candidate
        raise FileNotFoundError("This is not an MkDocs directory")

    @property
    def config(self) -> "SuperDict":
        """
        Get the configuration from the config file (cached).
        All main items of the config are accessible with the dot notation.
        (config.site_name, config.theme, etc.)
        """
        try:
            return self._config
        except AttributeError:
            with open(self.config_file, 'r', encoding='utf-8') as file:
                # an empty YAML file is loaded as None
                self._config = SuperDict(yaml.safe_load(file) or {})
            return self._config

    @property
    def target_doc_dir(self) -> str:
        "The target directory of markdown files (rendered macros)"
        # project_dir already starts with REF_DIR: do not join it again
        return os.path.join(self.project_dir, RENDERED_MACROS_DIRNAME)

    def build(self, strict: bool = False,
              verbose: bool = True) -> subprocess.CompletedProcess:
        """
        Build the documentation, to perform the tests

        Arguments:
        - strict (default: False) to make the build fail in case of warnings
        - verbose (default: True), to pass --verbose to the build
          (the `variables`, `macros` and `filters` properties read
          DEBUG entries of the log)

        Returns:
        (if desired) the low level result of the process
        (return code and stderr).

        This is not needed, since those values are stored, and parsed.

        Note: this changes the current working directory of the process
        to the project directory.
        """
        os.chdir(self.project_dir)
        command = MKDOCS_BUILD.copy()
        assert '--strict' not in command
        if strict:
            command.append('--strict')
        if verbose:
            command.append('--verbose')
        print("BUILD COMMAND:", command)
        # Forget everything that was derived from a previous build,
        # otherwise the log and the pages would be stale after a rebuild.
        for cached in ('_log', '_log_severities', '_pages'):
            self.__dict__.pop(cached, None)
        self._build_result = run_command(*command)
        return self.build_result

    # ----------------------------------
    # Post-build properties
    # Will fail if called before build
    # ----------------------------------
    @property
    def build_result(self) -> subprocess.CompletedProcess:
        """
        Result of the build (low level)

        Raises AttributeError if the build has not been run yet.
        """
        try:
            return self._build_result
        except AttributeError:
            raise AttributeError("No build result yet (not run)") from None

    @property
    def trace(self) -> str:
        "Return the trace of the execution (log as text)"
        return self.build_result.stderr

    @property
    def success(self) -> bool:
        "Was the execution of the build a success?"
        return self.build_result.returncode == 0

    @property
    def log(self) -> List["SuperDict"]:
        """
        The parsed trace (cached)
        """
        try:
            return self._log
        except AttributeError:
            self._log = parse_log(self.trace)
            return self._log

    @property
    def log_severities(self) -> List[str]:
        """
        List of distinct severities (DEBUG, INFO, WARNING) found in the log,
        sorted; entries without a severity are reported as '#None'.
        """
        try:
            return self._log_severities
        except AttributeError:
            self._log_severities = sorted({entry.get('severity', '#None')
                                           for entry in self.log})
            return self._log_severities

    def find_entries(self, title: str = '', source: str = '',
                     severity: str = '') -> List["SuperDict"]:
        """
        Filter entries according to criteria of title, source and severity;
        all criteria are case-insensitive, and an entry must satisfy
        all the criteria that are provided.

        Arguments:
        - title: regex
        - source: regex, for which entity issued it (macros, etc.)
        - severity: one of the existing severities

        Without any criterion, the whole log is returned.
        """
        if not (title or source or severity):
            return self.log

        severity = severity.upper()
        # Compile the regex patterns once (if provided)
        title_pattern = re.compile(title, re.IGNORECASE) if title else None
        source_pattern = re.compile(source, re.IGNORECASE) if source else None

        def matches(entry) -> bool:
            "True if the entry satisfies every criterion provided"
            if title_pattern and not title_pattern.search(
                    entry.get('title') or ''):
                return False
            if source_pattern and not source_pattern.search(
                    entry.get('source') or ''):
                return False
            # an entry without severity never matches a severity filter
            # (it must not raise a KeyError either)
            if severity and entry.get('severity') != severity:
                return False
            return True

        return [entry for entry in self.log if matches(entry)]

    def find_entry(self, title: str = '',
                   source: str = '',
                   severity: str = '') -> "SuperDict | None":
        """
        Find the first entry according to criteria of title, source
        and severity (None if there is no such entry).

        Arguments:
        - title: regex
        - source: regex
        - severity
        """
        found = self.find_entries(title,
                                  source=source,
                                  severity=severity)
        return found[0] if found else None

    # ----------------------------------
    # Smart properties (from log, etc.)
    # ----------------------------------
    @property
    def modules(self) -> List[str]:
        "List of modules imported (from the log)"
        # word between quotes:
        PATTERN = r"'([^']*)'"
        names = (find_after(entry.title, 'module', PATTERN)
                 for entry in self.find_entries('external Python module'))
        return [name for name in names if name]

    def _config_payload(self, title: str) -> "SuperDict | None":
        """
        Decode the JSON payload of the first DEBUG entry whose title
        matches; None if there is no such entry, or no payload.
        """
        entry = self.find_entry(title, severity='debug')
        if entry and entry.payload:
            return SuperDict(json.loads(entry.payload))
        return None

    @property
    def variables(self) -> "SuperDict":
        """
        List of all variables, at the moment of on_config (from the log)

        We have the data as the payload of a DEBUG entry
        called "config variables".

        If that entry is missing, the result is empty
        (so that a membership test fails cleanly, instead of crashing).
        """
        payload = self._config_payload('config variables')
        return payload if payload is not None else SuperDict()

    @property
    def macros(self) -> "SuperDict | None":
        """
        List of all macros, at the moment of on_config (from the log)

        We have the data as the payload of a DEBUG entry
        called "config macros" (None if it is missing).
        """
        return self._config_payload('config macros')

    @property
    def filters(self) -> "SuperDict | None":
        """
        List of all filters, at the moment of on_config (from the log)

        We have the data as the payload of a DEBUG entry
        called "config filters" (None if it is missing).
        """
        return self._config_payload('config filters')

    @property
    def pages(self) -> List["TestMarkdownPage"]:
        "The list of Markdown pages produced by the build (cached)"
        try:
            return self._pages
        except AttributeError:
            # project_dir and target_doc_dir already start with REF_DIR
            self._pages = [TestMarkdownPage(el,
                                            project_dir=self.project_dir,
                                            doc_dir=RENDERED_MACROS_DIRNAME,
                                            source_doc_dir=DOCS_DEFAULT_DIRNAME
                                            )
                           for el in list_markdown_files(self.target_doc_dir)]
            return self._pages

    def get_page(self, name: str) -> "TestMarkdownPage | None":
        """
        Get the first page whose filename contains `name`
        (None if there is no such page).
        """
        for page in self.pages:
            if name in page.filename:
                return page
        return None

    def get_plugin(self, name: str) -> "SuperDict":
        """
        Get the config of a plugin, by its plugin name.

        The result is empty if the plugin is declared without options,
        or is not declared at all.
        Both forms of the `plugins` section are supported:
        a list (of names, or of one-key mappings) and a mapping.
        """
        plugins = self.config.get('plugins') or []
        if isinstance(plugins, dict):
            # mapping form: the names are the keys
            return SuperDict(plugins.get(name) or {})
        for el in plugins:
            if isinstance(el, str):
                # exact name (a substring test would also match
                # other plugins whose name contains this one)
                if el == name:
                    return SuperDict()
            elif isinstance(el, dict):
                if name in el:
                    # the options are None for `- name:` without a value
                    return SuperDict(el[name] or {})
            else:
                raise ValueError(f"Unexpected content of plugin {name}!")
        return SuperDict()

    @property
    def macros_plugin(self) -> "SuperDict":
        "Return the plugin config"
        return self.get_plugin('macros')
"""
Testing the project

(C) Laurent Franceschetti 2024
"""


import pytest

from test.fixture import DocProject, find_after

CURRENT_PROJECT = 'module'


def test_pages():
    "Build the project (non-strict) and check the log and the first page"
    PROJECT = DocProject(CURRENT_PROJECT)
    PROJECT.build()
    # did not fail
    assert not PROJECT.build_result.returncode

    # ----------------
    # Check that the chatter works
    # ----------------
    entries = PROJECT.find_entries(source='main')
    assert len(entries) > 0
    # the post-build worked:
    assert PROJECT.find_entry(source='main', title='post_build')

    # ----------------
    # First page
    # ----------------
    page = PROJECT.get_page('index')
    # get_page() returns None when no filename matches
    assert page is not None, "Page 'index' not found"
    assert page.is_rendered

    VARIABLE_NAME = 'unit_price'

    # it is defined in the config file (extra)
    assert VARIABLE_NAME in PROJECT.config.extra
    price = PROJECT.config.extra.unit_price

    # check that the `unit_price` variable is rendered:
    assert VARIABLE_NAME in PROJECT.variables
    assert f"{price} euros" in page.markdown

    assert f"{PROJECT.macros_plugin.include_dir}" in page.markdown

    # check that both on_pre/post_page_macro() worked
    assert "Added Footer (Pre-macro)" in page.markdown, f"Not in {page.markdown}"
    assert page.find(r'is \d{4}-\d{2}-\d{2}', header='Pre-macro')

    assert "Added Footer (Post-macro)" in page.markdown
    assert find_after(page.plain_text, 'name of the page', 'home')
    assert page.find('Home', header='Post-macro')

    # ----------------
    # Environment page
    # ----------------
    page = PROJECT.get_page('environment')
    # TODO: no check was written for the content of this page yet;
    # at least make sure that it was produced.
    assert page is not None, "Page 'environment' not found"


def test_strict():
    "This project must build without warning, even in strict mode"
    PROJECT = DocProject(CURRENT_PROJECT)

    # the build must succeed with the --strict option,
    # since no page is expected to raise a macro rendering warning
    PROJECT.build(strict=True)
    assert not PROJECT.build_result.returncode
    warning = PROJECT.find_entry("Macro Rendering",
                                 severity='warning')
    assert not warning, "Warning found, shouldn't!"
"""
Testing the project

(C) Laurent Franceschetti 2024
"""


import pytest

from test.fixture import DocProject

CURRENT_PROJECT = 'simple'


def test_pages():
    "Build the project (non-strict) and check both pages"
    PROJECT = DocProject(CURRENT_PROJECT)
    build_result = PROJECT.build(strict=False)
    # did not fail
    return_code = PROJECT.build_result.returncode
    assert not return_code, (
        f"Build returned with {return_code} ({build_result.args})")

    # ----------------
    # First page
    # ----------------
    VARIABLE_NAME = 'greeting'

    # it is defined in the config file (extra)
    assert VARIABLE_NAME in PROJECT.config.extra

    page = PROJECT.get_page('index')
    # get_page() returns None when no filename matches
    assert page is not None, "Page 'index' not found"
    assert page.is_rendered

    # check that the `greeting` variable is rendered:
    assert VARIABLE_NAME in PROJECT.variables
    assert PROJECT.variables[VARIABLE_NAME] in page.markdown

    # ----------------
    # Second page
    # ----------------
    # there is intentionally an error (`foo` does not exist)
    page = PROJECT.get_page('second')
    assert page is not None, "Page 'second' not found"
    assert 'foo' not in PROJECT.config.extra
    assert page.is_rendered
    assert page.has_error


def test_strict():
    "This project must fail"
    PROJECT = DocProject(CURRENT_PROJECT)

    # it must fail with the --strict option,
    # because the second page contains an error
    PROJECT.build(strict=True)
    assert PROJECT.build_result.returncode
    warning = PROJECT.find_entry("Macro Rendering",
                                 severity='warning')
    assert warning, "No warning found"
"""
Testing the tester

(C) Laurent Franceschetti 2024
"""
import click
import os

try:
    from .fixture import (PROJECTS, get_tables, parse_log, h1, h2, h3,
                          std_print, DocProject, REF_DIR, list_markdown_files,
                          find_in_html)
except ImportError:
    # executed as a script (no parent package)
    from fixture import (PROJECTS, get_tables, parse_log, h1, h2, h3,
                         std_print, DocProject, REF_DIR, list_markdown_files,
                         find_in_html)


@click.command()
def test_low_level_fixtures():
    "Test the low level fixtures"

    h1("Unit tests")
    # Print the list of directories
    h2("Directories containing mkdocs.yml")
    for directory in PROJECTS:
        print(directory)
    print(PROJECTS)
    print()

    # Example usage
    h2("Parse tables")
    SOURCE_DOCUMENT = """
# Header 1
Some text.

## Table 1
| Column 1 | Column 2 |
|----------|----------|
| Value 1  | Value 2  |
| Value 3  | Value 4  |

## Table 2
| Column A | Column B |
|----------|----------|
| Value A  | Value B  |
| Value C  | Value D  |

## Another Section
Some more text.

| Column X | Column Y |
|----------|----------|
| Value X1 | Value Y1 |
| Value X2 | Value Y2 |
"""

    dfs = get_tables(SOURCE_DOCUMENT)

    # Print the tables found, with the header they were found under
    print("Dataframes:")
    for header, df in dfs.items():
        print(f"Table under '{header}':")
        print(df)

    # --------------------
    # Test parsing
    # --------------------
    h2("Parsing logs")
    TEST_CODE = """
DEBUG - Running 1 `page_markdown` events
INFO - [macros] - Rendering source page: index.md
DEBUG - [macros] - Page title: Home
DEBUG - No translations found here: '(...)/mkdocs/themes/mkdocs/locales'
WARNING - [macros] - ERROR # _Macro Rendering Error_

_File_: `second.md`

_UndefinedError_: 'foo' is undefined

```
Traceback (most recent call last):
File "snip/site-packages/mkdocs_macros/plugin.py", line 665, in render
DEBUG - Copying static assets.
FOOBAR - This is a title with a new severity

Payload here.
DEBUG - Copying static assets.
INFO - [macros - MAIN] - This means `on_post_build(env)` works
"""
    log = parse_log(TEST_CODE)
    print(log)

    h2("Search in HTML (advanced)")

    # Example usage
    # NOTE(review): the markup of this sample was lost in the copy under
    # review (only the text was left); the tags below are reconstructed
    # from the searches that follow -- confirm against the original file.
    html_doc = """
    <html><head><title>Example</title></head>
    <body>
    <h1>Main header</h1>
    <p>This is some text under the main header.</p>
    <p>More text under the main header.</p>
    <h2>Sub header</h2>
    <p>Text under the sub header.</p>
    <h1>Another main header</h1>
    <p>Text under another main header.</p>
    </body>
    </html>
    """

    print(find_in_html(html_doc, 'more text'))
    print(find_in_html(html_doc, 'MORE TEXT'))
    print(find_in_html(html_doc, 'under the main', header='Main header'))
    print(find_in_html(html_doc, 'under the main', header='Main header'))
    print(find_in_html(html_doc, 'under the', header='sub header'))


@click.command()
def test_high_level_fixtures():
    """
    Test a project
    """
    MYPROJECT = 'opt-in'
    h1(f"TESTING MKDOCS-MACROS PROJECT ({MYPROJECT})")

    h2("Config")
    myproject = DocProject(MYPROJECT)
    config = myproject.config
    print(config)

    h2("Build")
    result = myproject.build()
    assert result == myproject.build_result

    h2("Log")
    assert myproject.trace == result.stderr
    std_print(myproject.trace)

    h2("Filtering the log by severity")
    infos = myproject.find_entries(severity='INFO')
    print(f"There are {len(infos)} info items.")
    print('\n'.join(f"  {i} - {item.title}" for i, item in enumerate(infos)))

    h2("Filtering the log by source")
    infos = myproject.find_entries(source='macros')
    print(f"There are {len(infos)} `macros` items.")
    print('\n'.join(f"  {i} - {item.title}" for i, item in enumerate(infos)))

    h2("Testing the entries")
    print(myproject.find_entries('No default module'))
    print("No default module:", bool(myproject.find_entry('No default module')))
    print("Module:", myproject.find_entries('external Python module'))
    print("Module:", myproject.find_entry('external Python module'))
    print("From macros:", myproject.find_entry(title='post_build',
                                               source='macros'))

    h2("Smart properties")
    print("Modules found:", myproject.modules)

    h2("Variables")
    print(myproject.variables)

    h2("Test variables")
    print("Site name:", myproject.variables.config.site_name)

    h2("Macros")
    print(myproject.macros)

    h2("Filters")
    print(myproject.filters)

    h1("Reading the pages")
    h2("Check on pages")
    target_dir = os.path.join(REF_DIR, myproject.target_doc_dir)
    print("Official list:", list_markdown_files(target_dir))
    print("Objects target directory:", myproject.target_doc_dir)

    h2("Page objects")
    for page in myproject.pages:
        h3(f"PAGE: {page.filename}")
        print("- Main title:", page.h1)
        print("- Filename:", page.filename)
        print('- Error?', page.has_error)
        print("- Source frontmatter:", page.source_page.frontmatter)
        print("- Metadata:", page.metadata)
        print("- Location of the source file:",
              page.source_page.full_filename)
        print("- Rendered?", page.is_rendered)
        # the metadata are identical
        assert page.metadata == page.source_page.metadata
        print('')


@click.command()
@click.option('--short', is_flag=True,
              help='Test low-level fixtures only')
def command_line(short: bool):
    """
    Run the low-level fixture tests, then (unless --short is given)
    the high-level ones.
    """
    # Both tests are click commands themselves. Called plainly, a click
    # command runs in standalone mode: it parses sys.argv again (and
    # rejects `--short`) and terminates the process when it is done, so
    # that the second test would never be reached.
    # Hence they are invoked with an empty argument list, and with
    # standalone mode switched off.
    test_low_level_fixtures.main(args=[], standalone_mode=False)
    if not short:
        test_high_level_fixtures.main(args=[], standalone_mode=False)


if __name__ == '__main__':
    command_line()