From 81040e001635ff069f80dd2e8c08b2eb784fa9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar=20Rubio?= Date: Thu, 21 Nov 2024 04:42:10 +0100 Subject: [PATCH] More performance improvements (#240) --- src/mkdocs_include_markdown_plugin/cache.py | 20 +++-- .../directive.py | 40 +++++++--- src/mkdocs_include_markdown_plugin/event.py | 43 ++++++----- src/mkdocs_include_markdown_plugin/plugin.py | 2 +- src/mkdocs_include_markdown_plugin/process.py | 75 +++++++++++++------ 5 files changed, 120 insertions(+), 60 deletions(-) diff --git a/src/mkdocs_include_markdown_plugin/cache.py b/src/mkdocs_include_markdown_plugin/cache.py index cff3014..053e764 100644 --- a/src/mkdocs_include_markdown_plugin/cache.py +++ b/src/mkdocs_include_markdown_plugin/cache.py @@ -4,11 +4,13 @@ import hashlib import os +import stat import time +from importlib.util import find_spec try: - from platformdirs import user_data_dir + platformdirs_spec = find_spec('platformdirs') except ImportError: # pragma: no cover CACHE_AVAILABLE = False else: @@ -37,13 +39,19 @@ def generate_unique_key_from_url(cls, url: str) -> str: return hashlib.blake2b(url.encode(), digest_size=16).digest().hex() def read_file(self, fpath: str, encoding: str = 'utf-8') -> str: # noqa: D102 - with open(fpath, encoding=encoding) as f: - return f.read().split('\n', 1)[1] + f = open(fpath, encoding=encoding) # noqa: SIM115 + content = f.read().split('\n', 1)[1] + f.close() + return content def get_(self, url: str, encoding: str = 'utf-8') -> str | None: # noqa: D102 key = self.generate_unique_key_from_url(url) fpath = os.path.join(self.cache_dir, key) - if os.path.isfile(fpath): + try: + is_file = stat.S_ISREG(os.stat(fpath).st_mode) + except (FileNotFoundError, OSError): # pragma: no cover + return None + if is_file: creation_time = self.get_creation_time_from_fpath(fpath) if time.time() < creation_time + self.expiration_seconds: return self.read_file(fpath, encoding=encoding) @@ -72,9 +80,9 @@ def get_cache_directory() -> str | None: if not CACHE_AVAILABLE: return None + from platformdirs import user_data_dir cache_dir = user_data_dir('mkdocs-include-markdown-plugin') - if not os.path.isdir(cache_dir): - os.makedirs(cache_dir) + os.makedirs(cache_dir, exist_ok=True) return cache_dir diff --git a/src/mkdocs_include_markdown_plugin/directive.py b/src/mkdocs_include_markdown_plugin/directive.py index 140222e..0e57356 100644 --- a/src/mkdocs_include_markdown_plugin/directive.py +++ b/src/mkdocs_include_markdown_plugin/directive.py @@ -4,6 +4,7 @@ import os import re +import stat import string from dataclasses import dataclass from typing import TYPE_CHECKING @@ -22,7 +23,7 @@ class DirectiveBoolArgument: # noqa: D101 if TYPE_CHECKING: # pragma: no cover - from typing import Literal, TypedDict + from typing import Callable, Literal, TypedDict DirectiveBoolArgumentsDict = dict[str, DirectiveBoolArgument] @@ -119,7 +120,7 @@ def str_arg(arg: str) -> re.Pattern[str]: def warn_invalid_directive_arguments( arguments_string: str, - directive_lineno: int, + directive_lineno: Callable[[], int], directive: Literal['include', 'include-markdown'], page_src_path: str | None, docs_dir: str, @@ -129,13 +130,13 @@ def warn_invalid_directive_arguments( INCLUDE_DIRECTIVE_ARGS if directive == 'include' else INCLUDE_MARKDOWN_DIRECTIVE_ARGS ) - for arg_value in re.findall( - WARN_INVALID_DIRECTIVE_ARGS_REGEX, + for arg_match in WARN_INVALID_DIRECTIVE_ARGS_REGEX.finditer( arguments_string, ): + arg_value = arg_match.group() if arg_value.split('=', 1)[0] not in valid_args: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) logger.warning( f"Invalid argument '{arg_value}' in" @@ -226,19 +227,28 @@ def resolve_file_paths_to_include( # noqa: PLR0912 return [include_string], True if process.is_absolute_path(include_string): - if os.name == 'nt': # pragma: nt cover + if os.name == 'nt': # pragma: no cover # Windows fpath = os.path.normpath(include_string) - if not os.path.isfile(fpath): + try: + is_file = stat.S_ISREG(os.stat(fpath).st_mode) + except (FileNotFoundError, OSError): + is_file = False + if not is_file: return [], False return process.filter_paths( [fpath], ignore_paths, ), False + try: + is_file = stat.S_ISREG(os.stat(include_string).st_mode) + except (FileNotFoundError, OSError): + is_file = False return process.filter_paths( - [include_string] if os.path.isfile(include_string) - else glob.iglob(include_string, flags=GLOB_FLAGS), + [include_string] if is_file else glob.iglob( + include_string, flags=GLOB_FLAGS, + ), ignore_paths), False if process.is_relative_path(include_string): @@ -253,7 +263,11 @@ def resolve_file_paths_to_include( # noqa: PLR0912 ) paths = [] include_path = os.path.join(root_dir, include_string) - if os.path.isfile(include_path): + try: + is_file = stat.S_ISREG(os.stat(include_path).st_mode) + except (FileNotFoundError, OSError): + is_file = False + if is_file: paths.append(include_path) else: for fp in glob.iglob( @@ -268,7 +282,11 @@ def resolve_file_paths_to_include( # noqa: PLR0912 paths = [] root_dir = docs_dir include_path = os.path.join(root_dir, include_string) - if os.path.isfile(include_path): + try: + is_file = stat.S_ISREG(os.stat(include_path).st_mode) + except (FileNotFoundError, OSError): + is_file = False + if is_file: paths.append(include_path) else: for fp in glob.iglob( diff --git a/src/mkdocs_include_markdown_plugin/event.py b/src/mkdocs_include_markdown_plugin/event.py index 73ccc6f..f63fd26 100644 --- a/src/mkdocs_include_markdown_plugin/event.py +++ b/src/mkdocs_include_markdown_plugin/event.py @@ -2,6 +2,7 @@ from __future__ import annotations +import functools import html import os import re @@ -104,7 +105,8 @@ def found_include_tag( # noqa: PLR0912, PLR0915 match: re.Match[str], ) -> str: directive_match_start = match.start() - directive_lineno = process.lineno_from_content_start( + directive_lineno = functools.partial( + process.lineno_from_content_start, markdown, directive_match_start, ) @@ -114,7 +116,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 filename, raw_filename = parse_filename_argument(match) if filename is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Found no path passed including with 'include'" @@ -137,7 +139,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 exclude_string = parse_string_argument(exclude_match) if exclude_string is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'exclude' argument in 'include'" @@ -159,7 +161,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 if not file_paths_to_include: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( f"No files found including '{raw_filename}' at {location}", @@ -176,7 +178,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 ) if invalid_bool_args: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( f"Invalid value for '{invalid_bool_args[0]}' argument of" @@ -189,7 +191,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 start = parse_string_argument(start_match) if start is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'start' argument in 'include' directive at" @@ -203,7 +205,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 end = parse_string_argument(end_match) if end is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'end' argument in 'include' directive at" @@ -217,7 +219,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 encoding = parse_string_argument(encoding_match) if encoding is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'encoding' argument in 'include'" @@ -294,7 +296,7 @@ def found_include_tag( # noqa: PLR0912, PLR0915 ]) plural_suffix = 's' if len(file_paths_to_include) > 1 else '' location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) logger.warning( f"Delimiter {delimiter_name} '{delimiter_value}'" @@ -313,7 +315,8 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 match: re.Match[str], ) -> str: directive_match_start = match.start() - directive_lineno = process.lineno_from_content_start( + directive_lineno = functools.partial( + process.lineno_from_content_start, markdown, directive_match_start, ) @@ -324,7 +327,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 filename, raw_filename = parse_filename_argument(match) if filename is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Found no path passed including with 'include-markdown'" @@ -347,7 +350,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 exclude_string = parse_string_argument(exclude_match) if exclude_string is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'exclude' argument in 'include-markdown'" @@ -368,7 +371,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 if not file_paths_to_include: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( f"No files found including '{raw_filename}' at {location}", @@ -388,7 +391,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 ) if invalid_bool_args: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( f"Invalid value for '{invalid_bool_args[0]}' argument of" @@ -402,7 +405,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 start = parse_string_argument(start_match) if start is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'start' argument in 'include-markdown'" @@ -416,7 +419,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 end = parse_string_argument(end_match) if end is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'end' argument in 'include-markdown'" @@ -430,7 +433,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 encoding = parse_string_argument(encoding_match) if encoding is None: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'encoding' argument in 'include-markdown'" @@ -447,7 +450,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 offset_raw_value = offset_match[1] if offset_raw_value == '': location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( "Invalid empty 'heading-offset' argument in" @@ -457,7 +460,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 offset = int(offset_raw_value) except ValueError: location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) raise PluginError( f"Invalid 'heading-offset' argument \"{offset_raw_value}\"" @@ -579,7 +582,7 @@ def found_include_markdown_tag( # noqa: PLR0912, PLR0915 ]) plural_suffix = 's' if len(file_paths_to_include) > 1 else '' location = process.file_lineno_message( - page_src_path, docs_dir, directive_lineno, + page_src_path, docs_dir, directive_lineno(), ) logger.warning( f"Delimiter {delimiter_name} '{delimiter_value}' of" diff --git a/src/mkdocs_include_markdown_plugin/plugin.py b/src/mkdocs_include_markdown_plugin/plugin.py index b855a16..9c8dd5c 100644 --- a/src/mkdocs_include_markdown_plugin/plugin.py +++ b/src/mkdocs_include_markdown_plugin/plugin.py @@ -7,7 +7,6 @@ from typing import TYPE_CHECKING from mkdocs.exceptions import PluginError -from mkdocs.livereload import LiveReloadServer from mkdocs.plugins import BasePlugin, event_priority @@ -15,6 +14,7 @@ import re from mkdocs.config.defaults import MkDocsConfig + from mkdocs.livereload import LiveReloadServer from mkdocs.structure.files import Files from mkdocs.structure.pages import Page diff --git a/src/mkdocs_include_markdown_plugin/process.py b/src/mkdocs_include_markdown_plugin/process.py index 854186d..32eb533 100644 --- a/src/mkdocs_include_markdown_plugin/process.py +++ b/src/mkdocs_include_markdown_plugin/process.py @@ -6,10 +6,9 @@ import io import os import re +import stat from collections.abc import Callable, Iterator -from typing import TYPE_CHECKING, Any -from urllib.parse import urlparse, urlunparse -from urllib.request import Request, urlopen +from typing import TYPE_CHECKING if TYPE_CHECKING: # pragma: no cover @@ -246,10 +245,15 @@ def rewrite_relative_urls( ``destination_path``. """ def rewrite_url(url: str) -> str: + from urllib.parse import urlparse, urlunparse + + if is_relative_path(url): + return url + scheme, netloc, path, params, query, fragment = urlparse(url) # absolute or mail - if is_relative_path(url) or path.startswith('/') or scheme == 'mailto': + if path.startswith('/') or scheme == 'mailto': return url new_path = os.path.relpath( @@ -260,10 +264,13 @@ def rewrite_url(url: str) -> str: # ensure forward slashes are used, on Windows new_path = new_path.replace('\\', '/').replace('//', '/') - if path.endswith('/'): - # the above operation removes a trailing slash. Add it back if it - # was present in the input - new_path = new_path + '/' + try: + if path[-1] == '/': + # the above operation removes a trailing slash, + # so add it back if it was present in the input + new_path += '/' + except IndexError: # pragma: no cover + pass return urlunparse((scheme, netloc, new_path, params, query, fragment)) @@ -351,9 +358,10 @@ def filter_inclusions( # noqa: PLR0912 if end not in text_to_include: expected_end_not_found = True + start_split = text_to_include.split(start) text_parts = ( - text_to_include.split(start)[1:] - if start in text_to_include else [text_to_include] + start_split[1:] + if len(start_split) > 1 else [text_to_include] ) for start_text in text_parts: @@ -374,20 +382,34 @@ def _transform_negative_offset_func_factory( offset: int, ) -> Callable[[str], str]: heading_prefix = '#' * abs(offset) - return lambda line: line if not line.startswith('#') else ( - heading_prefix + line.lstrip('#') - if line.startswith(heading_prefix) - else '#' + line.lstrip('#') - ) + + def transform(line: str) -> str: + try: + if line[0] != '#': + return line + except IndexError: # pragma: no cover + return line + if line.startswith(heading_prefix): + return heading_prefix + line.lstrip('#') + return '#' + line.lstrip('#') + + return transform def _transform_positive_offset_func_factory( offset: int, ) -> Callable[[str], str]: heading_prefix = '#' * offset - return lambda line: ( - heading_prefix + line if line.startswith('#') else line - ) + + def transform(line: str) -> str: + try: + prefix = line[0] + except IndexError: # pragma: no cover + return line + else: + return heading_prefix + line if prefix == '#' else line + + return transform def increase_headings_offset(markdown: str, offset: int = 0) -> str: @@ -442,8 +464,11 @@ def filter_paths( continue # ignore if is a directory - if not os.path.isdir(filepath): - response.append(filepath) + try: + if not stat.S_ISDIR(os.stat(filepath).st_mode): + response.append(filepath) + except (FileNotFoundError, OSError): # pragma: no cover + continue response.sort() return response @@ -452,6 +477,8 @@ def is_url(string: str) -> bool: """Determine if a string is an URL.""" if ':' not in string: # fast path return False + from urllib.parse import urlparse + try: result = urlparse(string) return all([result.scheme, result.netloc]) @@ -471,8 +498,10 @@ def is_absolute_path(string: str) -> bool: def read_file(file_path: str, encoding: str) -> str: """Read a file and return its content.""" - with open(file_path, encoding=encoding) as f: - return f.read() + f = open(file_path, encoding=encoding) # noqa: SIM115 + content = f.read() + f.close() + return content def read_url( @@ -481,6 +510,8 @@ def read_url( encoding: str = 'utf-8', ) -> Any: """Read an HTTP location and return its content.""" + from urllib.request import Request, urlopen + if http_cache is not None: cached_content = http_cache.get_(url, encoding) if cached_content is not None: