From 81040e001635ff069f80dd2e8c08b2eb784fa9e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar=20Rubio?=
 <mondejar1994@gmail.com>
Date: Thu, 21 Nov 2024 04:42:10 +0100
Subject: [PATCH] More performance improvements (#240)

---
 src/mkdocs_include_markdown_plugin/cache.py   | 20 +++--
 .../directive.py                              | 40 +++++++---
 src/mkdocs_include_markdown_plugin/event.py   | 43 ++++++-----
 src/mkdocs_include_markdown_plugin/plugin.py  |  2 +-
 src/mkdocs_include_markdown_plugin/process.py | 75 +++++++++++++------
 5 files changed, 120 insertions(+), 60 deletions(-)

diff --git a/src/mkdocs_include_markdown_plugin/cache.py b/src/mkdocs_include_markdown_plugin/cache.py
index cff3014..053e764 100644
--- a/src/mkdocs_include_markdown_plugin/cache.py
+++ b/src/mkdocs_include_markdown_plugin/cache.py
@@ -4,11 +4,13 @@
 
 import hashlib
 import os
+import stat
 import time
+from importlib.util import find_spec
 
 
 try:
-    from platformdirs import user_data_dir
+    platformdirs_spec = find_spec('platformdirs')
 except ImportError:  # pragma: no cover
     CACHE_AVAILABLE = False
 else:
@@ -37,13 +39,19 @@ def generate_unique_key_from_url(cls, url: str) -> str:
         return hashlib.blake2b(url.encode(), digest_size=16).digest().hex()
 
     def read_file(self, fpath: str, encoding: str = 'utf-8') -> str:  # noqa: D102
-        with open(fpath, encoding=encoding) as f:
-            return f.read().split('\n', 1)[1]
+        # Skip the first line; ``with`` closes the handle even on errors.
+        with open(fpath, encoding=encoding) as f:
+            content = f.read()
+        return content.split('\n', 1)[1]
 
     def get_(self, url: str, encoding: str = 'utf-8') -> str | None:  # noqa: D102
         key = self.generate_unique_key_from_url(url)
         fpath = os.path.join(self.cache_dir, key)
-        if os.path.isfile(fpath):
+        try:
+            is_file = stat.S_ISREG(os.stat(fpath).st_mode)
+        except (FileNotFoundError, OSError):  # pragma: no cover
+            return None
+        if is_file:
             creation_time = self.get_creation_time_from_fpath(fpath)
             if time.time() < creation_time + self.expiration_seconds:
                 return self.read_file(fpath, encoding=encoding)
@@ -72,9 +80,9 @@ def get_cache_directory() -> str | None:
     if not CACHE_AVAILABLE:
         return None
 
+    from platformdirs import user_data_dir
     cache_dir = user_data_dir('mkdocs-include-markdown-plugin')
-    if not os.path.isdir(cache_dir):
-        os.makedirs(cache_dir)
+    os.makedirs(cache_dir, exist_ok=True)
 
     return cache_dir
 
diff --git a/src/mkdocs_include_markdown_plugin/directive.py b/src/mkdocs_include_markdown_plugin/directive.py
index 140222e..0e57356 100644
--- a/src/mkdocs_include_markdown_plugin/directive.py
+++ b/src/mkdocs_include_markdown_plugin/directive.py
@@ -4,6 +4,7 @@
 
 import os
 import re
+import stat
 import string
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
@@ -22,7 +23,7 @@ class DirectiveBoolArgument:  # noqa: D101
 
 
 if TYPE_CHECKING:  # pragma: no cover
-    from typing import Literal, TypedDict
+    from typing import Callable, Literal, TypedDict
 
     DirectiveBoolArgumentsDict = dict[str, DirectiveBoolArgument]
 
@@ -119,7 +120,7 @@ def str_arg(arg: str) -> re.Pattern[str]:
 
 def warn_invalid_directive_arguments(
     arguments_string: str,
-    directive_lineno: int,
+    directive_lineno: Callable[[], int],
     directive: Literal['include', 'include-markdown'],
     page_src_path: str | None,
     docs_dir: str,
@@ -129,13 +130,13 @@ def warn_invalid_directive_arguments(
         INCLUDE_DIRECTIVE_ARGS if directive == 'include'
         else INCLUDE_MARKDOWN_DIRECTIVE_ARGS
     )
-    for arg_value in re.findall(
-        WARN_INVALID_DIRECTIVE_ARGS_REGEX,
+    for arg_match in WARN_INVALID_DIRECTIVE_ARGS_REGEX.finditer(
         arguments_string,
     ):
+        arg_value = arg_match.group()
         if arg_value.split('=', 1)[0] not in valid_args:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             logger.warning(
                 f"Invalid argument '{arg_value}' in"
@@ -226,19 +227,28 @@ def resolve_file_paths_to_include(  # noqa: PLR0912
         return [include_string], True
 
     if process.is_absolute_path(include_string):
-        if os.name == 'nt':  # pragma: nt cover
+        if os.name == 'nt':  # pragma: no cover
             # Windows
             fpath = os.path.normpath(include_string)
-            if not os.path.isfile(fpath):
+            try:
+                is_file = stat.S_ISREG(os.stat(fpath).st_mode)
+            except (FileNotFoundError, OSError):
+                is_file = False
+            if not is_file:
                 return [], False
 
             return process.filter_paths(
                 [fpath], ignore_paths,
             ), False
 
+        try:
+            is_file = stat.S_ISREG(os.stat(include_string).st_mode)
+        except (FileNotFoundError, OSError):
+            is_file = False
         return process.filter_paths(
-            [include_string] if os.path.isfile(include_string)
-            else glob.iglob(include_string, flags=GLOB_FLAGS),
+            [include_string] if is_file else glob.iglob(
+                include_string, flags=GLOB_FLAGS,
+            ),
             ignore_paths), False
 
     if process.is_relative_path(include_string):
@@ -253,7 +263,11 @@ def resolve_file_paths_to_include(  # noqa: PLR0912
         )
         paths = []
         include_path = os.path.join(root_dir, include_string)
-        if os.path.isfile(include_path):
+        try:
+            is_file = stat.S_ISREG(os.stat(include_path).st_mode)
+        except (FileNotFoundError, OSError):
+            is_file = False
+        if is_file:
             paths.append(include_path)
         else:
             for fp in glob.iglob(
@@ -268,7 +282,11 @@ def resolve_file_paths_to_include(  # noqa: PLR0912
     paths = []
     root_dir = docs_dir
     include_path = os.path.join(root_dir, include_string)
-    if os.path.isfile(include_path):
+    try:
+        is_file = stat.S_ISREG(os.stat(include_path).st_mode)
+    except (FileNotFoundError, OSError):
+        is_file = False
+    if is_file:
         paths.append(include_path)
     else:
         for fp in glob.iglob(
diff --git a/src/mkdocs_include_markdown_plugin/event.py b/src/mkdocs_include_markdown_plugin/event.py
index 73ccc6f..f63fd26 100644
--- a/src/mkdocs_include_markdown_plugin/event.py
+++ b/src/mkdocs_include_markdown_plugin/event.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import functools
 import html
 import os
 import re
@@ -104,7 +105,8 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
             match: re.Match[str],
     ) -> str:
         directive_match_start = match.start()
-        directive_lineno = process.lineno_from_content_start(
+        directive_lineno = functools.partial(
+            process.lineno_from_content_start,
             markdown,
             directive_match_start,
         )
@@ -114,7 +116,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
         filename, raw_filename = parse_filename_argument(match)
         if filename is None:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 "Found no path passed including with 'include'"
@@ -137,7 +139,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
             exclude_string = parse_string_argument(exclude_match)
             if exclude_string is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'exclude' argument in 'include'"
@@ -159,7 +161,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
 
         if not file_paths_to_include:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 f"No files found including '{raw_filename}' at {location}",
@@ -176,7 +178,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
         )
         if invalid_bool_args:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 f"Invalid value for '{invalid_bool_args[0]}' argument of"
@@ -189,7 +191,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
             start = parse_string_argument(start_match)
             if start is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'start' argument in 'include' directive at"
@@ -203,7 +205,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
             end = parse_string_argument(end_match)
             if end is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'end' argument in 'include' directive at"
@@ -217,7 +219,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
             encoding = parse_string_argument(encoding_match)
             if encoding is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'encoding' argument in 'include'"
@@ -294,7 +296,7 @@ def found_include_tag(  # noqa: PLR0912, PLR0915
                 ])
                 plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 logger.warning(
                     f"Delimiter {delimiter_name} '{delimiter_value}'"
@@ -313,7 +315,8 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             match: re.Match[str],
     ) -> str:
         directive_match_start = match.start()
-        directive_lineno = process.lineno_from_content_start(
+        directive_lineno = functools.partial(
+            process.lineno_from_content_start,
             markdown,
             directive_match_start,
         )
@@ -324,7 +327,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
         filename, raw_filename = parse_filename_argument(match)
         if filename is None:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 "Found no path passed including with 'include-markdown'"
@@ -347,7 +350,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             exclude_string = parse_string_argument(exclude_match)
             if exclude_string is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'exclude' argument in 'include-markdown'"
@@ -368,7 +371,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
 
         if not file_paths_to_include:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 f"No files found including '{raw_filename}' at {location}",
@@ -388,7 +391,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
         )
         if invalid_bool_args:
             location = process.file_lineno_message(
-                page_src_path, docs_dir, directive_lineno,
+                page_src_path, docs_dir, directive_lineno(),
             )
             raise PluginError(
                 f"Invalid value for '{invalid_bool_args[0]}' argument of"
@@ -402,7 +405,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             start = parse_string_argument(start_match)
             if start is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'start' argument in 'include-markdown'"
@@ -416,7 +419,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             end = parse_string_argument(end_match)
             if end is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'end' argument in 'include-markdown'"
@@ -430,7 +433,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             encoding = parse_string_argument(encoding_match)
             if encoding is None:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'encoding' argument in 'include-markdown'"
@@ -447,7 +450,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
             offset_raw_value = offset_match[1]
             if offset_raw_value == '':
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     "Invalid empty 'heading-offset' argument in"
@@ -457,7 +460,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
                 offset = int(offset_raw_value)
             except ValueError:
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 raise PluginError(
                     f"Invalid 'heading-offset' argument \"{offset_raw_value}\""
@@ -579,7 +582,7 @@ def found_include_markdown_tag(  # noqa: PLR0912, PLR0915
                 ])
                 plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
                 location = process.file_lineno_message(
-                    page_src_path, docs_dir, directive_lineno,
+                    page_src_path, docs_dir, directive_lineno(),
                 )
                 logger.warning(
                     f"Delimiter {delimiter_name} '{delimiter_value}' of"
diff --git a/src/mkdocs_include_markdown_plugin/plugin.py b/src/mkdocs_include_markdown_plugin/plugin.py
index b855a16..9c8dd5c 100644
--- a/src/mkdocs_include_markdown_plugin/plugin.py
+++ b/src/mkdocs_include_markdown_plugin/plugin.py
@@ -7,7 +7,6 @@
 from typing import TYPE_CHECKING
 
 from mkdocs.exceptions import PluginError
-from mkdocs.livereload import LiveReloadServer
 from mkdocs.plugins import BasePlugin, event_priority
 
 
@@ -15,6 +14,7 @@
     import re
 
     from mkdocs.config.defaults import MkDocsConfig
+    from mkdocs.livereload import LiveReloadServer
     from mkdocs.structure.files import Files
     from mkdocs.structure.pages import Page
 
diff --git a/src/mkdocs_include_markdown_plugin/process.py b/src/mkdocs_include_markdown_plugin/process.py
index 854186d..32eb533 100644
--- a/src/mkdocs_include_markdown_plugin/process.py
+++ b/src/mkdocs_include_markdown_plugin/process.py
@@ -6,10 +6,9 @@
 import io
 import os
 import re
+import stat
 from collections.abc import Callable, Iterator
-from typing import TYPE_CHECKING, Any
-from urllib.parse import urlparse, urlunparse
-from urllib.request import Request, urlopen
+from typing import TYPE_CHECKING
 
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -246,10 +245,15 @@ def rewrite_relative_urls(
     ``destination_path``.
     """
     def rewrite_url(url: str) -> str:
+        from urllib.parse import urlparse, urlunparse
+
+        if is_relative_path(url):
+            return url
+
         scheme, netloc, path, params, query, fragment = urlparse(url)
 
         # absolute or mail
-        if is_relative_path(url) or path.startswith('/') or scheme == 'mailto':
+        if path.startswith('/') or scheme == 'mailto':
             return url
 
         new_path = os.path.relpath(
@@ -260,10 +264,13 @@ def rewrite_url(url: str) -> str:
         # ensure forward slashes are used, on Windows
         new_path = new_path.replace('\\', '/').replace('//', '/')
 
-        if path.endswith('/'):
-            # the above operation removes a trailing slash. Add it back if it
-            # was present in the input
-            new_path = new_path + '/'
+        try:
+            if path[-1] == '/':
+                # the above operation removes a trailing slash,
+                # so add it back if it was present in the input
+                new_path += '/'
+        except IndexError:  # pragma: no cover
+            pass
 
         return urlunparse((scheme, netloc, new_path, params, query, fragment))
 
@@ -351,9 +358,10 @@ def filter_inclusions(  # noqa: PLR0912
         if end not in text_to_include:
             expected_end_not_found = True
 
+        start_split = text_to_include.split(start)
         text_parts = (
-            text_to_include.split(start)[1:]
-            if start in text_to_include else [text_to_include]
+            start_split[1:]
+            if len(start_split) > 1 else [text_to_include]
         )
 
         for start_text in text_parts:
@@ -374,20 +382,34 @@ def _transform_negative_offset_func_factory(
         offset: int,
 ) -> Callable[[str], str]:
     heading_prefix = '#' * abs(offset)
-    return lambda line: line if not line.startswith('#') else (
-        heading_prefix + line.lstrip('#')
-        if line.startswith(heading_prefix)
-        else '#' + line.lstrip('#')
-    )
+
+    def transform(line: str) -> str:
+        try:
+            if line[0] != '#':
+                return line
+        except IndexError:  # pragma: no cover
+            return line
+        if line.startswith(heading_prefix):
+            return heading_prefix + line.lstrip('#')
+        return '#' + line.lstrip('#')
+
+    return transform
 
 
 def _transform_positive_offset_func_factory(
         offset: int,
 ) -> Callable[[str], str]:
     heading_prefix = '#' * offset
-    return lambda line: (
-        heading_prefix + line if line.startswith('#') else line
-    )
+
+    def transform(line: str) -> str:
+        try:
+            prefix = line[0]  # an empty line has no first character
+        except IndexError:  # pragma: no cover
+            return line
+        else:
+            return heading_prefix + line if prefix == '#' else line
+
+    return transform
 
 
 def increase_headings_offset(markdown: str, offset: int = 0) -> str:
@@ -442,8 +464,11 @@ def filter_paths(
             continue
 
         # ignore if is a directory
-        if not os.path.isdir(filepath):
-            response.append(filepath)
+        try:
+            if not stat.S_ISDIR(os.stat(filepath).st_mode):
+                response.append(filepath)
+        except (FileNotFoundError, OSError):  # pragma: no cover
+            continue
     response.sort()
     return response
 
@@ -452,6 +477,8 @@ def is_url(string: str) -> bool:
     """Determine if a string is an URL."""
     if ':' not in string:  # fast path
         return False
+    from urllib.parse import urlparse
+
     try:
         result = urlparse(string)
         return all([result.scheme, result.netloc])
@@ -471,8 +498,10 @@ def is_absolute_path(string: str) -> bool:
 
 def read_file(file_path: str, encoding: str) -> str:
     """Read a file and return its content."""
-    with open(file_path, encoding=encoding) as f:
-        return f.read()
+    # A context manager guarantees the handle is closed even when
+    # reading or decoding raises (e.g. ``UnicodeDecodeError``).
+    with open(file_path, encoding=encoding) as f:
+        return f.read()
 
 
 def read_url(
@@ -481,6 +510,8 @@ def read_url(
         encoding: str = 'utf-8',
 ) -> Any:
     """Read an HTTP location and return its content."""
+    from urllib.request import Request, urlopen
+
     if http_cache is not None:
         cached_content = http_cache.get_(url, encoding)
         if cached_content is not None: