Skip to content

Commit

Permalink
Optimization
Browse files — browse the repository at this point in the history
  • Loading branch information
SeanTAllen committed Feb 24, 2024
1 parent c3eb1df commit cabe052
Showing 1 changed file with 16 additions and 13 deletions.
29 changes: 16 additions & 13 deletions htmlproofer/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os.path
import pathlib
import re
from typing import List, Optional, Set
from typing import Dict, List, Optional, Set
import urllib.parse
import uuid

Expand Down Expand Up @@ -100,6 +100,13 @@ def on_post_page(self, output_content: str, page: Page, config: Config) -> None:

use_directory_urls = config.data["use_directory_urls"]

# Optimization: At this point, we have all the files, so we can create
# a dictionary for faster lookups. Prior to this point, files are
# still being updated so creating a dictionary before now would result
# in incorrect values appearing as the key.
opt_files = {}
opt_files.update({os.path.normpath(file.url): file for file in self.files})

# Optimization: only parse links and headings
# li, sup are used for footnotes
strainer = SoupStrainer(('a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'sup', 'img'))
Expand All @@ -122,7 +129,7 @@ def on_post_page(self, output_content: str, page: Page, config: Config) -> None:
log_warning(f"ignoring URL {url} from {page.file.src_path}")
continue

url_status = self.get_url_status(url, page.file.src_path, all_element_ids, self.files, use_directory_urls)
url_status = self.get_url_status(url, page.file.src_path, all_element_ids, opt_files, use_directory_urls)

if self.bad_url(url_status) and self.is_error(self.config, url, url_status):
self.report_invalid_url(url, url_status, page.file.src_path)
Expand Down Expand Up @@ -161,7 +168,7 @@ def get_url_status(
url: str,
src_path: str,
all_element_ids: Set[str],
files: List[File],
files: Dict[str, File],
use_directory_urls: bool
) -> int:
if any(pat.match(url) for pat in LOCAL_PATTERNS):
Expand All @@ -188,7 +195,7 @@ def get_url_status(
return 0

@staticmethod
def is_url_target_valid(url: str, src_path: str, files: List[File]) -> bool:
def is_url_target_valid(url: str, src_path: str, files: Dict[str, File]) -> bool:
match = MARKDOWN_ANCHOR_PATTERN.match(url)
if match is None:
return True
Expand All @@ -209,7 +216,7 @@ def is_url_target_valid(url: str, src_path: str, files: List[File]) -> bool:
return True

@staticmethod
def find_target_markdown(url: str, src_path: str, files: List[File]) -> Optional[str]:
def find_target_markdown(url: str, src_path: str, files: Dict[str, File]) -> Optional[str]:
"""From a built URL, find the original Markdown source from the project that built it."""

file = HtmlProoferPlugin.find_source_file(url, src_path, files)
Expand All @@ -218,7 +225,7 @@ def find_target_markdown(url: str, src_path: str, files: List[File]) -> Optional
return None

@staticmethod
def find_source_file(url: str, src_path: str, files: List[File]) -> Optional[File]:
def find_source_file(url: str, src_path: str, files: Dict[str, File]) -> Optional[File]:
"""From a built URL, find the original file from the project that built it."""

if len(url) > 1 and url[0] == '/':
Expand All @@ -229,13 +236,9 @@ def find_source_file(url: str, src_path: str, files: List[File]) -> Optional[Fil
src_dir = urllib.parse.quote(str(pathlib.Path(src_path).parent), safe='/\\')
search_path = os.path.normpath(str(pathlib.Path(src_dir) / pathlib.Path(url)))

for file in files:
# Need to call normpath on the url to get the Windows tests to
# pass. This might be required for other platforms as well, but
# based on the tests, it seems to be required for Windows only.
if os.path.normpath(file.url) == search_path:
return file
else:
try:
return files[search_path]
except KeyError:
return None

@staticmethod
Expand Down

0 comments on commit cabe052

Please sign in to comment.