diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 9eeb8c3ac4..777c645f91 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -20,12 +20,24 @@ import configparser import ctypes import fnmatch +import itertools import os import re import sys import textwrap from ctypes import wintypes -from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple +from typing import ( + Any, + Dict, + Iterable, + List, + Match, + Optional, + Pattern, + Sequence, + Set, + Tuple, +) # autogenerated by setuptools_scm from ._version import ( # type: ignore[import-not-found] @@ -655,14 +667,30 @@ def parse_options( return options, parser, used_cfg_files -def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]: +def process_ignore_words( + words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str] +) -> None: + for word in words: + word = word.strip() + if word == word.lower(): + ignore_words.add(word) + else: + ignore_words_cased.add(word) + + +def parse_ignore_words_option( + ignore_words_option: List[str], +) -> Tuple[Set[str], Set[str]]: ignore_words: Set[str] = set() + ignore_words_cased: Set[str] = set() if ignore_words_option: for comma_separated_words in ignore_words_option: - ignore_words.update( - word.strip() for word in comma_separated_words.split(",") + process_ignore_words( + (word.strip() for word in comma_separated_words.split(",")), + ignore_words, + ignore_words_cased, ) - return ignore_words + return (ignore_words, ignore_words_cased) def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: @@ -670,9 +698,13 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: exclude_lines.update(line.rstrip() for line in f) -def build_ignore_words(filename: str, ignore_words: Set[str]) -> None: +def build_ignore_words( + filename: str, ignore_words: Set[str], ignore_words_cased: Set[str] +) -> None: with open(filename, encoding="utf-8") as f: - ignore_words.update(line.strip() for line in f) + process_ignore_words( + (line.strip() for line in f), ignore_words, ignore_words_cased + ) def add_misspelling( @@ -865,6 +897,7 @@ def parse_file( colors: TermColors, summary: Optional[Summary], misspellings: Dict[str, Misspelling], + ignore_words_cased: Set[str], exclude_lines: Set[str], file_opener: FileOpener, word_regex: Pattern[str], @@ -885,6 +918,8 @@ def parse_file( else: if options.check_filenames: for word in extract_words(filename, word_regex, ignore_word_regex): + if word in ignore_words_cased: + continue lword = word.lower() if lword not in misspellings: continue @@ -958,6 +993,8 @@ def parse_file( ) for match in check_matches: word = match.group() + if word in ignore_words_cased: + continue lword = word.lower() if lword in misspellings: # Sometimes we find a 'misspelling' which is actually a valid word @@ -1112,7 +1149,10 @@ def main(*args: str) -> int: ignore_word_regex = None ignore_words_files = options.ignore_words or [] - ignore_words = parse_ignore_words_option(options.ignore_words_list) + ignore_words, ignore_words_cased = parse_ignore_words_option( + options.ignore_words_list + ) + for ignore_words_file in ignore_words_files: if not os.path.isfile(ignore_words_file): print( @@ -1121,7 +1161,7 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - build_ignore_words(ignore_words_file, ignore_words) + build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased) uri_regex = options.uri_regex or uri_regex_def try: @@ -1133,7 +1173,10 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list) + + uri_ignore_words = set( + itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list)) + ) dictionaries = options.dictionary or ["-"] @@ -1242,6 +1285,7 @@ def main(*args: str) -> int: colors, summary, misspellings, + ignore_words_cased, exclude_lines, file_opener, word_regex, @@ -1266,6 +1310,7 @@ def main(*args: str) -> int: colors, summary, misspellings, + ignore_words_cased, exclude_lines, file_opener, word_regex, diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index c6bae11018..56844d8998 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -343,6 +343,43 @@ def test_ignore_dictionary( assert cs.main("-I", fname, bad_name) == 1 +def test_ignore_words_with_cases( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test case-sensitivity implemented for -I and -L options in #3272.""" + bad_name = tmp_path / "MIS.txt" + bad_name.write_text( + "1 MIS (Management Information System) 1\n" + "2 Les Mis (1980 musical) 2\n" + "3 mis 3\n" + ) + assert cs.main(bad_name) == 3 + assert cs.main(bad_name, "-f") == 4 + fname = tmp_path / "ignore.txt" + + fname.write_text("miS") + assert cs.main("-I", fname, bad_name) == 3 + assert cs.main("-LmiS", bad_name) == 3 + assert cs.main("-I", fname, "-f", bad_name) == 4 + assert cs.main("-LmiS", "-f", bad_name) == 4 + fname.write_text("MIS") + assert cs.main("-I", fname, bad_name) == 2 + assert cs.main("-LMIS", bad_name) == 2 + assert cs.main("-I", fname, "-f", bad_name) == 2 + assert cs.main("-LMIS", "-f", bad_name) == 2 + fname.write_text("MIS\nMis") + assert cs.main("-I", fname, bad_name) == 1 + assert cs.main("-LMIS,Mis", bad_name) == 1 + assert cs.main("-I", fname, "-f", bad_name) == 1 + assert cs.main("-LMIS,Mis", "-f", bad_name) == 1 + fname.write_text("mis") + assert cs.main("-I", fname, bad_name) == 0 + assert cs.main("-Lmis", bad_name) == 0 + assert cs.main("-I", fname, "-f", bad_name) == 0 + assert cs.main("-Lmis", "-f", bad_name) == 0 + + def test_ignore_word_list( tmp_path: Path, capsys: pytest.CaptureFixture[str], diff --git a/pyproject.toml b/pyproject.toml index 561d1777c2..2a8176beb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,7 +160,7 @@ max-complexity = 45 [tool.ruff.lint.pylint] allow-magic-value-types = ["bytes", "int", "str",] -max-args = 12 +max-args = 13 max-branches = 49 max-returns = 11 -max-statements = 111 +max-statements = 113