codespell-project · larsoner · Jan 9, 2024 · Jan 3, 2024 · Jan 3, 2024 · Jan 3, 2024
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -20,12 +20,24 @@
 import configparser
 import ctypes
 import fnmatch
+import itertools
 import os
 import re
 import sys
 import textwrap
 from ctypes import wintypes
-from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Match,
+    Optional,
+    Pattern,
+    Sequence,
+    Set,
+    Tuple,
+)
 
 # autogenerated by setuptools_scm
 from ._version import (  # type: ignore[import-not-found]
@@ -655,24 +667,44 @@ def parse_options(
     return options, parser, used_cfg_files
 
 
-def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]:
+def process_ignore_words(
+    words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
+) -> None:
+    """Sort stripped words into the lowercase set or the case-sensitive set."""
+    for raw_word in words:
+        entry = raw_word.strip()
+        # All-lowercase entries go to ignore_words; others keep their case.
+        target = ignore_words if entry == entry.lower() else ignore_words_cased
+        target.add(entry)
+
+
+def parse_ignore_words_option(
+    ignore_words_option: List[str],
+) -> Tuple[Set[str], Set[str]]:
+    """Split comma-separated options into (lowercase, case-sensitive) word sets."""
     ignore_words: Set[str] = set()
+    ignore_words_cased: Set[str] = set()
     if ignore_words_option:
         for comma_separated_words in ignore_words_option:
-            ignore_words.update(
-                word.strip() for word in comma_separated_words.split(",")
+            # process_ignore_words() strips every word, so split() is enough.
+            process_ignore_words(
+                comma_separated_words.split(","), ignore_words, ignore_words_cased
             )
-    return ignore_words
+    return (ignore_words, ignore_words_cased)
 
 
 def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
     with open(filename, encoding="utf-8") as f:
         exclude_lines.update(line.rstrip() for line in f)
 
 
-def build_ignore_words(filename: str, ignore_words: Set[str]) -> None:
+def build_ignore_words(
+    filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
+) -> None:
+    """Load the words listed one per line in filename into the two word sets."""
     with open(filename, encoding="utf-8") as f:
-        ignore_words.update(line.strip() for line in f)
+        # Lines are stripped by process_ignore_words(), so pass the file as is.
+        process_ignore_words(f, ignore_words, ignore_words_cased)
 
 
 def add_misspelling(
@@ -865,6 +897,7 @@ def parse_file(
     colors: TermColors,
     summary: Optional[Summary],
     misspellings: Dict[str, Misspelling],
+    ignore_words_cased: Set[str],
     exclude_lines: Set[str],
     file_opener: FileOpener,
     word_regex: Pattern[str],
@@ -885,6 +918,8 @@ def parse_file(
     else:
         if options.check_filenames:
             for word in extract_words(filename, word_regex, ignore_word_regex):
+                if word in ignore_words_cased:
+                    continue
                 lword = word.lower()
                 if lword not in misspellings:
                     continue
@@ -958,6 +993,8 @@ def parse_file(
             )
         for match in check_matches:
             word = match.group()
+            if word in ignore_words_cased:
+                continue
             lword = word.lower()
             if lword in misspellings:
                 # Sometimes we find a 'misspelling' which is actually a valid word
@@ -1112,7 +1149,10 @@ def main(*args: str) -> int:
         ignore_word_regex = None
 
     ignore_words_files = options.ignore_words or []
-    ignore_words = parse_ignore_words_option(options.ignore_words_list)
+    ignore_words, ignore_words_cased = parse_ignore_words_option(
+        options.ignore_words_list
+    )
+
     for ignore_words_file in ignore_words_files:
         if not os.path.isfile(ignore_words_file):
             print(
@@ -1121,7 +1161,7 @@ def main(*args: str) -> int:
             )
             parser.print_help()
             return EX_USAGE
-        build_ignore_words(ignore_words_file, ignore_words)
+        build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
 
     uri_regex = options.uri_regex or uri_regex_def
     try:
@@ -1133,7 +1173,10 @@ def main(*args: str) -> int:
         )
         parser.print_help()
         return EX_USAGE
-    uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list)
+
+    uri_ignore_words = set(
+        itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
+    )
 
     dictionaries = options.dictionary or ["-"]
 
@@ -1242,6 +1285,7 @@ def main(*args: str) -> int:
                         colors,
                         summary,
                         misspellings,
+                        ignore_words_cased,
                         exclude_lines,
                         file_opener,
                         word_regex,
@@ -1266,6 +1310,7 @@ def main(*args: str) -> int:
                 colors,
                 summary,
                 misspellings,
+                ignore_words_cased,
                 exclude_lines,
                 file_opener,
                 word_regex,

diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
@@ -343,6 +343,43 @@ def test_ignore_dictionary(
     assert cs.main("-I", fname, bad_name) == 1
 
 
+def test_ignore_words_with_cases(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Test case-sensitivity implemented for -I and -L options in #3272.
+
+    An entry containing uppercase letters ignores only that exact spelling,
+    while an all-lowercase entry ignores the word whatever its case.
+    """
+    bad_name = tmp_path / "MIS.txt"
+    bad_name.write_text(
+        "1 MIS (Management Information System) 1\n"
+        "2 Les Mis (1980 musical) 2\n"
+        "3 mis 3\n"
+    )
+    # Baseline: one hit per line of text; -f reports one more.
+    assert cs.main(bad_name) == 3
+    assert cs.main(bad_name, "-f") == 4
+    fname = tmp_path / "ignore.txt"
+
+    # (ignored words, expected count without -f, expected count with -f)
+    scenarios = [
+        (["miS"], 3, 4),
+        (["MIS"], 2, 2),
+        (["MIS", "Mis"], 1, 1),
+        (["mis"], 0, 0),
+    ]
+    for words, plain_count, with_f_count in scenarios:
+        # Feed the same words through an -I file and through an -L list.
+        fname.write_text("\n".join(words))
+        list_option = "-L" + ",".join(words)
+        assert cs.main("-I", fname, bad_name) == plain_count
+        assert cs.main(list_option, bad_name) == plain_count
+        assert cs.main("-I", fname, "-f", bad_name) == with_f_count
+        assert cs.main(list_option, "-f", bad_name) == with_f_count
+
+
 def test_ignore_word_list(
     tmp_path: Path,
     capsys: pytest.CaptureFixture[str],

diff --git a/pyproject.toml b/pyproject.toml
@@ -160,7 +160,7 @@ max-complexity = 45
 
 [tool.ruff.lint.pylint]
 allow-magic-value-types = ["bytes", "int", "str",]
-max-args = 12
+max-args = 13
 max-branches = 49
 max-returns = 11
-max-statements = 111
+max-statements = 113