From a1a22ca50f29484ef677d011fd8905cd0ad20f46 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Wed, 10 Feb 2021 08:25:49 -0800 Subject: [PATCH] Add --exclude (#9992) Resolves #4675, resolves #9981. Additionally, we always ignore site-packages and node_modules, and directories starting with a dot. Also note that this doesn't really affect import discovery; it only directly affects passing files or packages to mypy. The additional check before suggesting "are you missing an __init__.py" didn't make any sense to me, so I removed it, appended to the message and downgraded the severity to note. Co-authored-by: hauntsaninja <> --- docs/source/command_line.rst | 24 ++++++ docs/source/config_file.rst | 12 +++ docs/source/running_mypy.rst | 3 +- mypy/build.py | 16 ++-- mypy/find_sources.py | 11 ++- mypy/main.py | 9 +++ mypy/modulefinder.py | 25 ++++++- mypy/options.py | 2 + mypy/test/test_find_sources.py | 131 ++++++++++++++++++++++++++++++--- mypy_self_check.ini | 1 + test-data/unit/cmdline.test | 16 +--- 11 files changed, 210 insertions(+), 40 deletions(-) diff --git a/docs/source/command_line.rst b/docs/source/command_line.rst index 40df775742a6..db4da1436189 100644 --- a/docs/source/command_line.rst +++ b/docs/source/command_line.rst @@ -49,6 +49,30 @@ for full details, see :ref:`running-mypy`. Asks mypy to type check the provided string as a program. +.. option:: --exclude + + A regular expression that matches file names, directory names and paths + which mypy should ignore while recursively discovering files to check. + Use forward slashes on all platforms. + + For instance, to avoid discovering any files named ``setup.py`` you could + pass ``--exclude '/setup\.py$'``. Similarly, you can ignore discovering + directories with a given name by e.g. ``--exclude /build/`` or + those matching a subpath with ``--exclude /project/vendor/``. 
+ + Note that this flag only affects recursive discovery, that is, when mypy is + discovering files within a directory tree or submodules of a package to + check. If you pass a file or module explicitly it will still be checked. For + instance, ``mypy --exclude '/setup\.py$' but_still_check/setup.py``. + + Note that mypy will never recursively discover files and directories named + "site-packages", "node_modules" or "__pycache__", or those whose name starts + with a period, exactly as ``--exclude + '/(site-packages|node_modules|__pycache__|\..*)/$'`` would. Mypy will also + never recursively discover files with extensions other than ``.py`` or + ``.pyi``. + + Optional arguments ****************** diff --git a/docs/source/config_file.rst b/docs/source/config_file.rst index 11aa73fbf5d0..6ae02fe8aa52 100644 --- a/docs/source/config_file.rst +++ b/docs/source/config_file.rst @@ -192,6 +192,18 @@ section of the command line docs. This option may only be set in the global section (``[mypy]``). +.. confval:: exclude + + :type: regular expression + + A regular expression that matches file names, directory names and paths + which mypy should ignore while recursively discovering files to check. + Use forward slashes on all platforms. + + For more details, see :option:`--exclude <mypy --exclude>`. + + This option may only be set in the global section (``[mypy]``). + .. confval:: namespace_packages :type: boolean diff --git a/docs/source/running_mypy.rst b/docs/source/running_mypy.rst index 3d5b9ff6d17a..2c1d14b6d858 100644 --- a/docs/source/running_mypy.rst +++ b/docs/source/running_mypy.rst @@ -355,7 +355,8 @@ to modules to type check. - Mypy will check all paths provided that correspond to files. - Mypy will recursively discover and check all files ending in ``.py`` or - ``.pyi`` in directory paths provided. + ``.pyi`` in directory paths provided, after accounting for + :option:`--exclude <mypy --exclude>`. - For each file to be checked, mypy will attempt to associate the file (e.g. 
``project/foo/bar/baz.py``) with a fully qualified module name (e.g. diff --git a/mypy/build.py b/mypy/build.py index e6f597af31bc..324a8f853456 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -15,7 +15,6 @@ import gc import json import os -import pathlib import re import stat import sys @@ -2552,6 +2551,7 @@ def log_configuration(manager: BuildManager, sources: List[BuildSource]) -> None ("Current Executable", sys.executable), ("Cache Dir", manager.options.cache_dir), ("Compiled", str(not __file__.endswith(".py"))), + ("Exclude", manager.options.exclude), ] for conf_name, conf_value in configuration_vars: @@ -2751,14 +2751,12 @@ def load_graph(sources: List[BuildSource], manager: BuildManager, "Duplicate module named '%s' (also at '%s')" % (st.id, graph[st.id].xpath), blocker=True, ) - p1 = len(pathlib.PurePath(st.xpath).parents) - p2 = len(pathlib.PurePath(graph[st.id].xpath).parents) - - if p1 != p2: - manager.errors.report( - -1, -1, - "Are you missing an __init__.py?" - ) + manager.errors.report( + -1, -1, + "Are you missing an __init__.py? 
Alternatively, consider using --exclude to " + "avoid checking one of them.", + severity='note' + ) manager.errors.raise_error() graph[st.id] = st diff --git a/mypy/find_sources.py b/mypy/find_sources.py index 47d686cddcbc..4f50d8ff52b2 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -6,7 +6,7 @@ from typing import List, Sequence, Set, Tuple, Optional from typing_extensions import Final -from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS, mypy_path +from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS, mypy_path, matches_exclude from mypy.fscache import FileSystemCache from mypy.options import Options @@ -91,6 +91,8 @@ def __init__(self, fscache: FileSystemCache, options: Options) -> None: self.fscache = fscache self.explicit_package_bases = get_explicit_package_bases(options) self.namespace_packages = options.namespace_packages + self.exclude = options.exclude + self.verbosity = options.verbosity def is_explicit_package_base(self, path: str) -> bool: assert self.explicit_package_bases @@ -103,10 +105,15 @@ def find_sources_in_dir(self, path: str) -> List[BuildSource]: names = sorted(self.fscache.listdir(path), key=keyfunc) for name in names: # Skip certain names altogether - if name == '__pycache__' or name.startswith('.') or name.endswith('~'): + if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."): continue subpath = os.path.join(path, name) + if matches_exclude( + subpath, self.exclude, self.fscache, self.verbosity >= 2 + ): + continue + if self.fscache.isdir(subpath): sub_sources = self.find_sources_in_dir(subpath) if sub_sources: diff --git a/mypy/main.py b/mypy/main.py index ab38f7478b3f..ea68ee41d0f2 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -791,6 +791,15 @@ def add_invertible_flag(flag: str, code_group.add_argument( '--explicit-package-bases', action='store_true', help="Use current directory and MYPYPATH to determine module names of files passed") + code_group.add_argument( + 
"--exclude", + metavar="PATTERN", + default="", + help=( + "Regular expression to match file names, directory names or paths which mypy should " + "ignore while recursively discovering files to check, e.g. --exclude '/setup\\.py$'" + ) + ) code_group.add_argument( '-m', '--module', action='append', metavar='MODULE', default=[], diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index bdc71d7a7e58..2c708b8f802d 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -7,6 +7,7 @@ import collections import functools import os +import re import subprocess import sys from enum import Enum @@ -380,10 +381,15 @@ def find_modules_recursive(self, module: str) -> List[BuildSource]: names = sorted(self.fscache.listdir(package_path)) for name in names: # Skip certain names altogether - if name == '__pycache__' or name.startswith('.') or name.endswith('~'): + if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."): continue subpath = os.path.join(package_path, name) + if self.options and matches_exclude( + subpath, self.options.exclude, self.fscache, self.options.verbosity >= 2 + ): + continue + if self.fscache.isdir(subpath): # Only recurse into packages if (self.options and self.options.namespace_packages) or ( @@ -397,13 +403,26 @@ def find_modules_recursive(self, module: str) -> List[BuildSource]: if stem == '__init__': continue if stem not in seen and '.' not in stem and suffix in PYTHON_EXTENSIONS: - # (If we sorted names) we could probably just make the BuildSource ourselves, - # but this ensures compatibility with find_module / the cache + # (If we sorted names by keyfunc) we could probably just make the BuildSource + # ourselves, but this ensures compatibility with find_module / the cache seen.add(stem) sources.extend(self.find_modules_recursive(module + '.' 
+ stem)) return sources +def matches_exclude(subpath: str, exclude: str, fscache: FileSystemCache, verbose: bool) -> bool: + if not exclude: + return False + subpath_str = os.path.abspath(subpath).replace(os.sep, "/") + if fscache.isdir(subpath): + subpath_str += "/" + if re.search(exclude, subpath_str): + if verbose: + print("TRACE: Excluding {}".format(subpath_str), file=sys.stderr) + return True + return False + + def verify_module(fscache: FileSystemCache, id: str, path: str, prefix: str) -> bool: """Check that all packages containing id have a __init__ file.""" if path.endswith(('__init__.py', '__init__.pyi')): diff --git a/mypy/options.py b/mypy/options.py index e95ed3e0bb46..752e1cffdb25 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -97,6 +97,8 @@ def __init__(self) -> None: # sufficient to determine module names for files. As a possible alternative, add a single # top-level __init__.py to your packages. self.explicit_package_bases = False + # File names, directory names or subpaths to avoid checking + self.exclude = "" # type: str # disallow_any options self.disallow_any_generics = False diff --git a/mypy/test/test_find_sources.py b/mypy/test/test_find_sources.py index 5cedec338bbc..056ddf13b108 100644 --- a/mypy/test/test_find_sources.py +++ b/mypy/test/test_find_sources.py @@ -1,8 +1,9 @@ from mypy.modulefinder import BuildSource import os +import pytest import unittest from typing import List, Optional, Set, Tuple -from mypy.find_sources import SourceFinder +from mypy.find_sources import InvalidSourceList, SourceFinder, create_source_list from mypy.fscache import FileSystemCache from mypy.modulefinder import BuildSource from mypy.options import Options @@ -47,10 +48,17 @@ def crawl(finder: SourceFinder, f: str) -> Tuple[str, str]: return module, normalise_path(base_dir) -def find_sources(finder: SourceFinder, f: str) -> List[Tuple[str, Optional[str]]]: +def find_sources_in_dir(finder: SourceFinder, f: str) -> List[Tuple[str, Optional[str]]]: 
return normalise_build_source_list(finder.find_sources_in_dir(os.path.abspath(f))) +def find_sources( + paths: List[str], options: Options, fscache: FileSystemCache +) -> List[Tuple[str, Optional[str]]]: + paths = [os.path.abspath(p) for p in paths] + return normalise_build_source_list(create_source_list(paths, options, fscache)) + + class SourceFinderSuite(unittest.TestCase): def test_crawl_no_namespace(self) -> None: options = Options() @@ -172,7 +180,7 @@ def test_crawl_namespace_multi_dir(self) -> None: assert crawl(finder, "/a/pkg/a.py") == ("pkg.a", "/a") assert crawl(finder, "/b/pkg/b.py") == ("pkg.b", "/b") - def test_find_sources_no_namespace(self) -> None: + def test_find_sources_in_dir_no_namespace(self) -> None: options = Options() options.namespace_packages = False @@ -184,7 +192,7 @@ def test_find_sources_no_namespace(self) -> None: "/pkg/a2/b/f.py", } finder = SourceFinder(FakeFSCache(files), options) - assert find_sources(finder, "/") == [ + assert find_sources_in_dir(finder, "/") == [ ("a2", "/pkg"), ("e", "/pkg/a1/b/c/d"), ("e", "/pkg/a2/b/c/d"), @@ -192,7 +200,7 @@ def test_find_sources_no_namespace(self) -> None: ("f", "/pkg/a2/b"), ] - def test_find_sources_namespace(self) -> None: + def test_find_sources_in_dir_namespace(self) -> None: options = Options() options.namespace_packages = True @@ -204,7 +212,7 @@ def test_find_sources_namespace(self) -> None: "/pkg/a2/b/f.py", } finder = SourceFinder(FakeFSCache(files), options) - assert find_sources(finder, "/") == [ + assert find_sources_in_dir(finder, "/") == [ ("a2", "/pkg"), ("a2.b.c.d.e", "/pkg"), ("a2.b.f", "/pkg"), @@ -212,7 +220,7 @@ def test_find_sources_namespace(self) -> None: ("f", "/pkg/a1/b"), ] - def test_find_sources_namespace_explicit_base(self) -> None: + def test_find_sources_in_dir_namespace_explicit_base(self) -> None: options = Options() options.namespace_packages = True options.explicit_package_bases = True @@ -226,7 +234,7 @@ def 
test_find_sources_namespace_explicit_base(self) -> None: "/pkg/a2/b/f.py", } finder = SourceFinder(FakeFSCache(files), options) - assert find_sources(finder, "/") == [ + assert find_sources_in_dir(finder, "/") == [ ("pkg.a1.b.c.d.e", "/"), ("pkg.a1.b.f", "/"), ("pkg.a2", "/"), @@ -236,7 +244,7 @@ def test_find_sources_namespace_explicit_base(self) -> None: options.mypy_path = ["/pkg"] finder = SourceFinder(FakeFSCache(files), options) - assert find_sources(finder, "/") == [ + assert find_sources_in_dir(finder, "/") == [ ("a1.b.c.d.e", "/pkg"), ("a1.b.f", "/pkg"), ("a2", "/pkg"), @@ -244,11 +252,112 @@ def test_find_sources_namespace_explicit_base(self) -> None: ("a2.b.f", "/pkg"), ] - def test_find_sources_namespace_multi_dir(self) -> None: + def test_find_sources_in_dir_namespace_multi_dir(self) -> None: options = Options() options.namespace_packages = True options.explicit_package_bases = True options.mypy_path = ["/a", "/b"] finder = SourceFinder(FakeFSCache({"/a/pkg/a.py", "/b/pkg/b.py"}), options) - assert find_sources(finder, "/") == [("pkg.a", "/a"), ("pkg.b", "/b")] + assert find_sources_in_dir(finder, "/") == [("pkg.a", "/a"), ("pkg.b", "/b")] + + def test_find_sources_exclude(self) -> None: + options = Options() + options.namespace_packages = True + + # default + for excluded_dir in ["site-packages", ".whatever", "node_modules", ".x/.z"]: + fscache = FakeFSCache({"/dir/a.py", "/dir/venv/{}/b.py".format(excluded_dir)}) + assert find_sources(["/"], options, fscache) == [("a", "/dir")] + with pytest.raises(InvalidSourceList): + find_sources(["/dir/venv/"], options, fscache) + assert find_sources(["/dir/venv/{}".format(excluded_dir)], options, fscache) == [ + ("b", "/dir/venv/{}".format(excluded_dir)) + ] + assert find_sources(["/dir/venv/{}/b.py".format(excluded_dir)], options, fscache) == [ + ("b", "/dir/venv/{}".format(excluded_dir)) + ] + + files = { + "/pkg/a1/b/c/d/e.py", + "/pkg/a1/b/f.py", + "/pkg/a2/__init__.py", + "/pkg/a2/b/c/d/e.py", + 
"/pkg/a2/b/f.py", + } + + # file name + options.exclude = r"/f\.py$" + fscache = FakeFSCache(files) + assert find_sources(["/"], options, fscache) == [ + ("a2", "/pkg"), + ("a2.b.c.d.e", "/pkg"), + ("e", "/pkg/a1/b/c/d"), + ] + assert find_sources(["/pkg/a1/b/f.py"], options, fscache) == [('f', '/pkg/a1/b')] + assert find_sources(["/pkg/a2/b/f.py"], options, fscache) == [('a2.b.f', '/pkg')] + + # directory name + options.exclude = "/a1/" + fscache = FakeFSCache(files) + assert find_sources(["/"], options, fscache) == [ + ("a2", "/pkg"), + ("a2.b.c.d.e", "/pkg"), + ("a2.b.f", "/pkg"), + ] + with pytest.raises(InvalidSourceList): + find_sources(["/pkg/a1"], options, fscache) + with pytest.raises(InvalidSourceList): + find_sources(["/pkg/a1/"], options, fscache) + with pytest.raises(InvalidSourceList): + find_sources(["/pkg/a1/b"], options, fscache) + + options.exclude = "/a1/$" + assert find_sources(["/pkg/a1"], options, fscache) == [ + ('e', '/pkg/a1/b/c/d'), ('f', '/pkg/a1/b') + ] + + # paths + options.exclude = "/pkg/a1/" + fscache = FakeFSCache(files) + assert find_sources(["/"], options, fscache) == [ + ("a2", "/pkg"), + ("a2.b.c.d.e", "/pkg"), + ("a2.b.f", "/pkg"), + ] + with pytest.raises(InvalidSourceList): + find_sources(["/pkg/a1"], options, fscache) + + options.exclude = "/(a1|a3)/" + fscache = FakeFSCache(files) + assert find_sources(["/"], options, fscache) == [ + ("a2", "/pkg"), + ("a2.b.c.d.e", "/pkg"), + ("a2.b.f", "/pkg"), + ] + + options.exclude = "b/c/" + fscache = FakeFSCache(files) + assert find_sources(["/"], options, fscache) == [ + ("a2", "/pkg"), + ("a2.b.f", "/pkg"), + ("f", "/pkg/a1/b"), + ] + + # nothing should be ignored as a result of this + options.exclude = "|".join(( + "/pkg/a/", "/2", "/1", "/pk/", "/kg", "/g.py", "/bc", "/xxx/pkg/a2/b/f.py" + "xxx/pkg/a2/b/f.py", + )) + fscache = FakeFSCache(files) + assert len(find_sources(["/"], options, fscache)) == len(files) + + files = { + "pkg/a1/b/c/d/e.py", + "pkg/a1/b/f.py", + 
"pkg/a2/__init__.py", + "pkg/a2/b/c/d/e.py", + "pkg/a2/b/f.py", + } + fscache = FakeFSCache(files) + assert len(find_sources(["/"], options, fscache)) == len(files) diff --git a/mypy_self_check.ini b/mypy_self_check.ini index 2b7ed2b157c5..c974a0248afc 100644 --- a/mypy_self_check.ini +++ b/mypy_self_check.ini @@ -19,3 +19,4 @@ pretty = True always_false = MYPYC plugins = misc/proper_plugin.py python_version = 3.5 +exclude = /mypy/typeshed/ diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 8fe9f478a077..4c78928500b0 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -59,7 +59,7 @@ undef undef [out] dir/a.py: error: Duplicate module named 'a' (also at 'dir/subdir/a.py') -dir/a.py: error: Are you missing an __init__.py? +dir/a.py: note: Are you missing an __init__.py? Alternatively, consider using --exclude to avoid checking one of them. == Return code: 2 [case testCmdlineNonPackageSlash] @@ -125,19 +125,7 @@ mypy: can't decode file 'a.py': unknown encoding: uft-8 # type: ignore [out] two/mod/__init__.py: error: Duplicate module named 'mod' (also at 'one/mod/__init__.py') -== Return code: 2 - -[case promptsForgotInit] -# cmd: mypy a.py one/mod/a.py -[file one/__init__.py] -# type: ignore -[file a.py] -# type: ignore -[file one/mod/a.py] -#type: ignore -[out] -one/mod/a.py: error: Duplicate module named 'a' (also at 'a.py') -one/mod/a.py: error: Are you missing an __init__.py? +two/mod/__init__.py: note: Are you missing an __init__.py? Alternatively, consider using --exclude to avoid checking one of them. == Return code: 2 [case testFlagsFile]