Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FindModuleCache: optionally leverage BuildSourceSet #12616

Merged
merged 2 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/source/command_line.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,29 @@ imports.
By default, mypy will suppress any error messages generated within :pep:`561`
compliant packages. Adding this flag will disable this behavior.

.. option:: --fast-module-lookup

The default logic used to scan through search paths to resolve imports has a
quadratic worse-case behavior in some cases, which is for instance triggered
by a large number of folders sharing a top-level namespace as in:

foo/
company/
foo/
a.py
bar/
company/
bar/
b.py
baz/
company/
baz/
c.py
...

If you are in this situation, you can enable an experimental fast path by
setting the :option:`--fast-module-lookup` option.


.. _platform-configuration:

Expand Down
1 change: 1 addition & 0 deletions docs/source/running_mypy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ same directory on the search path, only the stub file is used.
(However, if the files are in different directories, the one found
in the earlier directory is used.)


Other advice and best practices
*******************************

Expand Down
34 changes: 4 additions & 30 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
from mypy.report import Reports # Avoid unconditional slow import
from mypy.fixup import fixup_module
from mypy.modulefinder import (
BuildSource, compute_search_paths, FindModuleCache, SearchPaths, ModuleSearchResult,
ModuleNotFoundReason
BuildSource, BuildSourceSet, compute_search_paths, FindModuleCache, SearchPaths,
ModuleSearchResult, ModuleNotFoundReason
)
from mypy.nodes import Expression
from mypy.options import Options
Expand Down Expand Up @@ -107,33 +107,6 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None:
self.errors: List[str] = [] # Filled in by build if desired


class BuildSourceSet:
"""Efficiently test a file's membership in the set of build sources."""

def __init__(self, sources: List[BuildSource]) -> None:
self.source_text_present = False
self.source_modules: Set[str] = set()
self.source_paths: Set[str] = set()

for source in sources:
if source.text is not None:
self.source_text_present = True
elif source.path:
self.source_paths.add(source.path)
else:
self.source_modules.add(source.module)

def is_source(self, file: MypyFile) -> bool:
if file.path and file.path in self.source_paths:
return True
elif file._fullname in self.source_modules:
return True
elif self.source_text_present:
return True
else:
return False


def build(sources: List[BuildSource],
options: Options,
alt_lib_path: Optional[str] = None,
Expand Down Expand Up @@ -630,7 +603,8 @@ def __init__(self, data_dir: str,
or options.use_fine_grained_cache)
and not has_reporters)
self.fscache = fscache
self.find_module_cache = FindModuleCache(self.search_paths, self.fscache, self.options)
self.find_module_cache = FindModuleCache(self.search_paths, self.fscache, self.options,
source_set=self.source_set)
self.metastore = create_metastore(options)

# a mapping from source files to their corresponding shadow files
Expand Down
4 changes: 4 additions & 0 deletions mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,10 @@ def add_invertible_flag(flag: str,
'--explicit-package-bases', default=False,
help="Use current directory and MYPYPATH to determine module names of files passed",
group=code_group)
add_invertible_flag(
'--fast-module-lookup', default=False,
help=argparse.SUPPRESS,
group=code_group)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this flag doesn't have good test coverage, I'd prefer to not advertise it in the mypy --help output, since it could easily break between releases. Instead, this could be discussed in the documentation, with some caveats about being an experimental feature.

You can use help=argparse.SUPPRESS for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you switch to help=argparse.SUPPRESS?

code_group.add_argument(
"--exclude",
action="append",
Expand Down
114 changes: 112 additions & 2 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from typing_extensions import Final, TypeAlias as _TypeAlias

from mypy.fscache import FileSystemCache
from mypy.nodes import MypyFile
from mypy.options import Options
from mypy.stubinfo import is_legacy_bundled_package
from mypy import pyinfo
Expand Down Expand Up @@ -126,6 +127,33 @@ def __repr__(self) -> str:
self.base_dir)


class BuildSourceSet:
"""Helper to efficiently test a file's membership in a set of build sources."""

def __init__(self, sources: List[BuildSource]) -> None:
self.source_text_present = False
self.source_modules = {} # type: Dict[str, str]
self.source_paths = set() # type: Set[str]

for source in sources:
if source.text is not None:
self.source_text_present = True
if source.path:
self.source_paths.add(source.path)
if source.module:
self.source_modules[source.module] = source.path or ''

def is_source(self, file: MypyFile) -> bool:
if file.path and file.path in self.source_paths:
return True
elif file._fullname in self.source_modules:
return True
elif self.source_text_present:
return True
else:
return False


class FindModuleCache:
"""Module finder with integrated cache.

Expand All @@ -141,8 +169,10 @@ def __init__(self,
search_paths: SearchPaths,
fscache: Optional[FileSystemCache],
options: Optional[Options],
stdlib_py_versions: Optional[StdlibVersions] = None) -> None:
stdlib_py_versions: Optional[StdlibVersions] = None,
source_set: Optional[BuildSourceSet] = None) -> None:
self.search_paths = search_paths
self.source_set = source_set
self.fscache = fscache or FileSystemCache()
# Cache for get_toplevel_possibilities:
# search_paths -> (toplevel_id -> list(package_dirs))
Expand All @@ -164,6 +194,53 @@ def clear(self) -> None:
self.initial_components.clear()
self.ns_ancestors.clear()

def find_module_via_source_set(self, id: str) -> Optional[ModuleSearchResult]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add docstring. Also mention that this is not used by default.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

"""Fast path to find modules by looking through the input sources

This is only used when --fast-module-lookup is passed on the command line."""
if not self.source_set:
return None

p = self.source_set.source_modules.get(id, None)
if p and self.fscache.isfile(p):
# We need to make sure we still have __init__.py all the way up
# otherwise we might have false positives compared to slow path
# in case of deletion of init files, which is covered by some tests.
# TODO: are there some combination of flags in which this check should be skipped?
d = os.path.dirname(p)
for _ in range(id.count('.')):
if not any(self.fscache.isfile(os.path.join(d, '__init__' + x))
for x in PYTHON_EXTENSIONS):
return None
d = os.path.dirname(d)
return p

idx = id.rfind('.')
if idx != -1:
# When we're looking for foo.bar.baz and can't find a matching module
# in the source set, look up for a foo.bar module.
parent = self.find_module_via_source_set(id[:idx])
if parent is None or not isinstance(parent, str):
return None

basename, ext = os.path.splitext(parent)
if (not any(parent.endswith('__init__' + x) for x in PYTHON_EXTENSIONS)
and (ext in PYTHON_EXTENSIONS and not self.fscache.isdir(basename))):
# If we do find such a *module* (and crucially, we don't want a package,
# hence the filtering out of __init__ files, and checking for the presence
# of a folder with a matching name), then we can be pretty confident that
# 'baz' will either be a top-level variable in foo.bar, or will not exist.
#
# Either way, spelunking in other search paths for another 'foo.bar.baz'
# module should be avoided because:
# 1. in the unlikely event that one were found, it's highly likely that
# it would be unrelated to the source being typechecked and therefore
# more likely to lead to erroneous results
# 2. as described in _find_module, in some cases the search itself could
# potentially waste significant amounts of time
return ModuleNotFoundReason.NOT_FOUND
return None

def find_lib_path_dirs(self, id: str, lib_path: Tuple[str, ...]) -> PackageDirs:
"""Find which elements of a lib_path have the directory a module needs to exist.

Expand Down Expand Up @@ -229,7 +306,7 @@ def find_module(self, id: str, *, fast_path: bool = False) -> ModuleSearchResult
elif top_level in self.stdlib_py_versions:
use_typeshed = self._typeshed_has_version(top_level)
self.results[id] = self._find_module(id, use_typeshed)
if (not fast_path
if (not (fast_path or (self.options is not None and self.options.fast_module_lookup))
and self.results[id] is ModuleNotFoundReason.NOT_FOUND
and self._can_find_module_in_parent_dir(id)):
self.results[id] = ModuleNotFoundReason.WRONG_WORKING_DIRECTORY
Expand Down Expand Up @@ -295,6 +372,39 @@ def _can_find_module_in_parent_dir(self, id: str) -> bool:
def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
fscache = self.fscache

# Fast path for any modules in the current source set.
# This is particularly important when there are a large number of search
# paths which share the first (few) component(s) due to the use of namespace
# packages, for instance:
# foo/
# company/
# __init__.py
# foo/
# bar/
# company/
# __init__.py
# bar/
# baz/
# company/
# __init__.py
# baz/
#
# mypy gets [foo/company/foo, bar/company/bar, baz/company/baz, ...] as input
# and computes [foo, bar, baz, ...] as the module search path.
#
# This would result in O(n) search for every import of company.*, leading to
# O(n**2) behavior in load_graph as such imports are unsurprisingly present
# at least once, and usually many more times than that, in each and every file
# being parsed.
#
# Thankfully, such cases are efficiently handled by looking up the module path
# via BuildSourceSet.
p = (self.find_module_via_source_set(id)
if (self.options is not None and self.options.fast_module_lookup)
else None)
if p:
return p

# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
# many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
# that only once and cache it for when we look for modules like 'foo.bar.blah'
Expand Down
2 changes: 2 additions & 0 deletions mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ def __init__(self) -> None:
self.cache_map: Dict[str, Tuple[str, str]] = {}
# Don't properly free objects on exit, just kill the current process.
self.fast_exit = True
# fast path for finding modules from source set
self.fast_module_lookup = False
# Used to transform source code before parsing if not None
# TODO: Make the type precise (AnyStr -> AnyStr)
self.transform_source: Optional[Callable[[Any], Any]] = None
Expand Down
1 change: 1 addition & 0 deletions mypy/test/testcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
'check-multiple-inheritance.test',
'check-super.test',
'check-modules.test',
'check-modules-fast.test',
'check-typevar-values.test',
'check-unsupported.test',
'check-unreachable-code.test',
Expand Down
Loading