From a324d381da265637bb9b03e7c65c03f5bd1a239f Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 11:48:58 -0800
Subject: [PATCH 01/16] Make BuildManager use an fscache

---
 mypy/build.py         | 22 +++++++++++++++-------
 mypy/dmypy_server.py  | 20 ++++++++++++++------
 mypy/fscache.py       |  3 ++-
 mypy/server/update.py |  4 +++-
 4 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 27f86681215b..ae71d95c62ba 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -53,6 +53,7 @@ from mypy.plugin import Plugin, DefaultPlugin, ChainedPlugin
 from mypy.defaults import PYTHON3_VERSION_MIN
 from mypy.server.deps import get_dependencies
+from mypy.fscache import FileSystemCache
 
 
 # Switch to True to produce debug output related to fine-grained incremental
@@ -142,6 +143,7 @@ def build(sources: List[BuildSource],
           bin_dir: Optional[str] = None,
           saved_cache: Optional[SavedCache] = None,
           flush_errors: Optional[Callable[[List[str], bool], None]] = None,
+          fscache: Optional[FileSystemCache] = None,
           ) -> BuildResult:
     """Analyze a program.
 
@@ -165,6 +167,7 @@ def build(sources: List[BuildSource],
       directories; if omitted, use '.' as the data directory
       saved_cache: optional dict with saved cache state for dmypy (read-write!)
       flush_errors: optional function to flush errors after a file is processed
+      fscache: optionally a file-system cacher
     """
 
     # If we were not given a flush_errors, we use one that will populate those
@@ -177,7 +180,8 @@ def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:
     flush_errors = flush_errors or default_flush_errors
 
     try:
-        result = _build(sources, options, alt_lib_path, bin_dir, saved_cache, flush_errors)
+        result = _build(sources, options, alt_lib_path, bin_dir,
+                        saved_cache, flush_errors, fscache)
         result.errors = messages
         return result
     except CompileError as e:
@@ -197,6 +201,7 @@ def _build(sources: List[BuildSource],
            bin_dir: Optional[str],
            saved_cache: Optional[SavedCache],
            flush_errors: Callable[[List[str], bool], None],
+           fscache: Optional[FileSystemCache],
            ) -> BuildResult:
     # This seems the most reasonable place to tune garbage collection.
     gc.set_threshold(50000)
@@ -260,7 +265,8 @@ def _build(sources: List[BuildSource],
                            plugin=plugin,
                            errors=errors,
                            saved_cache=saved_cache,
-                           flush_errors=flush_errors)
+                           flush_errors=flush_errors,
+                           fscache=fscache)
 
     try:
         graph = dispatch(sources, manager)
@@ -570,6 +576,7 @@ class BuildManager:
       saved_cache: Dict with saved cache state for coarse-grained dmypy (read-write!)
       stats: Dict with various instrumentation numbers
+      fscache: A file system cacher
     """
 
     def __init__(self, data_dir: str,
@@ -583,6 +590,7 @@ def __init__(self, data_dir: str,
                  errors: Errors,
                  flush_errors: Callable[[List[str], bool], None],
                  saved_cache: Optional[SavedCache] = None,
+                 fscache: Optional[FileSystemCache] = None,
                  ) -> None:
         self.start_time = time.time()
         self.data_dir = data_dir
@@ -608,6 +616,7 @@ def __init__(self, data_dir: str,
         self.flush_errors = flush_errors
         self.saved_cache = saved_cache if saved_cache is not None else {}  # type: SavedCache
         self.stats = {}  # type: Dict[str, Any]  # Values are ints or floats
+        self.fscache = fscache or FileSystemCache(self.options.python_version)
 
     def maybe_swap_for_shadow_path(self, path: str) -> str:
         if (self.options.shadow_file and
@@ -616,7 +625,7 @@ def maybe_swap_for_shadow_path(self, path: str) -> str:
         return path
 
     def get_stat(self, path: str) -> os.stat_result:
-        return os.stat(self.maybe_swap_for_shadow_path(path))
+        return self.fscache.stat(self.maybe_swap_for_shadow_path(path))
 
     def all_imported_modules_in_file(self,
                                      file: MypyFile) -> List[Tuple[int, str, int]]:
@@ -1167,8 +1176,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
     mtime = int(st.st_mtime)
     if mtime != meta.mtime or path != meta.path:
-        with open(path, 'rb') as f:
-            source_hash = hashlib.md5(f.read()).hexdigest()
+        source_hash = manager.fscache.md5(path)
         if source_hash != meta.hash:
             manager.log('Metadata abandoned for {}: file {} has different hash'.format(id, path))
             return None
@@ -1872,8 +1880,8 @@ def parse_file(self) -> None:
         if self.path and source is None:
             try:
                 path = manager.maybe_swap_for_shadow_path(self.path)
-                source, self.source_hash = read_with_python_encoding(
-                    path, self.options.python_version)
+                source = manager.fscache.read_with_python_encoding(path)
+                self.source_hash = manager.fscache.md5(path)
             except IOError as ioerr:
                 raise CompileError([
                     "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)])
diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index a26a256fb011..b736ca5d2cad 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -251,8 +251,14 @@ def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str,
             return self.fine_grained_increment(sources)
 
     def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
-        self.fscache = FileSystemCache(self.options.python_version)
-        self.fswatcher = FileSystemWatcher(self.fscache)
+        # The file system cache we create gets passed off to
+        # BuildManager, and thence to FineGrainedBuildManager, which
+        # assumes responsibility for clearing it at the appropriate
+        # times (after init and update()).
+        # We also need to clear it ourselves sometimes, when we don't invoke
+        # update, which is unfortunate.
+        fscache = FileSystemCache(self.options.python_version)
+        self.fswatcher = FileSystemWatcher(fscache)
         self.update_sources(sources)
         if not self.options.use_fine_grained_cache:
             # Stores the initial state of sources as a side effect.
@@ -260,7 +266,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
         try:
             # TODO: alt_lib_path
             result = mypy.build.build(sources=sources,
-                                      options=self.options)
+                                      options=self.options,
+                                      fscache=fscache)
         except mypy.errors.CompileError as e:
             output = ''.join(s + '\n' for s in e.messages)
             if e.use_stdout:
@@ -274,7 +281,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
         self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
         self.fine_grained_initialized = True
         self.previous_sources = sources
-        self.fscache.flush()
+        #self.fscache.flush()
 
         # If we are using the fine-grained cache, build hasn't actually done
         # the typechecking on the updated files yet.
@@ -294,7 +301,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
             changed = self.find_changed(sources)
             if changed:
                 messages = self.fine_grained_manager.update(changed)
-            self.fscache.flush()
+            else:
+                self.fine_grained_manager.manager.fscache.flush()  # XXX: sigh
 
         status = 1 if messages else 0
         self.previous_messages = messages[:]
@@ -308,6 +316,7 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
         if not changed:
             # Nothing changed -- just produce the same result as before.
             messages = self.previous_messages
+            self.fine_grained_manager.manager.fscache.flush()  # XXX: sigh
         else:
             messages = self.fine_grained_manager.update(changed)
         t2 = time.time()
@@ -317,7 +326,6 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
         status = 1 if messages else 0
         self.previous_messages = messages[:]
         self.previous_sources = sources
-        self.fscache.flush()
         return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
 
     def update_sources(self, sources: List[mypy.build.BuildSource]) -> None:
diff --git a/mypy/fscache.py b/mypy/fscache.py
index 947c697b38c3..39d07813547c 100644
--- a/mypy/fscache.py
+++ b/mypy/fscache.py
@@ -32,7 +32,6 @@
 import stat
 from typing import Tuple, Dict, List
 
-from mypy.build import read_with_python_encoding
 from mypy.errors import DecodeError
 
 
@@ -52,6 +51,8 @@ def flush(self) -> None:
         self.listdir_error_cache = {}  # type: Dict[str, Exception]
 
     def read_with_python_encoding(self, path: str) -> str:
+        from mypy.build import read_with_python_encoding
+
         if path in self.read_cache:
             return self.read_cache[path]
         if path in self.read_error_cache:
diff --git a/mypy/server/update.py b/mypy/server/update.py
index a1c60655dae8..a6b360471ead 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -173,6 +173,7 @@ def __init__(self,
             manager.saved_cache = {}
         # Active triggers during the last update
         self.triggered = []  # type: List[str]
+        self.manager.fscache.flush()
 
     def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
         """Update previous build result by processing changed modules.
@@ -225,8 +226,9 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
             if blocker:
                 self.blocking_error = (next_id, next_path)
                 self.stale = changed_modules
-                return messages
+                break
 
+        self.manager.fscache.flush()
         return messages
 
     def update_single(self, module: str, path: str) -> Tuple[List[str],

From 6aa62e5d4f2211601d485da2e8fa61372e1edb01 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 13:07:30 -0800
Subject: [PATCH 02/16] Some cleanups

---
 mypy/dmypy_server.py         | 22 +++------------------
 mypy/server/update.py        | 19 ++++++++++++-------
 mypy/test/testfinegrained.py | 21 +++++++++++----------
 mypy/test/testmerge.py       | 20 ++++++++++----------
 4 files changed, 36 insertions(+), 46 deletions(-)

diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index b736ca5d2cad..da0edff7240c 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -255,8 +255,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
         # BuildManager, and thence to FineGrainedBuildManager, which
         # assumes responsibility for clearing it at the appropriate
         # times (after init and update()).
-        # We also need to clear it ourselves sometimes, when we don't invoke
-        # update, which is unfortunate.
         fscache = FileSystemCache(self.options.python_version)
         self.fswatcher = FileSystemWatcher(fscache)
         self.update_sources(sources)
@@ -276,12 +274,9 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
             out, err = '', output
             return {'out': out, 'err': err, 'status': 2}
         messages = result.errors
-        manager = result.manager
-        graph = result.graph
-        self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
+        self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(result)
         self.fine_grained_initialized = True
         self.previous_sources = sources
-        #self.fscache.flush()
 
         # If we are using the fine-grained cache, build hasn't actually done
         # the typechecking on the updated files yet.
@@ -298,14 +293,9 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
                     FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))
 
             # Run an update
-            changed = self.find_changed(sources)
-            if changed:
-                messages = self.fine_grained_manager.update(changed)
-            else:
-                self.fine_grained_manager.manager.fscache.flush()  # XXX: sigh
+            messages = self.fine_grained_manager.update(self.find_changed(sources))
 
         status = 1 if messages else 0
-        self.previous_messages = messages[:]
         return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
 
     def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
@@ -313,18 +303,12 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
         self.update_sources(sources)
         changed = self.find_changed(sources)
         t1 = time.time()
-        if not changed:
-            # Nothing changed -- just produce the same result as before.
-            messages = self.previous_messages
-            self.fine_grained_manager.manager.fscache.flush()  # XXX: sigh
-        else:
-            messages = self.fine_grained_manager.update(changed)
+        messages = self.fine_grained_manager.update(changed)
         t2 = time.time()
         self.fine_grained_manager.manager.log(
             "fine-grained increment: find_changed: {:.3f}s, update: {:.3f}s".format(
                 t1 - t0, t2 - t1))
         status = 1 if messages else 0
-        self.previous_messages = messages[:]
         self.previous_sources = sources
         return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
 
     def update_sources(self, sources: List[mypy.build.BuildSource]) -> None:
diff --git a/mypy/server/update.py b/mypy/server/update.py
index a6b360471ead..eef6cb8bd43f 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -121,7 +121,7 @@
 from typing import Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple
 
 from mypy.build import (
-    BuildManager, State, BuildSource, Graph, load_graph, find_module_clear_caches,
+    BuildManager, State, BuildSource, BuildResult, Graph, load_graph, find_module_clear_caches,
     PRI_INDIRECT, DEBUG_FINE_GRAINED,
 )
 from mypy.checker import DeferredNode
@@ -146,21 +146,23 @@ class FineGrainedBuildManager:
-    def __init__(self,
-                 manager: BuildManager,
-                 graph: Graph) -> None:
+    def __init__(self, result: BuildResult) -> None:
         """Initialize fine-grained build based on a batch build.
 
         Args:
+            result: Result from the initialized build.
+                    The manager and graph will be taken over by this class.
             manager: State of the build (mutated by this class)
             graph: Additional state of the build (only read to initialize state)
         """
+        manager = result.manager
         self.manager = manager
+        self.graph = result.graph
         self.options = manager.options
         self.previous_modules = get_module_to_path_map(manager)
-        self.deps = get_all_dependencies(manager, graph, self.options)
+        self.deps = get_all_dependencies(manager, self.graph, self.options)
         self.previous_targets_with_errors = manager.errors.targets()
-        self.graph = graph
+        self.previous_messages = result.errors[:]
         # Module, if any, that had blocking errors in the last run as (id, path) tuple.
         # TODO: Handle blocking errors in the initial build
         self.blocking_error = None  # type: Optional[Tuple[str, str]]
@@ -192,7 +194,9 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
         Returns:
             A list of errors.
         """
-        assert changed_modules, 'No changed modules'
+        if not changed_modules:
+            self.manager.fscache.flush()
+            return self.previous_messages
 
         # Reset global caches for the new build.
         find_module_clear_caches()
@@ -229,6 +233,7 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
                 break
 
         self.manager.fscache.flush()
+        self.previous_messages = messages[:]
         return messages
 
     def update_single(self, module: str, path: str) -> Tuple[List[str],
diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py
index f102d6571da2..83512cf0837f 100644
--- a/mypy/test/testfinegrained.py
+++ b/mypy/test/testfinegrained.py
@@ -14,7 +14,7 @@
 from typing import List, Tuple, Dict, Optional, Set
 
 from mypy import build
-from mypy.build import BuildManager, BuildSource, Graph
+from mypy.build import BuildManager, BuildSource, BuildResult, Graph
 from mypy.errors import Errors, CompileError
 from mypy.nodes import Node, MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Expression
 from mypy.options import Options
@@ -76,16 +76,17 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
         main_src = '\n'.join(testcase.input)
         sources_override = self.parse_sources(main_src)
-        messages, manager, graph = self.build(main_src, testcase, sources_override,
-                                              build_cache=self.use_cache,
-                                              enable_cache=self.use_cache)
+        result = self.build(main_src, testcase, sources_override,
+                            build_cache=self.use_cache,
+                            enable_cache=self.use_cache)
+        messages = result.errors
 
         a = []
         if messages:
             a.extend(normalize_messages(messages))
 
         fine_grained_manager = None
         if not self.use_cache:
-            fine_grained_manager = FineGrainedBuildManager(manager, graph)
+            fine_grained_manager = FineGrainedBuildManager(result)
             if CHECK_CONSISTENCY:
                 check_consistency(fine_grained_manager)
 
@@ -110,9 +111,9 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
                 # If this is the second iteration and we are using a
                 # cache, now we need to set it up
                 if fine_grained_manager is None:
-                    messages, manager, graph = self.build(main_src, testcase, sources_override,
-                                                          build_cache=False, enable_cache=True)
-                    fine_grained_manager = FineGrainedBuildManager(manager, graph)
+                    result = self.build(main_src, testcase, sources_override,
+                                        build_cache=False, enable_cache=True)
+                    fine_grained_manager = FineGrainedBuildManager(result)
 
                 new_messages = fine_grained_manager.update(modules)
                 if CHECK_CONSISTENCY:
@@ -143,7 +144,7 @@ def build(self,
               testcase: DataDrivenTestCase,
               sources_override: Optional[List[Tuple[str, str]]],
               build_cache: bool,
-              enable_cache: bool) -> Tuple[List[str], BuildManager, Graph]:
+              enable_cache: bool) -> BuildResult:
         # This handles things like '# flags: --foo'.
         options = parse_options(source, testcase, incremental_step=1)
         options.incremental = True
@@ -170,7 +171,7 @@ def build(self,
             # TODO: We need a manager and a graph in this case as well
             assert False, str('\n'.join(e.messages))
             return e.messages, None, None
-        return result.errors, result.manager, result.graph
+        return result
 
     def format_triggered(self, triggered: List[List[str]]) -> List[str]:
         result = []
diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py
index d2feece42cac..50d9d97a8a76 100644
--- a/mypy/test/testmerge.py
+++ b/mypy/test/testmerge.py
@@ -5,7 +5,7 @@
 from typing import List, Tuple, Dict, Optional
 
 from mypy import build
-from mypy.build import BuildManager, BuildSource, State, Graph
+from mypy.build import BuildManager, BuildSource, BuildResult, State, Graph
 from mypy.errors import Errors, CompileError
 from mypy.nodes import (
     Node, MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Expression, Var, TypeVarExpr,
@@ -67,19 +67,19 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
             kind = AST
 
         main_src = '\n'.join(testcase.input)
-        messages, manager, graph = self.build(main_src)
-        assert manager is not None, 'cases where CompileError occurred should not be run'
-        fine_grained_manager = FineGrainedBuildManager(manager, graph)
+        result = self.build(main_src)
+        assert result is not None, 'cases where CompileError occurred should not be run'
+        fine_grained_manager = FineGrainedBuildManager(result)
 
         a = []
-        if messages:
-            a.extend(messages)
+        if result.errors:
+            a.extend(result.errors)
 
         target_path = os.path.join(test_temp_dir, 'target.py')
         shutil.copy(os.path.join(test_temp_dir, 'target.py.next'), target_path)
 
         a.extend(self.dump(fine_grained_manager, kind))
-        old_subexpr = get_subexpressions(manager.modules['target'])
+        old_subexpr = get_subexpressions(result.manager.modules['target'])
 
         a.append('==>')
 
@@ -100,7 +100,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
                 'Invalid output ({}, line {})'.format(testcase.file,
                                                       testcase.line))
 
-    def build(self, source: str) -> Tuple[List[str], Optional[BuildManager], Dict[str, State]]:
+    def build(self, source: str) -> Optional[BuildResult]:
         options = Options()
         options.incremental = True
         options.fine_grained_incremental = True
@@ -115,8 +115,8 @@ def build(self, source: str) -> Tuple[List[str], Optional[BuildManager], Dict[st
                                alt_lib_path=test_temp_dir)
         except CompileError as e:
             # TODO: Is it okay to return None?
-            return e.messages, None, {}
-        return result.errors, result.manager, result.graph
+            return None
+        return result
 
     def build_increment(self, manager: FineGrainedBuildManager,
                         module_id: str, path: str) -> Tuple[MypyFile,

From a8d8134f5ccb645187000b15a67f406b973a6798 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 14:48:17 -0800
Subject: [PATCH 03/16] Make update.py use fscache

---
 mypy/server/update.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mypy/server/update.py b/mypy/server/update.py
index eef6cb8bd43f..77c1e182fef6 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -114,10 +114,8 @@
 Major todo items:
 
 - Fully support multiple type checking passes
-- Use mypy.fscache to access file system
 """
 
-import os.path
 from typing import Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple
 
 from mypy.build import (
@@ -132,6 +130,7 @@
 )
 from mypy.options import Options
 from mypy.types import Type
+from mypy.fscache import FileSystemCache
 from mypy.server.astdiff import (
     snapshot_symbol_table, compare_symbol_table_snapshots, is_identical_type, SnapshotItem
 )
@@ -352,7 +351,7 @@ def update_single_isolated(module: str,
         manager.log_fine_grained('new module %r' % module)
 
     old_modules = dict(manager.modules)
-    sources = get_sources(previous_modules, [(module, path)])
+    sources = get_sources(manager.fscache, previous_modules, [(module, path)])
 
     if module in manager.missing_modules:
         manager.missing_modules.remove(module)
@@ -376,7 +375,7 @@ def update_single_isolated(module: str,
             remaining_modules = []
         return BlockedUpdate(err.module_with_blocker, path, remaining_modules, err.messages)
 
-    if not os.path.isfile(path):
+    if not manager.fscache.isfile(path):
         delete_module(module, graph, manager)
         return NormalUpdate(module, path, [], None)
 
@@ -506,13 +505,14 @@ def get_module_to_path_map(manager: BuildManager) -> Dict[str, str]:
             for module, node in manager.modules.items()}
 
 
-def get_sources(modules: Dict[str, str],
+def get_sources(fscache: FileSystemCache,
+                modules: Dict[str, str],
                 changed_modules: List[Tuple[str, str]]) -> List[BuildSource]:
     # TODO: Race condition when reading from the file system; we should only read each
     # bit of external state once during a build to have a consistent view of the world
     sources = []
     for id, path in changed_modules:
-        if os.path.isfile(path):
+        if fscache.isfile(path):
             sources.append(BuildSource(path, id, None))
     return sources

From c9d1d5839374bc89fee4cf56aec67eff2bfc05c8 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 14:57:47 -0800
Subject: [PATCH 04/16] Move read_with_python_encoding

---
 mypy/build.py         | 40 ++------------------------------------
 mypy/errors.py        |  7 -------
 mypy/fscache.py       |  5 +----
 mypy/server/update.py |  1 +
 mypy/util.py          | 44 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index ae71d95c62ba..e2ac21050c7c 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -39,10 +39,10 @@
 from mypy.semanal_pass3 import SemanticAnalyzerPass3
 from mypy.checker import TypeChecker
 from mypy.indirection import TypeIndirectionVisitor
-from mypy.errors import Errors, CompileError, DecodeError, report_internal_error
+from mypy.errors import Errors, CompileError, report_internal_error
+from mypy.util import DecodeError
 from mypy.report import Reports
 from mypy import moduleinfo
-from mypy import util
 from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two
 from mypy.nodes import Expression
 from mypy.options import Options
@@ -971,42 +971,6 @@ def verify_module(id: str, path: str) -> bool:
     return True
 
 
-def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> Tuple[str, str]:
-    """Read the Python file with while obeying PEP-263 encoding detection.
-
-    Returns:
-      A tuple: the source as a string, and the hash calculated from the binary representation.
-    """
-    source_bytearray = bytearray()
-    encoding = 'utf8' if pyversion[0] >= 3 else 'ascii'
-
-    with open(path, 'rb') as f:
-        # read first two lines and check if PEP-263 coding is present
-        source_bytearray.extend(f.readline())
-        source_bytearray.extend(f.readline())
-        m = hashlib.md5(source_bytearray)
-
-        # check for BOM UTF-8 encoding and strip it out if present
-        if source_bytearray.startswith(b'\xef\xbb\xbf'):
-            encoding = 'utf8'
-            source_bytearray = source_bytearray[3:]
-        else:
-            _encoding, _ = util.find_python_encoding(source_bytearray, pyversion)
-            # check that the coding isn't mypy. We skip it since
-            # registering may not have happened yet
-            if _encoding != 'mypy':
-                encoding = _encoding
-
-        remainder = f.read()
-        m.update(remainder)
-        source_bytearray.extend(remainder)
-        try:
-            source_text = source_bytearray.decode(encoding)
-        except LookupError as lookuperr:
-            raise DecodeError(str(lookuperr))
-        return source_text, m.hexdigest()
-
-
 def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str]:
     """Return the file names for the cache files.
 
diff --git a/mypy/errors.py b/mypy/errors.py
index b1ec7ab1ece6..77abe1dc855f 100644
--- a/mypy/errors.py
+++ b/mypy/errors.py
@@ -605,13 +605,6 @@ def __init__(self,
         self.module_with_blocker = module_with_blocker
 
 
-class DecodeError(Exception):
-    """Exception raised when a file cannot be decoded due to an unknown encoding type.
-
-    Essentially a wrapper for the LookupError raised by `bytearray.decode`
-    """
-
-
 def remove_path_prefix(path: str, prefix: str) -> str:
     """If path starts with prefix, return copy of path with the prefix removed.
     Otherwise, return path. If path is None, return None.
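As a rough sketch of how the relocated helper is consumed through the cache after this
patch (not part of the diff; the call pattern is inferred from the hunks above, and the
file path is just an example):

    # Sketch: FileSystemCache memoizes reads within one "transaction";
    # flush() starts a new one. Signatures follow the hunks above.
    from mypy.fscache import FileSystemCache

    fscache = FileSystemCache((3, 6))  # pyversion, per the constructor above
    src1 = fscache.read_with_python_encoding('mypy/util.py')  # hits the disk
    src2 = fscache.read_with_python_encoding('mypy/util.py')  # served from read_cache
    assert src1 == src2
    digest = fscache.md5('mypy/util.py')  # hash cached alongside the read
    fscache.flush()  # empty all caches before the next build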
diff --git a/mypy/fscache.py b/mypy/fscache.py
index 39d07813547c..0dd3f334a9cc 100644
--- a/mypy/fscache.py
+++ b/mypy/fscache.py
@@ -31,8 +31,7 @@
 import os
 import stat
 from typing import Tuple, Dict, List
-
-from mypy.errors import DecodeError
+from mypy.util import read_with_python_encoding
 
 
 class FileSystemCache:
@@ -52,8 +51,6 @@ def flush(self) -> None:
         self.listdir_error_cache = {}  # type: Dict[str, Exception]
 
     def read_with_python_encoding(self, path: str) -> str:
-        from mypy.build import read_with_python_encoding
-
         if path in self.read_cache:
             return self.read_cache[path]
         if path in self.read_error_cache:
diff --git a/mypy/server/update.py b/mypy/server/update.py
index 77c1e182fef6..e5b2db916f3a 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -116,6 +116,7 @@
 - Fully support multiple type checking passes
 """
 
+import os
 from typing import Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple
 
 from mypy.build import (
diff --git a/mypy/util.py b/mypy/util.py
index a516041ec06c..7a37b1a1b7e3 100644
--- a/mypy/util.py
+++ b/mypy/util.py
@@ -2,6 +2,7 @@
 
 import re
 import subprocess
+import hashlib
 from xml.sax.saxutils import escape
 from typing import TypeVar, List, Tuple, Optional, Sequence, Dict
 
@@ -60,6 +61,49 @@ def find_python_encoding(text: bytes, pyversion: Tuple[int, int]) -> Tuple[str,
     return default_encoding, -1
 
 
+class DecodeError(Exception):
+    """Exception raised when a file cannot be decoded due to an unknown encoding type.
+
+    Essentially a wrapper for the LookupError raised by `bytearray.decode`
+    """
+
+
+def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> Tuple[str, str]:
+    """Read the Python file while obeying PEP-263 encoding detection.
+
+    Returns:
+      A tuple: the source as a string, and the hash calculated from the binary representation.
+    """
+    source_bytearray = bytearray()
+    encoding = 'utf8' if pyversion[0] >= 3 else 'ascii'
+
+    with open(path, 'rb') as f:
+        # read first two lines and check if PEP-263 coding is present
+        source_bytearray.extend(f.readline())
+        source_bytearray.extend(f.readline())
+        m = hashlib.md5(source_bytearray)
+
+        # check for BOM UTF-8 encoding and strip it out if present
+        if source_bytearray.startswith(b'\xef\xbb\xbf'):
+            encoding = 'utf8'
+            source_bytearray = source_bytearray[3:]
+        else:
+            _encoding, _ = find_python_encoding(source_bytearray, pyversion)
+            # check that the coding isn't mypy. We skip it since
+            # registering may not have happened yet
+            if _encoding != 'mypy':
+                encoding = _encoding
+
+        remainder = f.read()
+        m.update(remainder)
+        source_bytearray.extend(remainder)
+        try:
+            source_text = source_bytearray.decode(encoding)
+        except LookupError as lookuperr:
+            raise DecodeError(str(lookuperr))
+        return source_text, m.hexdigest()
+
+
 _python2_interpreter = None  # type: Optional[str]

From dcb7aa96106874eede386b10f2ac907ed4010661 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 15:59:48 -0800
Subject: [PATCH 05/16] Huge rampage through get_module stuff

---
 mypy/build.py          | 136 +++++++++++++----------------------------
 mypy/fscache.py        |  45 ++++++++++++--
 mypy/main.py           |   3 +-
 mypy/server/update.py  |   6 +-
 mypy/stubgen.py        |   3 +-
 mypy/test/testcheck.py |   6 +-
 mypy/test/testdmypy.py |   3 +-
 7 files changed, 91 insertions(+), 111 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index e2ac21050c7c..f873c655650f 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -208,8 +208,6 @@ def _build(sources: List[BuildSource],
 
     data_dir = default_data_dir(bin_dir)
 
-    find_module_clear_caches()
-
     # Determine the default module search path.
     lib_path = default_lib_path(data_dir,
                                 options.python_version,
@@ -550,6 +548,23 @@ def find_config_file_line_number(path: str, section: str, setting_name: str) ->
     return -1
 
 
+class FindModuleCache:
+    def __init__(self, fscache: Optional[FileSystemCache] = None) -> None:
+        self.fscache = fscache or FileSystemCache(None)
+        # Cache find_module: (id, lib_path) -> result.
+        self.results = {}  # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]]
+
+        # Cache some repeated work within distinct find_module calls: finding which
+        # elements of lib_path have even the subdirectory they'd need for the module
+        # to exist. This is shared among different module ids when they differ only
+        # in the last component.
+        self.dirs = {}  # type: Dict[Tuple[str, Tuple[str, ...]], List[str]]
+
+    def clear(self) -> None:
+        self.results.clear()
+        self.dirs.clear()
+
+
 class BuildManager:
     """This class holds shared state for building a mypy program.
 
@@ -617,6 +632,7 @@ def __init__(self, data_dir: str,
         self.saved_cache = saved_cache if saved_cache is not None else {}  # type: SavedCache
         self.stats = {}  # type: Dict[str, Any]  # Values are ints or floats
         self.fscache = fscache or FileSystemCache(self.options.python_version)
+        self.find_module_cache = FindModuleCache(self.fscache)
 
     def maybe_swap_for_shadow_path(self, path: str) -> str:
         if (self.options.shadow_file and
@@ -698,7 +714,7 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
 
     def is_module(self, id: str) -> bool:
         """Is there a file in the file system corresponding to module id?"""
-        return find_module(id, self.lib_path) is not None
+        return find_module(self.find_module_cache, id, self.lib_path) is not None
 
     def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> MypyFile:
         """Parse the source of a file with the given name.
@@ -811,74 +827,10 @@ def remove_cwd_prefix_from_path(p: str) -> str:
     return p
 
 
-# Cache find_module: (id, lib_path) -> result.
-find_module_cache = {}  # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]]
-
-# Cache some repeated work within distinct find_module calls: finding which
-# elements of lib_path have even the subdirectory they'd need for the module
-# to exist. This is shared among different module ids when they differ only
-# in the last component.
-find_module_dir_cache = {}  # type: Dict[Tuple[str, Tuple[str, ...]], List[str]]
-
-# Cache directory listings. We assume that while one os.listdir()
-# call may be more expensive than one os.stat() call, a small number
-# of os.stat() calls is quickly more expensive than caching the
-# os.listdir() outcome, and the advantage of the latter is that it
-# gives us the case-correct filename on Windows and Mac.
-find_module_listdir_cache = {}  # type: Dict[str, Optional[List[str]]]
-
-# Cache for is_file()
-find_module_is_file_cache = {}  # type: Dict[str, bool]
-
-# Cache for isdir(join(head, tail))
-find_module_isdir_cache = {}  # type: Dict[Tuple[str, str], bool]
-
-
-def find_module_clear_caches() -> None:
-    find_module_cache.clear()
-    find_module_dir_cache.clear()
-    find_module_listdir_cache.clear()
-    find_module_is_file_cache.clear()
-    find_module_isdir_cache.clear()
-
-
-def list_dir(path: str) -> Optional[List[str]]:
-    """Return a cached directory listing.
-
-    Returns None if the path doesn't exist or isn't a directory.
-    """
-    res = find_module_listdir_cache.get(path)
-    if res is None:
-        try:
-            res = os.listdir(path)
-        except OSError:
-            res = None
-        find_module_listdir_cache[path] = res
-    return res
-
-
-def is_file(path: str) -> bool:
-    """Return whether path exists and is a file.
-
-    On case-insensitive filesystems (like Mac or Windows) this returns
-    False if the case of the path's last component does not exactly
-    match the case found in the filesystem.
-    """
-    res = find_module_is_file_cache.get(path)
-    if res is None:
-        head, tail = os.path.split(path)
-        if not tail:
-            res = False
-        else:
-            names = list_dir(head)
-            res = names is not None and tail in names and os.path.isfile(path)
-        find_module_is_file_cache[path] = res
-    return res
-
-
-def find_module(id: str, lib_path_arg: Iterable[str]) -> Optional[str]:
+def find_module(cache: FindModuleCache, id: str, lib_path_arg: Iterable[str]) -> Optional[str]:
     """Return the path of the module source file, or None if not found."""
     lib_path = tuple(lib_path_arg)
+    fscache = cache.fscache
 
     def find() -> Optional[str]:
         # If we're looking for a module like 'foo.bar.baz', it's likely that most of the
         # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
         # that only once and cache it for when we look for modules like 'foo.bar.blah'
         # that will require the same subdirectory.
         components = id.split('.')
         dir_chain = os.sep.join(components[:-1])  # e.g., 'foo/bar'
-        if (dir_chain, lib_path) not in find_module_dir_cache:
+        if (dir_chain, lib_path) not in cache.dirs:
             dirs = []
             for pathitem in lib_path:
                 # e.g., '/usr/lib/python3.4/foo/bar'
-                isdir = find_module_isdir_cache.get((pathitem, dir_chain))
-                if isdir is None:
-                    dir = os.path.normpath(os.path.join(pathitem, dir_chain))
-                    isdir = os.path.isdir(dir)
-                    find_module_isdir_cache[pathitem, dir_chain] = isdir
-                if isdir:
+                dir = os.path.normpath(os.path.join(pathitem, dir_chain))
+                if fscache.isdir(dir):
                     dirs.append(dir)
-            find_module_dir_cache[dir_chain, lib_path] = dirs
-        candidate_base_dirs = find_module_dir_cache[dir_chain, lib_path]
+            cache.dirs[dir_chain, lib_path] = dirs
+        candidate_base_dirs = cache.dirs[dir_chain, lib_path]
 
         # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now
         # contains just the subdirectories 'foo/bar' that actually exist under the
         # elements of lib_path. This is probably much shorter than lib_path itself.
         # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories.
         seplast = os.sep + components[-1]  # so e.g. '/baz'
         sepinit = os.sep + '__init__'
         for base_dir in candidate_base_dirs:
             base_path = base_dir + seplast  # so e.g. '/usr/lib/python3.4/foo/bar/baz'
             # Prefer package over module, i.e. baz/__init__.py* over baz.py*.
             for extension in PYTHON_EXTENSIONS:
                 path = base_path + sepinit + extension
-                if is_file(path) and verify_module(id, path):
+                if fscache.isfile_case(path) and verify_module(fscache, id, path):
                     return path
             # No package, look for module.
             for extension in PYTHON_EXTENSIONS:
                 path = base_path + extension
-                if is_file(path) and verify_module(id, path):
+                if fscache.isfile_case(path) and verify_module(fscache, id, path):
                     return path
         return None
 
     key = (id, lib_path)
-    if key not in find_module_cache:
-        find_module_cache[key] = find()
-    return find_module_cache[key]
+    if key not in cache.results:
+        cache.results[key] = find()
+    return cache.results[key]
 
 
-def find_modules_recursive(module: str, lib_path: List[str]) -> List[BuildSource]:
-    module_path = find_module(module, lib_path)
+def find_modules_recursive(cache: FindModuleCache,
+                           module: str, lib_path: List[str]) -> List[BuildSource]:
+    module_path = find_module(cache, module, lib_path)
     if not module_path:
         return []
     result = [BuildSource(module_path, module, None)]
@@ -948,24 +897,24 @@ def find_modules_recursive(module: str, lib_path: List[str]) -> List[BuildSource
                     (os.path.isfile(os.path.join(abs_path, '__init__.py')) or
                      os.path.isfile(os.path.join(abs_path, '__init__.pyi'))):
                 hits.add(item)
-                result += find_modules_recursive(module + '.' + item, lib_path)
+                result += find_modules_recursive(cache, module + '.' + item, lib_path)
             elif item != '__init__.py' and item != '__init__.pyi' and \
                     item.endswith(('.py', '.pyi')):
                 mod = item.split('.')[0]
                 if mod not in hits:
                     hits.add(mod)
                     result += find_modules_recursive(
-                        module + '.' + mod, lib_path)
+                        cache, module + '.' + mod, lib_path)
     return result
 
 
-def verify_module(id: str, path: str) -> bool:
+def verify_module(fscache: FileSystemCache, id: str, path: str) -> bool:
     """Check that all packages containing id have a __init__ file."""
     if path.endswith(('__init__.py', '__init__.pyi')):
         path = dirname(path)
     for i in range(id.count('.')):
         path = dirname(path)
-        if not any(is_file(os.path.join(path, '__init__{}'.format(extension)))
+        if not any(fscache.isfile_case(os.path.join(path, '__init__{}'.format(extension)))
                    for extension in PYTHON_EXTENSIONS):
             return False
     return True
@@ -1578,7 +1527,7 @@ def __init__(self,
             # difference and just assume 'builtins' everywhere,
             # which simplifies code.
             file_id = '__builtin__'
-        path = find_module(file_id, manager.lib_path)
+        path = find_module(manager.find_module_cache, file_id, manager.lib_path)
         if path:
             # For non-stubs, look at options.follow_imports:
             # - normal (default) -> fully analyze
@@ -2072,11 +2021,8 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
                       stubs_found=sum(g.path is not None and g.path.endswith('.pyi')
                                       for g in graph.values()),
                       graph_load_time=(t1 - t0),
-                      fm_cache_size=len(find_module_cache),
-                      fm_dir_cache_size=len(find_module_dir_cache),
-                      fm_listdir_cache_size=len(find_module_listdir_cache),
-                      fm_is_file_cache_size=len(find_module_is_file_cache),
-                      fm_isdir_cache_size=len(find_module_isdir_cache),
+                      fm_cache_size=len(manager.find_module_cache.results),
+                      fm_dir_cache_size=len(manager.find_module_cache.dirs),
                       )
     if not graph:
         print("Nothing to do?!")
diff --git a/mypy/fscache.py b/mypy/fscache.py
index 0dd3f334a9cc..221d4dbb70f3 100644
--- a/mypy/fscache.py
+++ b/mypy/fscache.py
@@ -30,15 +30,19 @@
 
 import os
 import stat
-from typing import Tuple, Dict, List
+from typing import Tuple, Dict, List, Optional
 from mypy.util import read_with_python_encoding
 
 
 class FileSystemCache:
-    def __init__(self, pyversion: Tuple[int, int]) -> None:
+    def __init__(self, pyversion: Optional[Tuple[int, int]] = None) -> None:
         self.pyversion = pyversion
         self.flush()
 
+    def set_pyversion(self, pyversion: Tuple[int, int]) -> None:
+        assert not self.read_cache
+        self.pyversion = pyversion
+
     def flush(self) -> None:
         """Start another transaction and empty all caches."""
         self.stat_cache = {}  # type: Dict[str, os.stat_result]
@@ -48,8 +52,10 @@ def flush(self) -> None:
         self.hash_cache = {}  # type: Dict[str, str]
         self.listdir_cache = {}  # type: Dict[str, List[str]]
         self.listdir_error_cache = {}  # type: Dict[str, Exception]
+        self.isfile_case_cache = {}  # type: Dict[str, bool]
 
     def read_with_python_encoding(self, path: str) -> str:
+        assert self.pyversion
         if path in self.read_cache:
             return self.read_cache[path]
         if path in self.read_error_cache:
@@ -95,12 +101,39 @@ def listdir(self, path: str) -> List[str]:
         return results
 
     def isfile(self, path: str) -> bool:
-        st = self.stat(path)
-        return stat.S_ISREG(st.st_mode)
+        try:
+            st = self.stat(path)
+            return stat.S_ISREG(st.st_mode)
+        except OSError:
+            return False
+
+    def isfile_case(self, path: str) -> bool:
+        """Return whether path exists and is a file.
+
+        On case-insensitive filesystems (like Mac or Windows) this returns
+        False if the case of the path's last component does not exactly
+        match the case found in the filesystem.
+ """ + if path in self.isfile_case_cache: + return self.isfile_case_cache[path] + head, tail = os.path.split(path) + if not tail: + res = False + else: + try: + names = self.listdir(head) + res = tail in names and self.isfile(path) + except OSError: + res = False + self.isfile_case_cache[path] = res + return res def isdir(self, path: str) -> bool: - st = self.stat(path) - return stat.S_ISDIR(st.st_mode) + try: + st = self.stat(path) + return stat.S_ISDIR(st.st_mode) + except OSError: + return False def exists(self, path: str) -> bool: try: diff --git a/mypy/main.py b/mypy/main.py index b7c1117ea029..0c5f110f97c9 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -527,7 +527,8 @@ def add_invertible_flag(flag: str, .format(special_opts.package)) options.build_type = BuildType.MODULE lib_path = [os.getcwd()] + build.mypy_path() - targets = build.find_modules_recursive(special_opts.package, lib_path) + targets = build.find_modules_recursive(build.FindModuleCache(), + special_opts.package, lib_path) if not targets: fail("Can't find package '{}'".format(special_opts.package)) return targets, options diff --git a/mypy/server/update.py b/mypy/server/update.py index e5b2db916f3a..6c80cb9ee838 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -120,7 +120,7 @@ from typing import Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple from mypy.build import ( - BuildManager, State, BuildSource, BuildResult, Graph, load_graph, find_module_clear_caches, + BuildManager, State, BuildSource, BuildResult, Graph, load_graph, PRI_INDIRECT, DEBUG_FINE_GRAINED, ) from mypy.checker import DeferredNode @@ -198,8 +198,8 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]: self.manager.fscache.flush() return self.previous_messages - # Reset global caches for the new build. - find_module_clear_caches() + # Reset find_module's caches for the new build. + self.manager.find_module_cache.clear() self.triggered = [] changed_modules = dedupe_modules(changed_modules + self.stale) diff --git a/mypy/stubgen.py b/mypy/stubgen.py index bcb704e0a168..0495ef2e1e96 100755 --- a/mypy/stubgen.py +++ b/mypy/stubgen.py @@ -156,7 +156,8 @@ def find_module_path_and_all(module: str, pyversion: Tuple[int, int], module_all = getattr(mod, '__all__', None) else: # Find module by going through search path. 
-        module_path = mypy.build.find_module(module, ['.'] + search_path)
+        module_path = mypy.build.find_module(mypy.build.FindModuleCache(),
+                                             module, ['.'] + search_path)
         if not module_path:
             raise SystemExit(
                 "Can't find module '{}' (consider using --search-path)".format(module))
diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py
index 0b1f1573760e..7bcddc36e5e5 100644
--- a/mypy/test/testcheck.py
+++ b/mypy/test/testcheck.py
@@ -7,7 +7,7 @@
 from typing import Dict, List, Optional, Set, Tuple
 
 from mypy import build, defaults
-from mypy.build import BuildSource, find_module_clear_caches
+from mypy.build import BuildSource
 from mypy.test.config import test_temp_dir
 from mypy.test.data import DataDrivenTestCase, DataSuite
 from mypy.test.helpers import (
@@ -113,7 +113,6 @@ def clear_cache(self) -> None:
             shutil.rmtree(dn)
 
     def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int = 0) -> None:
-        find_module_clear_caches()
         original_program_text = '\n'.join(testcase.input)
         module_data = self.parse_module(original_program_text, incremental_step)
 
@@ -312,7 +311,8 @@ def parse_module(self,
             module_names = m.group(1)
             out = []
             for module_name in module_names.split(' '):
-                path = build.find_module(module_name, [test_temp_dir])
+                path = build.find_module(build.FindModuleCache(),
+                                         module_name, [test_temp_dir])
                 assert path is not None, "Can't find ad hoc case file"
                 with open(path) as f:
                     program_text = f.read()
diff --git a/mypy/test/testdmypy.py b/mypy/test/testdmypy.py
index 5a5cd80ddcc8..a79ba80daafe 100644
--- a/mypy/test/testdmypy.py
+++ b/mypy/test/testdmypy.py
@@ -81,7 +81,6 @@ def clear_cache(self) -> None:
 
     def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> None:
         assert incremental_step >= 1
-        build.find_module_clear_caches()
         original_program_text = '\n'.join(testcase.input)
 
         if incremental_step > 1:
@@ -271,7 +270,7 @@ def parse_module(self,
             module_names = m.group(1)
             out = []  # type: List[Tuple[str, str, Optional[str]]]
             for module_name in module_names.split(' '):
-                path = build.find_module(module_name, [test_temp_dir])
+                path = build.find_module(build.FindModuleCache(), module_name, [test_temp_dir])
                 if path is None and module_name.startswith(NON_EXISTENT_PREFIX):
                     # This is a special name for a file that we don't want to exist.
                     assert '.' not in module_name  # TODO: Packages not supported here

From 788c8c550627afdb6b75cbb1aad6bfb6d25e4c08 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 22 Feb 2018 18:00:48 -0800
Subject: [PATCH 06/16] Remove some dead code

---
 mypy/fscache.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/mypy/fscache.py b/mypy/fscache.py
index 221d4dbb70f3..8653b8c7fe8c 100644
--- a/mypy/fscache.py
+++ b/mypy/fscache.py
@@ -39,10 +39,6 @@ def __init__(self, pyversion: Optional[Tuple[int, int]] = None) -> None:
         self.pyversion = pyversion
         self.flush()
 
-    def set_pyversion(self, pyversion: Tuple[int, int]) -> None:
-        assert not self.read_cache
-        self.pyversion = pyversion
-
     def flush(self) -> None:
         """Start another transaction and empty all caches."""
         self.stat_cache = {}  # type: Dict[str, os.stat_result]

From 11094ee7e855f271db5c1070edae274841d00929 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Fri, 23 Feb 2018 10:44:37 -0800
Subject: [PATCH 07/16] Fix an error message mismatch on windows.
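The mismatch being worked around: `IOError.strerror` for a failed `os.stat()` is
platform-dependent, while `os.strerror(errno)` is stable, per the comment added in the
hunk below. A minimal sketch of the normalization (the file name here is illustrative):

    import os

    try:
        open('does-not-exist.py', 'rb')
    except IOError as ioerr:
        # ioerr.strerror varies between Windows and POSIX; normalizing
        # through os.strerror(ioerr.errno) keeps the message identical.
        print("mypy: can't read file '{}': {}".format(
            'does-not-exist.py', os.strerror(ioerr.errno)))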
---
 mypy/build.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mypy/build.py b/mypy/build.py
index f873c655650f..6152b835b6fc 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -1796,8 +1796,11 @@ def parse_file(self) -> None:
                 source = manager.fscache.read_with_python_encoding(path)
                 self.source_hash = manager.fscache.md5(path)
             except IOError as ioerr:
+                # ioerr.strerror differs for os.stat failures between Windows and
+                # other systems, but os.strerror(ioerr.errno) does not, so we use that.
                 raise CompileError([
-                    "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)])
+                    "mypy: can't read file '{}': {}".format(
+                        self.path, os.strerror(ioerr.errno))])
             except (UnicodeDecodeError, DecodeError) as decodeerr:
                 raise CompileError([
                     "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))])

From afe715d3227e7a58200c6bb547da480cc266c58b Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Wed, 28 Feb 2018 15:58:47 -0800
Subject: [PATCH 08/16] Fix stubgen after move

---
 mypy/stubgen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mypy/stubgen.py b/mypy/stubgen.py
index 0495ef2e1e96..83f0349888d0 100755
--- a/mypy/stubgen.py
+++ b/mypy/stubgen.py
@@ -202,7 +202,7 @@ def generate_stub(path: str,
                   include_private: bool = False
                   ) -> None:
-    source, _ = mypy.build.read_with_python_encoding(path, pyversion)
+    source, _ = mypy.util.read_with_python_encoding(path, pyversion)
     options = MypyOptions()
     options.python_version = pyversion
     try:

From 893e1dacc3fecc49f76b301fce1f0876f94e72c8 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Wed, 28 Feb 2018 16:39:11 -0800
Subject: [PATCH 09/16] Remove some now out-of-date comments.

---
 mypy/build.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 6152b835b6fc..6aa23b4396ba 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -1065,7 +1065,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
         manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
         return None
 
-    # TODO: Share stat() outcome with find_module()
     path = os.path.abspath(path)
     st = manager.get_stat(path)  # TODO: Errors
     if not stat.S_ISREG(st.st_mode):
@@ -1310,14 +1309,6 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None:
 d. from P import M; checks filesystem whether module P.M exists in
    filesystem.
 
-e. Race conditions, where somebody modifies a file while we're
-   processing. I propose not to modify the algorithm to handle this,
-   but to detect when this could lead to inconsistencies. (For
-   example, when we decide on the dependencies based on cache
-   metadata, and then we decide to re-parse a file because of a stale
-   dependency, if the re-parsing leads to a different list of
-   dependencies we should warn the user or start over.)
-
 Steps
 -----
 
@@ -1852,11 +1843,6 @@ def compute_dependencies(self) -> None:
         if self.id != 'builtins' and 'builtins' not in dep_line_map:
             dependencies.append('builtins')
 
-        # NOTE: What to do about race conditions (like editing the
-        # file while mypy runs)? A previous version of this code
-        # explicitly checked for this, but ran afoul of other reasons
-        # for differences (e.g. silent mode).
-
         # Missing dependencies will be moved from dependencies to
         # suppressed when they fail to be loaded in load_graph.
         self.dependencies = dependencies

From 0f5a41177284d7e5d0e659c63500eb56ac56fafa Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Thu, 1 Mar 2018 18:54:15 -0800
Subject: [PATCH 10/16] Tweak flushing logic to better match #4664

They'll still merge conflict, but the resolution will be trivial now
(I'm trying to avoid making one depend on the other)
---
 mypy/dmypy_server.py         | 4 ++--
 mypy/server/update.py        | 1 -
 mypy/test/testfinegrained.py | 1 +
 mypy/test/testmerge.py       | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index da0edff7240c..2a62483e0b89 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -253,8 +253,7 @@ def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str,
     def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
         # The file system cache we create gets passed off to
         # BuildManager, and thence to FineGrainedBuildManager, which
-        # assumes responsibility for clearing it at the appropriate
-        # times (after init and update()).
+        # assumes responsibility for clearing it after updates.
         fscache = FileSystemCache(self.options.python_version)
         self.fswatcher = FileSystemWatcher(fscache)
         self.update_sources(sources)
@@ -295,6 +294,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
 
             # Run an update
             messages = self.fine_grained_manager.update(self.find_changed(sources))
+            fscache.flush()
 
         status = 1 if messages else 0
         return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
diff --git a/mypy/server/update.py b/mypy/server/update.py
index 6c80cb9ee838..4bf625ed3774 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -175,7 +175,6 @@ def __init__(self, result: BuildResult) -> None:
             manager.saved_cache = {}
         # Active triggers during the last update
         self.triggered = []  # type: List[str]
-        self.manager.fscache.flush()
 
     def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
         """Update previous build result by processing changed modules.
diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py
index 83512cf0837f..7ffc3bef159c 100644
--- a/mypy/test/testfinegrained.py
+++ b/mypy/test/testfinegrained.py
@@ -86,6 +86,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
 
         fine_grained_manager = None
         if not self.use_cache:
+            result.manager.fscache.flush()
             fine_grained_manager = FineGrainedBuildManager(result)
             if CHECK_CONSISTENCY:
                 check_consistency(fine_grained_manager)
diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py
index 50d9d97a8a76..18fc51d8cbb3 100644
--- a/mypy/test/testmerge.py
+++ b/mypy/test/testmerge.py
@@ -69,6 +69,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
         main_src = '\n'.join(testcase.input)
         result = self.build(main_src)
         assert result is not None, 'cases where CompileError occurred should not be run'
+        result.manager.fscache.flush()
         fine_grained_manager = FineGrainedBuildManager(result)
 
         a = []

From f5164730727872bdb77fecd81e78dc99da8d9fa1 Mon Sep 17 00:00:00 2001
From: Michael Sullivan
Date: Wed, 7 Mar 2018 14:34:45 -0800
Subject: [PATCH 11/16] SQUASHED MERGE COMMIT WITH dmypy-fg-testing

---
 .gitignore                                  |   3 +
 docs/source/command_line.rst                |  21 +-
 docs/source/revision_history.rst            |   5 +
 mypy/build.py                               |  25 +-
 mypy/checker.py                             |   4 +-
 mypy/dmypy.py                               |  20 +-
 mypy/dmypy_server.py                        |  74 +++---
 mypy/nodes.py                               |   6 +
 mypy/options.py                             |   2 +-
 mypy/semanal.py                             |  20 +-
 mypy/semanal_pass3.py                       |   5 +-
 mypy/server/astdiff.py                      |  11 +-
 mypy/server/astmerge.py                     |  47 +++-
 mypy/server/aststrip.py                     |  26 +-
 mypy/server/deps.py                         |  30 ++-
 mypy/server/update.py                       |  15 +-
 mypy/test/helpers.py                        |  49 ++++
 mypy/test/testcheck.py                      |  12 +-
 mypy/test/testdiff.py                       |   2 +
 mypy/test/testdmypy.py                      |  18 +-
 mypy/test/testfinegrained.py                | 155 ++++++------
 mypy/test/testmerge.py                      |   2 +
 mypy/test/testpythoneval.py                 |  38 +--
 mypy/test/testsemanal.py                    |   2 +
 mypy/types.py                               |   6 +-
 mypy/version.py                             |   2 +-
 mypy/waiter.py                              |   2 +-
 runtests.py                                 |  16 +-
 test-data/unit/cmdline.test                 |  14 +-
 test-data/unit/deps-statements.test         |  17 ++
 test-data/unit/diff.test                    |  34 +++
 test-data/unit/fine-grained-modules.test    | 142 ++++++++++-
 test-data/unit/fine-grained.test            | 249 +++++++++++++++++++-
 test-data/unit/lib-stub/mypy_extensions.pyi |   2 +
 test-data/unit/merge.test                   |  27 +++
 test-data/unit/reports.test                 |   2 +-
 36 files changed, 860 insertions(+), 245 deletions(-)

diff --git a/.gitignore b/.gitignore
index 98cbe9c3e6be..c282f07e6fb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,6 @@ dmypy.json
 # Coverage Files
 htmlcov
 .coverage*
+
+# pytest cache
+.pytest_cache/
diff --git a/docs/source/command_line.rst b/docs/source/command_line.rst
index 6c9f5bdeb443..a72390879911 100644
--- a/docs/source/command_line.rst
+++ b/docs/source/command_line.rst
@@ -11,14 +11,17 @@ flag (or its long form ``--help``)::
 
     usage: mypy [-h] [-v] [-V] [--python-version x.y] [--platform PLATFORM] [-2]
                 [--ignore-missing-imports]
                 [--follow-imports {normal,silent,skip,error}]
-                [--disallow-any-{unimported,expr,decorated,explicit,generics}]
-                [--disallow-untyped-calls] [--disallow-untyped-defs]
+                [--disallow-any-unimported] [--disallow-any-expr]
+                [--disallow-any-decorated] [--disallow-any-explicit]
+                [--disallow-any-generics] [--disallow-untyped-calls]
+                [--disallow-untyped-defs] [--disallow-incomplete-defs]
                 [--check-untyped-defs] [--disallow-subclassing-any]
-                [--warn-incomplete-stub] [--warn-redundant-casts]
-                [--no-warn-no-return] [--warn-return-any] [--warn-unused-ignores]
+                [--warn-incomplete-stub] [--disallow-untyped-decorators]
+                [--warn-redundant-casts] [--no-warn-no-return] [--warn-return-any]
+                [--warn-unused-ignores] [--warn-unused-configs]
                 [--show-error-context] [--no-implicit-optional] [-i]
-                [--quick-and-dirty] [--cache-dir DIR] [--skip-version-check]
-                [--strict-optional]
+                [--quick-and-dirty] [--cache-dir DIR] [--cache-fine-grained]
+                [--skip-version-check] [--strict-optional]
                 [--strict-optional-whitelist [GLOB [GLOB ...]]]
                 [--junit-xml JUNIT_XML] [--pdb] [--show-traceback] [--stats]
                 [--inferstats] [--custom-typing MODULE]
@@ -28,9 +31,9 @@ flag (or its long form ``--help``)::
                 [--shadow-file SOURCE_FILE SHADOW_FILE] [--any-exprs-report DIR]
                 [--cobertura-xml-report DIR] [--html-report DIR]
                 [--linecount-report DIR] [--linecoverage-report DIR]
-                [--memory-xml-report DIR]
-                [--txt-report DIR] [--xml-report DIR] [--xslt-html-report DIR]
-                [--xslt-txt-report DIR] [-m MODULE] [-c PROGRAM_TEXT] [-p PACKAGE]
+                [--memory-xml-report DIR] [--txt-report DIR] [--xml-report DIR]
+                [--xslt-html-report DIR] [--xslt-txt-report DIR] [-m MODULE]
+                [-c PROGRAM_TEXT] [-p PACKAGE]
                 [files [files ...]]
 
 (etc., too long to show everything here)
diff --git a/docs/source/revision_history.rst b/docs/source/revision_history.rst
index 60ee8787097b..a528ca4dc456 100644
--- a/docs/source/revision_history.rst
+++ b/docs/source/revision_history.rst
@@ -3,6 +3,11 @@ Revision history
 
 List of major changes:
 
+- March 2018
+  * Publish ``mypy`` version 0.570 on PyPI.
+
+  * Add support for :ref:`attrs_package`.
+
 - December 2017
   * Publish ``mypy`` version 0.560 on PyPI.
 
diff --git a/mypy/build.py b/mypy/build.py
index 6aa23b4396ba..326392ee77d0 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -35,7 +35,7 @@
 from mypy.nodes import (MODULE_REF, MypyFile, Node, ImportBase, Import, ImportFrom, ImportAll)
 from mypy.semanal_pass1 import SemanticAnalyzerPass1
-from mypy.semanal import SemanticAnalyzerPass2
+from mypy.semanal import SemanticAnalyzerPass2, apply_semantic_analyzer_patches
 from mypy.semanal_pass3 import SemanticAnalyzerPass3
 from mypy.checker import TypeChecker
 from mypy.indirection import TypeIndirectionVisitor
@@ -611,6 +611,7 @@ def __init__(self, data_dir: str,
         self.data_dir = data_dir
         self.errors = errors
         self.errors.set_ignore_prefix(ignore_prefix)
+        self.only_load_from_cache = options.use_fine_grained_cache
         self.lib_path = tuple(lib_path)
         self.source_set = source_set
         self.reports = reports
@@ -1586,6 +1587,13 @@ def __init__(self,
                 for id, line in zip(self.meta.dependencies, self.meta.dep_lines)}
             self.child_modules = set(self.meta.child_modules)
         else:
+            # In fine-grained cache mode, pretend we only know about modules that
+            # have cache information and defer handling new modules until the
+            # fine-grained update.
+            if manager.only_load_from_cache:
+                manager.log("Deferring module to fine-grained update %s (%s)" % (path, id))
+                raise ModuleNotFound
+
             # Parse the file (and then some) to get the dependencies.
             self.parse_file()
             self.compute_dependencies()
 
@@ -1870,9 +1878,7 @@ def semantic_analysis_pass_three(self) -> None:
             self.patches = patches + self.patches
 
     def semantic_analysis_apply_patches(self) -> None:
-        patches_by_priority = sorted(self.patches, key=lambda x: x[0])
-        for priority, patch_func in patches_by_priority:
-            patch_func()
+        apply_semantic_analyzer_patches(self.patches)
 
     def type_check_first_pass(self) -> None:
         if self.options.semantic_analysis_only:
@@ -2005,6 +2011,15 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
     manager.log("Mypy version %s" % __version__)
     t0 = time.time()
     graph = load_graph(sources, manager)
+
+    # This is a kind of unfortunate hack to work around some of fine-grained's
+    # fragility: if we have loaded less than 50% of the specified files from
+    # cache in fine-grained cache mode, load the graph again honestly.
+    if manager.options.use_fine_grained_cache and len(graph) < 0.50 * len(sources):
+        manager.log("Redoing load_graph because too much was missing")
+        manager.only_load_from_cache = False
+        graph = load_graph(sources, manager)
+
     t1 = time.time()
     manager.add_stats(graph_size=len(graph),
                       stubs_found=sum(g.path is not None and g.path.endswith('.pyi')
                                       for g in graph.values()),
@@ -2115,7 +2130,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager,
     there are syntax errors.
     """
 
-    graph = old_graph or {}  # type: Graph
+    graph = old_graph if old_graph is not None else {}  # type: Graph
 
     # The deque is used to implement breadth-first traversal.
     # TODO: Consider whether to go depth-first instead. This may
diff --git a/mypy/checker.py b/mypy/checker.py
index c5007dc32f47..77c9282e4061 100644
--- a/mypy/checker.py
+++ b/mypy/checker.py
@@ -874,12 +874,12 @@ def is_trivial_body(self, block: Block) -> bool:
         body = block.body
 
         # Skip a docstring
-        if (isinstance(body[0], ExpressionStmt) and
+        if (body and isinstance(body[0], ExpressionStmt) and
                 isinstance(body[0].expr, (StrExpr, UnicodeExpr))):
             body = block.body[1:]
 
         if len(body) == 0:
-            # There's only a docstring.
+            # There's only a docstring (or no body at all).
             return True
         elif len(body) > 1:
             return False
diff --git a/mypy/dmypy.py b/mypy/dmypy.py
index 3a927f069ea1..d18f5293d9b4 100644
--- a/mypy/dmypy.py
+++ b/mypy/dmypy.py
@@ -17,6 +17,7 @@
 from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, TypeVar
 
 from mypy.dmypy_util import STATUS_FILE, receive
+from mypy.util import write_junit_xml
 
 # Argument parser. Subparsers are tied to action functions by the
 # @action(subparse) decorator.
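For reference, the reporting path these dmypy.py hunks wire up looks roughly like this
(a sketch between hunks, not part of the diff; the response values are hypothetical,
while the write_junit_xml call shape matches the hunk below):

    # Sketch of the new check_output() junit flow (hypothetical response dict).
    from mypy.util import write_junit_xml

    response = {'out': "foo.py:1: error: unsupported operand\n", 'err': '',
                'status': 1, 'roundtrip_time': 0.25}
    messages = (response['out'] + response['err']).splitlines()
    # Same argument order as below: (time, is_error, messages, path).
    write_junit_xml(response['roundtrip_time'], bool(response['err']),
                    messages, 'junit.xml')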
@@ -50,12 +51,14 @@
                     help="Check some files (requires running daemon)")
 p.add_argument('-v', '--verbose', action='store_true', help="Print detailed status")
 p.add_argument('-q', '--quiet', action='store_true', help=argparse.SUPPRESS)  # Deprecated
+p.add_argument('--junit-xml', help="Write junit.xml to the given file")
 p.add_argument('files', metavar='FILE', nargs='+', help="File (or directory) to check")
 recheck_parser = p = subparsers.add_parser('recheck',
     help="Check the same files as the most recent check run (requires running daemon)")
 p.add_argument('-v', '--verbose', action='store_true', help="Print detailed status")
 p.add_argument('-q', '--quiet', action='store_true', help=argparse.SUPPRESS)  # Deprecated
+p.add_argument('--junit-xml', help="Write junit.xml to the given file")

 hang_parser = p = subparsers.add_parser('hang', help="Hang for 100 seconds")

@@ -144,8 +147,8 @@ def do_restart(args: argparse.Namespace) -> None:

 def start_server(args: argparse.Namespace) -> None:
     """Start the server from command arguments and wait for it."""
     # Lazy import so this import doesn't slow down other commands.
-    from mypy.dmypy_server import daemonize, Server
-    if daemonize(Server(args.flags).serve, args.log_file) != 0:
+    from mypy.dmypy_server import daemonize, Server, process_start_options
+    if daemonize(Server(process_start_options(args.flags)).serve, args.log_file) != 0:
         sys.exit(1)
     wait_for_server()

@@ -221,7 +224,7 @@ def do_check(args: argparse.Namespace) -> None:
     response = request('check', files=args.files)
     t1 = time.time()
     response['roundtrip_time'] = t1 - t0
-    check_output(response, args.verbose)
+    check_output(response, args.verbose, args.junit_xml)


 @action(recheck_parser)
@@ -234,10 +237,10 @@ def do_recheck(args: argparse.Namespace) -> None:
     response = request('recheck')
     t1 = time.time()
     response['roundtrip_time'] = t1 - t0
-    check_output(response, args.verbose)
+    check_output(response, args.verbose, args.junit_xml)


-def check_output(response: Dict[str, Any], verbose: bool) -> None:
+def check_output(response: Dict[str, Any], verbose: bool, junit_xml: Optional[str]) -> None:
     """Print the output from a check or recheck command.

     Call sys.exit() unless the status code is zero.
@@ -252,6 +255,9 @@ def check_output(response: Dict[str, Any], verbose: bool) -> None:
         sys.stderr.write(err)
     if verbose:
         show_stats(response)
+    if junit_xml:
+        messages = (out + err).splitlines()
+        write_junit_xml(response['roundtrip_time'], bool(err), messages, junit_xml)
     if status_code:
         sys.exit(status_code)

@@ -277,8 +283,8 @@ def do_hang(args: argparse.Namespace) -> None:

 def do_daemon(args: argparse.Namespace) -> None:
     """Serve requests in the foreground."""
     # Lazy import so this import doesn't slow down other commands.
- from mypy.dmypy_server import Server - Server(args.flags).serve() + from mypy.dmypy_server import Server, process_start_options + Server(process_start_options(args.flags)).serve() @action(help_parser) diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index 2a62483e0b89..690aa3c0f954 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -20,11 +20,12 @@ import mypy.build import mypy.errors import mypy.main -import mypy.server.update +from mypy.server.update import FineGrainedBuildManager from mypy.dmypy_util import STATUS_FILE, receive from mypy.gclogger import GcLogger from mypy.fscache import FileSystemCache from mypy.fswatcher import FileSystemWatcher, FileData +from mypy.options import Options def daemonize(func: Callable[[], None], log_file: Optional[str] = None) -> int: @@ -78,30 +79,44 @@ def daemonize(func: Callable[[], None], log_file: Optional[str] = None) -> int: SOCKET_NAME = 'dmypy.sock' # In current directory. +def process_start_options(flags: List[str]) -> Options: + import mypy.main + sources, options = mypy.main.process_options(['-i'] + flags, + require_targets=False, + server_options=True) + if sources: + sys.exit("dmypy: start/restart does not accept sources") + if options.report_dirs: + sys.exit("dmypy: start/restart cannot generate reports") + if options.junit_xml: + sys.exit("dmypy: start/restart does not support --junit-xml; " + "pass it to check/recheck instead") + if not options.incremental: + sys.exit("dmypy: start/restart should not disable incremental mode") + if options.quick_and_dirty: + sys.exit("dmypy: start/restart should not specify quick_and_dirty mode") + if options.use_fine_grained_cache and not options.fine_grained_incremental: + sys.exit("dmypy: fine-grained cache can only be used in experimental mode") + # Our file change tracking can't yet handle changes to files that aren't + # specified in the sources list. + if options.follow_imports not in ('skip', 'error'): + sys.exit("dmypy: follow-imports must be 'skip' or 'error'") + return options + + class Server: # NOTE: the instance is constructed in the parent process but # serve() is called in the grandchild (by daemonize()). - def __init__(self, flags: List[str]) -> None: + def __init__(self, options: Options, alt_lib_path: Optional[str] = None) -> None: """Initialize the server with the desired mypy flags.""" self.saved_cache = {} # type: mypy.build.SavedCache - self.fine_grained_initialized = False - sources, options = mypy.main.process_options(['-i'] + flags, - require_targets=False, - server_options=True) self.fine_grained = options.fine_grained_incremental - if sources: - sys.exit("dmypy: start/restart does not accept sources") - if options.report_dirs: - sys.exit("dmypy: start/restart cannot generate reports") - if not options.incremental: - sys.exit("dmypy: start/restart should not disable incremental mode") - if options.quick_and_dirty: - sys.exit("dmypy: start/restart should not specify quick_and_dirty mode") - if options.use_fine_grained_cache and not options.fine_grained_incremental: - sys.exit("dmypy: fine-grained cache can only be used in experimental mode") self.options = options + self.alt_lib_path = alt_lib_path + self.fine_grained_manager = None # type: Optional[FineGrainedBuildManager] + if os.path.isfile(STATUS_FILE): os.unlink(STATUS_FILE) if self.fine_grained: @@ -211,30 +226,34 @@ def cmd_recheck(self) -> Dict[str, object]: # Needed by tests. 
last_manager = None # type: Optional[mypy.build.BuildManager] - def check(self, sources: List[mypy.build.BuildSource], - alt_lib_path: Optional[str] = None) -> Dict[str, Any]: + def check(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]: if self.fine_grained: return self.check_fine_grained(sources) else: - return self.check_default(sources, alt_lib_path) + return self.check_default(sources) - def check_default(self, sources: List[mypy.build.BuildSource], - alt_lib_path: Optional[str] = None) -> Dict[str, Any]: + def check_default(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]: """Check using the default (per-file) incremental mode.""" self.last_manager = None + blockers = False with GcLogger() as gc_result: try: # saved_cache is mutated in place. res = mypy.build.build(sources, self.options, saved_cache=self.saved_cache, - alt_lib_path=alt_lib_path) + alt_lib_path=self.alt_lib_path) msgs = res.errors self.last_manager = res.manager # type: Optional[mypy.build.BuildManager] except mypy.errors.CompileError as err: + blockers = True msgs = err.messages if msgs: msgs.append("") - response = {'out': "\n".join(msgs), 'err': "", 'status': 1} + text = "\n".join(msgs) + if blockers: + response = {'out': "", 'err': text, 'status': 2} + else: + response = {'out': text, 'err': "", 'status': 1} else: response = {'out': "", 'err': "", 'status': 0} response.update(gc_result.get_stats()) @@ -245,7 +264,7 @@ def check_default(self, sources: List[mypy.build.BuildSource], def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]: """Check using fine-grained incremental mode.""" - if not self.fine_grained_initialized: + if not self.fine_grained_manager: return self.initialize_fine_grained(sources) else: return self.fine_grained_increment(sources) @@ -261,10 +280,10 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict # Stores the initial state of sources as a side effect. 
self.fswatcher.find_changed() try: - # TODO: alt_lib_path result = mypy.build.build(sources=sources, options=self.options, - fscache=fscache) + fscache=fscache, + alt_lib_path=self.alt_lib_path) except mypy.errors.CompileError as e: output = ''.join(s + '\n' for s in e.messages) if e.use_stdout: @@ -274,7 +293,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict return {'out': out, 'err': err, 'status': 2} messages = result.errors self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(result) - self.fine_grained_initialized = True self.previous_sources = sources # If we are using the fine-grained cache, build hasn't actually done @@ -299,6 +317,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status} def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]: + assert self.fine_grained_manager is not None + t0 = time.time() self.update_sources(sources) changed = self.find_changed(sources) diff --git a/mypy/nodes.py b/mypy/nodes.py index b5e825566fb2..d7c44c6194cb 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -2080,6 +2080,11 @@ def is_cached_subtype_check(self, left: 'mypy.types.Instance', return (left, right) in self._cache return (left, right) in self._cache_proper + def reset_subtype_cache(self) -> None: + for item in self.mro: + item._cache = set() + item._cache_proper = set() + def __getitem__(self, name: str) -> 'SymbolTableNode': n = self.get(name) if n: @@ -2116,6 +2121,7 @@ def calculate_mro(self) -> None: self.is_enum = self._calculate_is_enum() # The property of falling back to Any is inherited. self.fallback_to_any = any(baseinfo.fallback_to_any for baseinfo in self.mro) + self.reset_subtype_cache() def calculate_metaclass_type(self) -> 'Optional[mypy.types.Instance]': declared = self.declared_metaclass diff --git a/mypy/options.py b/mypy/options.py index 667d88065648..5ea251df2c9d 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -52,7 +52,7 @@ def __init__(self) -> None: # -- build options -- self.build_type = BuildType.STANDARD - self.python_version = defaults.PYTHON3_VERSION + self.python_version = sys.version_info[:2] # type: Tuple[int, int] self.platform = sys.platform self.custom_typing_module = None # type: Optional[str] self.custom_typeshed_dir = None # type: Optional[str] diff --git a/mypy/semanal.py b/mypy/semanal.py index 5bdefe2e6817..a924689cae54 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -309,8 +309,10 @@ def visit_file(self, file_node: MypyFile, fnam: str, options: Options, del self.cur_mod_node del self.globals - def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef]) -> None: + def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef], + patches: List[Tuple[int, Callable[[], None]]]) -> None: """Refresh a stale target in fine-grained incremental mode.""" + self.patches = patches self.scope.enter_file(self.cur_mod_id) if isinstance(node, MypyFile): self.refresh_top_level(node) @@ -318,15 +320,13 @@ def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef]) -> self.recurse_into_functions = True self.accept(node) self.scope.leave() + del self.patches def refresh_top_level(self, file_node: MypyFile) -> None: """Reanalyze a stale module top-level in fine-grained incremental mode.""" - # TODO: Invoke patches in fine-grained incremental mode. 
- self.patches = [] self.recurse_into_functions = False for d in file_node.defs: self.accept(d) - del self.patches @contextmanager def file_context(self, file_node: MypyFile, fnam: str, options: Options, @@ -2182,7 +2182,7 @@ def build_newtype_typeinfo(self, name: str, old_type: Type, base_type: Instance) arg_types=[Instance(info, []), old_type], arg_kinds=[arg.kind for arg in args], arg_names=['self', 'item'], - ret_type=old_type, + ret_type=NoneTyp(), fallback=self.named_type('__builtins__.function'), name=name) init_func = FuncDef('__init__', args, Block([]), typ=signature) @@ -4307,3 +4307,13 @@ def visit_any(self, t: AnyType) -> Type: if t.type_of_any == TypeOfAny.explicit: return t.copy_modified(TypeOfAny.special_form) return t + + +def apply_semantic_analyzer_patches(patches: List[Tuple[int, Callable[[], None]]]) -> None: + """Call patch callbacks in the right order. + + This should happen after semantic analyzer pass 3. + """ + patches_by_priority = sorted(patches, key=lambda x: x[0]) + for priority, patch_func in patches_by_priority: + patch_func() diff --git a/mypy/semanal_pass3.py b/mypy/semanal_pass3.py index 5a8b67adfc4b..1b815dd450e4 100644 --- a/mypy/semanal_pass3.py +++ b/mypy/semanal_pass3.py @@ -71,8 +71,10 @@ def visit_file(self, file_node: MypyFile, fnam: str, options: Options, del self.cur_mod_node self.patches = [] - def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef]) -> None: + def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef], + patches: List[Tuple[int, Callable[[], None]]]) -> None: """Refresh a stale target in fine-grained incremental mode.""" + self.patches = patches self.scope.enter_file(self.sem.cur_mod_id) if isinstance(node, MypyFile): self.recurse_into_functions = False @@ -81,6 +83,7 @@ def refresh_partial(self, node: Union[MypyFile, FuncItem, OverloadedFuncDef]) -> self.recurse_into_functions = True self.accept(node) self.scope.leave() + self.patches = [] def refresh_top_level(self, file_node: MypyFile) -> None: """Reanalyze a stale module top-level in fine-grained incremental mode.""" diff --git a/mypy/server/astdiff.py b/mypy/server/astdiff.py index 63f19b061800..b9fe1f6c308d 100644 --- a/mypy/server/astdiff.py +++ b/mypy/server/astdiff.py @@ -304,10 +304,6 @@ def snapshot_definition(node: Optional[SymbolNode], snapshot_optional_type(node.var.type), snapshot_definition(node.func, common)) elif isinstance(node, TypeInfo): - # TODO: - # type_vars - # bases - # _promote attrs = (node.is_abstract, node.is_enum, node.fallback_to_any, @@ -315,12 +311,15 @@ def snapshot_definition(node: Optional[SymbolNode], node.is_newtype, snapshot_optional_type(node.tuple_type), snapshot_optional_type(node.typeddict_type), - [base.fullname() for base in node.mro]) + [base.fullname() for base in node.mro], + node.type_vars, + [snapshot_type(base) for base in node.bases], + snapshot_optional_type(node._promote)) prefix = node.fullname() symbol_table = snapshot_symbol_table(prefix, node.names) return ('TypeInfo', common, attrs, symbol_table) else: - # TODO: Handle additional types: TypeVarExpr, MypyFile, ... + # Other node types are handled elsewhere. 
assert False, type(node)
diff --git a/mypy/server/astmerge.py b/mypy/server/astmerge.py
index 30f96fdab7f3..2c5832496d25 100644
--- a/mypy/server/astmerge.py
+++ b/mypy/server/astmerge.py
@@ -51,13 +51,14 @@
     Node, MypyFile, SymbolTable, Block, AssignmentStmt, NameExpr, MemberExpr, RefExpr, TypeInfo,
     FuncDef, ClassDef, NamedTupleExpr, SymbolNode, Var, Statement, SuperExpr, NewTypeExpr,
     OverloadedFuncDef, LambdaExpr, TypedDictExpr, EnumCallExpr, FuncBase, TypeAliasExpr, CallExpr,
+    CastExpr,
     MDEF
 )
 from mypy.traverser import TraverserVisitor
 from mypy.types import (
-    Type, TypeVisitor, Instance, AnyType, NoneTyp, CallableType, DeletedType, PartialType,
+    Type, SyntheticTypeVisitor, Instance, AnyType, NoneTyp, CallableType, DeletedType, PartialType,
     TupleType, TypeType, TypeVarType, TypedDictType, UnboundType, UninhabitedType, UnionType,
-    Overloaded, TypeVarDef, TypeList
+    Overloaded, TypeVarDef, TypeList, CallableArgument, EllipsisType, StarType
 )
 from mypy.util import get_prefix
@@ -166,8 +167,8 @@ def visit_overloaded_func_def(self, node: OverloadedFuncDef) -> None:

     def visit_class_def(self, node: ClassDef) -> None:
         # TODO additional things?
+        node.info = self.fixup_and_reset_typeinfo(node.info)
         node.defs.body = self.replace_statements(node.defs.body)
-        node.info = self.fixup(node.info)
         info = node.info
         for tv in node.type_vars:
             self.process_type_var_def(tv)
@@ -214,9 +215,13 @@ def visit_ref_expr(self, node: RefExpr) -> None:

     def visit_namedtuple_expr(self, node: NamedTupleExpr) -> None:
         super().visit_namedtuple_expr(node)
-        node.info = self.fixup(node.info)
+        node.info = self.fixup_and_reset_typeinfo(node.info)
         self.process_synthetic_type_info(node.info)

+    def visit_cast_expr(self, node: CastExpr) -> None:
+        super().visit_cast_expr(node)
+        self.fixup_type(node.type)
+
     def visit_super_expr(self, node: SuperExpr) -> None:
         super().visit_super_expr(node)
         if node.info is not None:
@@ -229,7 +234,7 @@ def visit_call_expr(self, node: CallExpr) -> None:

     def visit_newtype_expr(self, node: NewTypeExpr) -> None:
         if node.info:
-            node.info = self.fixup(node.info)
+            node.info = self.fixup_and_reset_typeinfo(node.info)
             self.process_synthetic_type_info(node.info)
         self.fixup_type(node.old_type)
         super().visit_newtype_expr(node)
@@ -240,11 +245,11 @@ def visit_lambda_expr(self, node: LambdaExpr) -> None:

     def visit_typeddict_expr(self, node: TypedDictExpr) -> None:
         super().visit_typeddict_expr(node)
-        node.info = self.fixup(node.info)
+        node.info = self.fixup_and_reset_typeinfo(node.info)
         self.process_synthetic_type_info(node.info)

     def visit_enum_call_expr(self, node: EnumCallExpr) -> None:
-        node.info = self.fixup(node.info)
+        node.info = self.fixup_and_reset_typeinfo(node.info)
         self.process_synthetic_type_info(node.info)
         super().visit_enum_call_expr(node)
@@ -269,6 +274,19 @@ def fixup(self, node: SN) -> SN:
             return cast(SN, new)
         return node

+    def fixup_and_reset_typeinfo(self, node: TypeInfo) -> TypeInfo:
+        """Fix-up type info and reset subtype caches.
+
+        This needs to be called at least once for each merged TypeInfo, as otherwise we
+        may leak stale caches.
+        """
+        if node in self.replacements:
+            # The subclass relationships may change, so reset all caches relevant to the
+            # old MRO.
+            new = cast(TypeInfo, self.replacements[node])
+            new.reset_subtype_cache()
+        return self.fixup(node)
+
     def fixup_type(self, typ: Optional[Type]) -> None:
         if typ is not None:
             typ.accept(TypeReplaceVisitor(self.replacements))
@@ -307,7 +325,7 @@ def replace_statements(self, nodes: List[Statement]) -> List[Statement]:
         return result


-class TypeReplaceVisitor(TypeVisitor[None]):
+class TypeReplaceVisitor(SyntheticTypeVisitor[None]):
     """Similar to NodeReplaceVisitor, but for type objects."""

     def __init__(self, replacements: Dict[SymbolNode, SymbolNode]) -> None:
@@ -353,7 +371,9 @@ def visit_partial_type(self, typ: PartialType) -> None:
     def visit_tuple_type(self, typ: TupleType) -> None:
         for item in typ.items:
             item.accept(self)
-        typ.fallback.accept(self)
+        # Fallback can be None for implicit tuple types that haven't been semantically analyzed.
+        if typ.fallback is not None:
+            typ.fallback.accept(self)

     def visit_type_type(self, typ: TypeType) -> None:
         typ.item.accept(self)
@@ -376,6 +396,15 @@ def visit_type_list(self, typ: TypeList) -> None:
         for item in typ.items:
             item.accept(self)

+    def visit_callable_argument(self, typ: CallableArgument) -> None:
+        typ.typ.accept(self)
+
+    def visit_ellipsis_type(self, typ: EllipsisType) -> None:
+        pass
+
+    def visit_star_type(self, typ: StarType) -> None:
+        typ.type.accept(self)
+
     def visit_uninhabited_type(self, typ: UninhabitedType) -> None:
         pass
diff --git a/mypy/server/aststrip.py b/mypy/server/aststrip.py
index d9fdf7b2da8d..edd0208fb92c 100644
--- a/mypy/server/aststrip.py
+++ b/mypy/server/aststrip.py
@@ -78,20 +78,24 @@ def strip_file_top_level(self, file_node: MypyFile) -> None:

     def visit_class_def(self, node: ClassDef) -> None:
         """Strip class body and type info, but don't strip methods."""
-        node.info.type_vars = []
-        node.info.bases = []
-        node.info.abstract_attributes = []
-        node.info.mro = []
-        node.info.add_type_vars()
-        node.info.tuple_type = None
-        node.info.typeddict_type = None
-        node.info._cache = set()
-        node.info._cache_proper = set()
+        self.strip_type_info(node.info)
         node.base_type_exprs.extend(node.removed_base_type_exprs)
         node.removed_base_type_exprs = []
         with self.enter_class(node.info):
             super().visit_class_def(node)

+    def strip_type_info(self, info: TypeInfo) -> None:
+        info.type_vars = []
+        info.bases = []
+        info.abstract_attributes = []
+        info.mro = []
+        info.add_type_vars()
+        info.tuple_type = None
+        info.typeddict_type = None
+        info._cache = set()
+        info._cache_proper = set()
+
     def visit_func_def(self, node: FuncDef) -> None:
         if not self.recurse_into_functions:
             return
@@ -155,6 +159,8 @@ def visit_import_from(self, node: ImportFrom) -> None:
         if node.assignments:
             node.assignments = []
         else:
+            # If the node is unreachable, don't reset entries: they point to something else!
+            if node.is_unreachable: return
             if self.names:
                 # Reset entries in the symbol table. This is necessary since
                 # otherwise the semantic analyzer will think that the import
@@ -169,6 +175,8 @@ def visit_import(self, node: Import) -> None:
         if node.assignments:
             node.assignments = []
         else:
+            # If the node is unreachable, don't reset entries: they point to something else!
+            if node.is_unreachable: return
             if self.names:
                 # Reset entries in the symbol table. This is necessary since
                 # otherwise the semantic analyzer will think that the import
diff --git a/mypy/server/deps.py b/mypy/server/deps.py
index c349793a7128..33a41e5b5e09 100644
--- a/mypy/server/deps.py
+++ b/mypy/server/deps.py
@@ -92,7 +92,7 @@ class 'mod.Cls'.
This can also refer to an attribute inherited from a ComparisonExpr, GeneratorExpr, DictionaryComprehension, StarExpr, PrintStmt, ForStmt, WithStmt, TupleExpr, ListExpr, OperatorAssignmentStmt, DelStmt, YieldFromExpr, Decorator, Block, TypeInfo, FuncBase, OverloadedFuncDef, RefExpr, SuperExpr, Var, NamedTupleExpr, TypedDictExpr, - LDEF, MDEF, GDEF, FuncItem, TypeAliasExpr, + LDEF, MDEF, GDEF, FuncItem, TypeAliasExpr, NewTypeExpr, op_methods, reverse_op_methods, ops_with_inplace_method, unary_op_methods ) from mypy.traverser import TraverserVisitor @@ -211,18 +211,27 @@ def visit_class_def(self, o: ClassDef) -> None: # Add dependencies to type variables of a generic class. for tv in o.type_vars: self.add_dependency(make_trigger(tv.fullname), target) - # Add dependencies to base types. - for base in o.info.bases: + self.process_type_info(o.info) + super().visit_class_def(o) + self.is_class = old_is_class + self.scope.leave() + + def visit_newtype_expr(self, o: NewTypeExpr) -> None: + if o.info: + self.scope.enter_class(o.info) + self.process_type_info(o.info) + self.scope.leave() + + def process_type_info(self, info: TypeInfo) -> None: + target = self.scope.current_full_target() + for base in info.bases: self.add_type_dependencies(base, target=target) - if o.info.tuple_type: - self.add_type_dependencies(o.info.tuple_type, target=make_trigger(target)) - if o.info.typeddict_type: - self.add_type_dependencies(o.info.typeddict_type, target=make_trigger(target)) + if info.tuple_type: + self.add_type_dependencies(info.tuple_type, target=make_trigger(target)) + if info.typeddict_type: + self.add_type_dependencies(info.typeddict_type, target=make_trigger(target)) # TODO: Add dependencies based on remaining TypeInfo attributes. - super().visit_class_def(o) self.add_type_alias_deps(self.scope.current_target()) - self.is_class = old_is_class - info = o.info for name, node in info.names.items(): if isinstance(node.node, Var): for base_info in non_trivial_bases(info): @@ -236,7 +245,6 @@ def visit_class_def(self, o: ClassDef) -> None: target=make_trigger(info.fullname() + '.' + name)) self.add_dependency(make_trigger(base_info.fullname() + '.__init__'), target=make_trigger(info.fullname() + '.__init__')) - self.scope.leave() def visit_import(self, o: Import) -> None: for id, as_id in o.ids: diff --git a/mypy/server/update.py b/mypy/server/update.py index 4bf625ed3774..a6788305268f 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -117,7 +117,10 @@ """ import os -from typing import Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple +import os.path +from typing import ( + Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable +) from mypy.build import ( BuildManager, State, BuildSource, BuildResult, Graph, load_graph, @@ -132,6 +135,7 @@ from mypy.options import Options from mypy.types import Type from mypy.fscache import FileSystemCache +from mypy.semanal import apply_semantic_analyzer_patches from mypy.server.astdiff import ( snapshot_symbol_table, compare_symbol_table_snapshots, is_identical_type, SnapshotItem ) @@ -173,6 +177,7 @@ def __init__(self, result: BuildResult) -> None: # this directly reflected in load_graph's interface. 
self.options.cache_dir = os.devnull
         manager.saved_cache = {}
+        manager.only_load_from_cache = False

         # Active triggers during the last update
         self.triggered = []  # type: List[str]
@@ -750,6 +755,8 @@ def key(node: DeferredNode) -> int:
             strip_target(deferred.node)

     semantic_analyzer = manager.semantic_analyzer

+    patches = []  # type: List[Tuple[int, Callable[[], None]]]
+
     # Second pass of semantic analysis. We don't redo the first pass, because it only
     # does local things that won't go stale.
     for deferred in nodes:
@@ -758,7 +765,7 @@ def key(node: DeferredNode) -> int:
                               fnam=file_node.path,
                               options=manager.options,
                               active_type=deferred.active_typeinfo):
-            manager.semantic_analyzer.refresh_partial(deferred.node)
+            manager.semantic_analyzer.refresh_partial(deferred.node, patches)

     # Third pass of semantic analysis.
     for deferred in nodes:
@@ -767,7 +774,9 @@ def key(node: DeferredNode) -> int:
                               fnam=file_node.path,
                               options=manager.options,
                               active_type=deferred.active_typeinfo):
-            manager.semantic_analyzer_pass3.refresh_partial(deferred.node)
+            manager.semantic_analyzer_pass3.refresh_partial(deferred.node, patches)
+
+    apply_semantic_analyzer_patches(patches)

     # Merge symbol tables to preserve identities of AST nodes. The file node will remain
     # the same, but other nodes may have been recreated with different identities, such as
diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py
index daffa8344dc0..794499825a1d 100644
--- a/mypy/test/helpers.py
+++ b/mypy/test/helpers.py
@@ -1,11 +1,14 @@
 import os
 import re
+import subprocess
 import sys
 import time
+import shutil

 from typing import List, Dict, Tuple, Callable, Any, Optional

 from mypy import defaults
+from mypy.test.config import test_temp_dir

 import pytest  # type: ignore  # no pytest in typeshed
@@ -327,3 +330,49 @@ def parse_options(program_text: str, testcase: DataDrivenTestCase,
         options.python_version = testcase_pyversion(testcase.file, testcase.name)

     return options
+
+
+def split_lines(*streams: bytes) -> List[str]:
+    """Returns a single list of string lines from the byte streams in args."""
+    return [
+        s
+        for stream in streams
+        for s in stream.decode('utf8').splitlines()
+    ]
+
+
+def run_command(cmdline: List[str], *, env: Optional[Dict[str, str]] = None,
+                timeout: int = 300, cwd: str = test_temp_dir) -> Tuple[int, List[str]]:
+    """A poor man's subprocess.run() for 3.4 compatibility."""
+    process = subprocess.Popen(
+        cmdline,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=cwd,
+    )
+    try:
+        out, err = process.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        out = err = b''
+        process.kill()
+    return process.returncode, split_lines(out, err)
+
+
+def copy_and_fudge_mtime(source_path: str, target_path: str) -> None:
+    # On some systems, mtime has a resolution of 1 second, which can
+    # cause annoying-to-debug issues when a file has the same size
+    # after a change. We manually set the mtime to circumvent this.
+    # Note that we increment the old file's mtime, which guarantees a
+    # different value, rather than incrementing the mtime after the
+    # copy, which could leave the mtime unchanged if the old file had
+    # a similarly fudged mtime.
+    new_time = None
+    if os.path.isfile(target_path):
+        new_time = os.stat(target_path).st_mtime + 1
+
+    # Use retries to work around potential flakiness on Windows (AppVeyor).
+ retry_on_error(lambda: shutil.copy(source_path, target_path)) + + if new_time: + os.utime(target_path, times=(new_time, new_time)) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 7bcddc36e5e5..b97625e63d63 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -12,7 +12,8 @@ from mypy.test.data import DataDrivenTestCase, DataSuite from mypy.test.helpers import ( assert_string_arrays_equal, normalize_error_messages, - retry_on_error, update_testcase_output, parse_options + retry_on_error, update_testcase_output, parse_options, + copy_and_fudge_mtime ) from mypy.errors import CompileError from mypy.options import Options @@ -131,14 +132,7 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int = 0) if file.endswith('.' + str(incremental_step)): full = os.path.join(dn, file) target = full[:-2] - # Use retries to work around potential flakiness on Windows (AppVeyor). - retry_on_error(lambda: shutil.copy(full, target)) - - # In some systems, mtime has a resolution of 1 second which can cause - # annoying-to-debug issues when a file has the same size after a - # change. We manually set the mtime to circumvent this. - new_time = os.stat(target).st_mtime + 1 - os.utime(target, times=(new_time, new_time)) + copy_and_fudge_mtime(full, target) # Delete files scheduled to be deleted in [delete .num] sections. for path in testcase.deleted_paths.get(incremental_step, set()): # Use retries to work around potential flakiness on Windows (AppVeyor). diff --git a/mypy/test/testdiff.py b/mypy/test/testdiff.py index b1cfc65a4a29..b13c8402befa 100644 --- a/mypy/test/testdiff.py +++ b/mypy/test/testdiff.py @@ -5,6 +5,7 @@ from mypy import build from mypy.build import BuildSource +from mypy.defaults import PYTHON3_VERSION from mypy.errors import CompileError from mypy.nodes import MypyFile from mypy.options import Options @@ -53,6 +54,7 @@ def build(self, source: str) -> Tuple[List[str], Optional[Dict[str, MypyFile]]]: options.use_builtins_fixtures = True options.show_traceback = True options.cache_dir = os.devnull + options.python_version = PYTHON3_VERSION try: result = build.build(sources=[BuildSource('main', None, source)], options=options, diff --git a/mypy/test/testdmypy.py b/mypy/test/testdmypy.py index a79ba80daafe..1205e13106d5 100644 --- a/mypy/test/testdmypy.py +++ b/mypy/test/testdmypy.py @@ -15,6 +15,7 @@ from mypy.test.helpers import ( assert_string_arrays_equal, normalize_error_messages, retry_on_error, testcase_pyversion, update_testcase_output, + copy_and_fudge_mtime, ) from mypy.options import Options @@ -90,14 +91,7 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> if file.endswith('.' + str(incremental_step)): full = os.path.join(dn, file) target = full[:-2] - # Use retries to work around potential flakiness on Windows (AppVeyor). - retry_on_error(lambda: shutil.copy(full, target)) - - # In some systems, mtime has a resolution of 1 second which can cause - # annoying-to-debug issues when a file has the same size after a - # change. We manually set the mtime to circumvent this. - new_time = os.stat(target).st_mtime + 1 - os.utime(target, times=(new_time, new_time)) + copy_and_fudge_mtime(full, target) # Delete files scheduled to be deleted in [delete .num] sections. for path in testcase.deleted_paths.get(incremental_step, set()): # Use retries to work around potential flakiness on Windows (AppVeyor). 
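The `copy_and_fudge_mtime()` helper consolidated above exists because coarse filesystem timestamps can hide a rewrite of a same-sized file. A standalone demonstration of the trick, using only the standard library (a sketch, not the helper itself)::

    import os
    import shutil
    import tempfile

    with tempfile.TemporaryDirectory() as d:
        src = os.path.join(d, 'src.py')
        dst = os.path.join(d, 'dst.py')
        open(src, 'w').close()
        open(dst, 'w').close()
        # Bump past the old target mtime so a file watcher always sees a change,
        # even on filesystems with 1-second timestamp resolution.
        new_time = os.stat(dst).st_mtime + 1
        shutil.copy(src, dst)
        os.utime(dst, times=(new_time, new_time))
        # dst now reports a strictly newer mtime than before the copy.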
@@ -116,20 +110,16 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> # Parse options after moving files (in case mypy.ini is being moved). options = self.parse_options(original_program_text, testcase, incremental_step) if incremental_step == 1: - server_options = [] # type: List[str] if 'fine-grained' in testcase.file: - server_options.append('--experimental') options.fine_grained_incremental = True - options.local_partial_types = True - self.server = dmypy_server.Server(server_options) # TODO: Fix ugly API - self.server.options = options + self.server = dmypy_server.Server(options, alt_lib_path=test_temp_dir) assert self.server is not None # Set in step 1 and survives into next steps sources = [] for module_name, program_path, program_text in module_data: # Always set to none so we're forced to reread the module in incremental mode sources.append(build.BuildSource(program_path, module_name, None)) - response = self.server.check(sources, alt_lib_path=test_temp_dir) + response = self.server.check(sources) a = (response['out'] or response['err']).splitlines() a = normalize_error_messages(a) diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py index 7ffc3bef159c..5b19929aedc1 100644 --- a/mypy/test/testfinegrained.py +++ b/mypy/test/testfinegrained.py @@ -9,31 +9,28 @@ import os import re -import shutil -from typing import List, Tuple, Dict, Optional, Set +from typing import List, Tuple, Optional, cast from mypy import build -from mypy.build import BuildManager, BuildSource, BuildResult, Graph -from mypy.errors import Errors, CompileError -from mypy.nodes import Node, MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Expression +from mypy.build import BuildManager, BuildSource +from mypy.errors import CompileError from mypy.options import Options -from mypy.server.astmerge import merge_asts -from mypy.server.subexpr import get_subexpressions from mypy.server.update import FineGrainedBuildManager -from mypy.strconv import StrConv, indent -from mypy.test.config import test_temp_dir, test_data_prefix +from mypy.test.config import test_temp_dir from mypy.test.data import ( - parse_test_cases, DataDrivenTestCase, DataSuite, UpdateFile, module_from_path + DataDrivenTestCase, DataSuite, UpdateFile, module_from_path ) -from mypy.test.helpers import assert_string_arrays_equal, parse_options -from mypy.test.testtypegen import ignore_node -from mypy.types import TypeStrVisitor, Type -from mypy.util import short_type +from mypy.test.helpers import assert_string_arrays_equal, parse_options, copy_and_fudge_mtime from mypy.server.mergecheck import check_consistency +from mypy.dmypy_server import Server +from mypy.main import expand_dir import pytest # type: ignore # no pytest in typeshed +# TODO: This entire thing is a weird semi-duplication of testdmypy. +# One of them should be eliminated and its remaining useful features +# merged into the other. 
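The refactored tests above drive checks through the daemon's own entry points instead of poking build internals. A rough sketch of that flow, only meaningful inside a mypy checkout at this revision, with a hypothetical fixtures path::

    from mypy.build import BuildSource
    from mypy.dmypy_server import Server
    from mypy.options import Options

    options = Options()
    options.fine_grained_incremental = True   # use the fine-grained engine
    server = Server(options, alt_lib_path='/path/to/fixtures')  # hypothetical path
    response = server.check([BuildSource('main.py', '__main__', None)])
    print(response['out'] or response['err'])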
# Set to True to perform (somewhat expensive) checks for duplicate AST nodes after merge CHECK_CONSISTENCY = False @@ -75,51 +72,46 @@ def run_case(self, testcase: DataDrivenTestCase) -> None: return main_src = '\n'.join(testcase.input) - sources_override = self.parse_sources(main_src) - result = self.build(main_src, testcase, sources_override, - build_cache=self.use_cache, - enable_cache=self.use_cache) - messages = result.errors + main_path = os.path.join(test_temp_dir, 'main') + with open(main_path, 'w') as f: + f.write(main_src) + + server = Server(self.get_options(main_src, testcase, build_cache=False), + alt_lib_path=test_temp_dir) + + step = 1 + sources = self.parse_sources(main_src, step) + if self.use_cache: + messages = self.build(self.get_options(main_src, testcase, build_cache=True), sources) + else: + messages = self.run_check(server, sources) + a = [] if messages: a.extend(normalize_messages(messages)) - fine_grained_manager = None - if not self.use_cache: - result.manager.fscache.flush() - fine_grained_manager = FineGrainedBuildManager(result) + if server.fine_grained_manager: if CHECK_CONSISTENCY: - check_consistency(fine_grained_manager) + check_consistency(server.fine_grained_manager) steps = testcase.find_steps() all_triggered = [] for operations in steps: - modules = [] + step += 1 for op in operations: if isinstance(op, UpdateFile): # Modify/create file - shutil.copy(op.source_path, op.target_path) - modules.append((op.module, op.target_path)) + copy_and_fudge_mtime(op.source_path, op.target_path) else: # Delete file os.remove(op.path) - modules.append((op.module, op.path)) - if sources_override is not None: - modules = [(module, path) - for module, path in sources_override - if any(m == module for m, _ in modules)] - - # If this is the second iteration and we are using a - # cache, now we need to set it up - if fine_grained_manager is None: - result = self.build(main_src, testcase, sources_override, - build_cache=False, enable_cache=True) - fine_grained_manager = FineGrainedBuildManager(result) - - new_messages = fine_grained_manager.update(modules) - if CHECK_CONSISTENCY: - check_consistency(fine_grained_manager) - all_triggered.append(fine_grained_manager.triggered) + sources = self.parse_sources(main_src, step) + new_messages = self.run_check(server, sources) + + if server.fine_grained_manager: + if CHECK_CONSISTENCY: + check_consistency(server.fine_grained_manager) + all_triggered.append(server.fine_grained_manager.triggered) new_messages = normalize_messages(new_messages) a.append('==') @@ -140,39 +132,39 @@ def run_case(self, testcase: DataDrivenTestCase) -> None: 'Invalid active triggers ({}, line {})'.format(testcase.file, testcase.line)) - def build(self, - source: str, - testcase: DataDrivenTestCase, - sources_override: Optional[List[Tuple[str, str]]], - build_cache: bool, - enable_cache: bool) -> BuildResult: + def get_options(self, + source: str, + testcase: DataDrivenTestCase, + build_cache: bool) -> Options: # This handles things like '# flags: --foo'. 
options = parse_options(source, testcase, incremental_step=1) options.incremental = True options.use_builtins_fixtures = True options.show_traceback = True options.fine_grained_incremental = not build_cache - options.use_fine_grained_cache = enable_cache and not build_cache - options.cache_fine_grained = enable_cache + options.use_fine_grained_cache = self.use_cache and not build_cache + options.cache_fine_grained = self.use_cache options.local_partial_types = True + if options.follow_imports == 'normal': + options.follow_imports = 'error' - main_path = os.path.join(test_temp_dir, 'main') - with open(main_path, 'w') as f: - f.write(source) - if sources_override is not None: - sources = [BuildSource(path, module, None) - for module, path in sources_override] - else: - sources = [BuildSource(main_path, None, None)] + return options + + def run_check(self, server: Server, sources: List[BuildSource]) -> List[str]: + response = server.check(sources) + out = cast(str, response['out'] or response['err']) + return out.splitlines() + + def build(self, + options: Options, + sources: List[BuildSource]) -> List[str]: try: result = build.build(sources=sources, options=options, alt_lib_path=test_temp_dir) except CompileError as e: - # TODO: We need a manager and a graph in this case as well - assert False, str('\n'.join(e.messages)) - return e.messages, None, None - return result + return e.messages + return result.errors def format_triggered(self, triggered: List[List[str]]) -> List[str]: result = [] @@ -183,14 +175,33 @@ def format_triggered(self, triggered: List[List[str]]) -> List[str]: result.append(('%d: %s' % (n + 2, ', '.join(filtered))).strip()) return result - def parse_sources(self, program_text: str) -> Optional[List[Tuple[str, str]]]: - """Return target (module, path) tuples for a test case, if not using the defaults. + def parse_sources(self, program_text: str, + incremental_step: int) -> List[BuildSource]: + """Return target BuildSources for a test case. + + Normally, the unit tests will check all files included in the test + case. This differs from how testcheck works by default, as dmypy + doesn't currently support following imports. + + You can override this behavior and instruct the tests to check + multiple modules by using a comment like this in the test case + input: + + # cmd: main a.py + + You can also use `# cmdN:` to have a different cmd for incremental + step N (2, 3, ...). - These are defined through a comment like '# cmd: main a.py' in the test case - description. """ - # TODO: Support defining separately for each incremental step. m = re.search('# cmd: mypy ([a-zA-Z0-9_./ ]+)$', program_text, flags=re.MULTILINE) + regex = '# cmd{}: mypy ([a-zA-Z0-9_./ ]+)$'.format(incremental_step) + alt_m = re.search(regex, program_text, flags=re.MULTILINE) + if alt_m is not None and incremental_step > 1: + # Optionally return a different command if in a later step + # of incremental mode, otherwise default to reusing the + # original cmd. + m = alt_m + if m: # The test case wants to use a non-default set of files. 
paths = m.group(1).strip().split() @@ -200,9 +211,11 @@ def parse_sources(self, program_text: str) -> Optional[List[Tuple[str, str]]]: module = module_from_path(path) if module == 'main': module = '__main__' - result.append((module, path)) + result.append(BuildSource(path, module, None)) return result - return None + else: + base = BuildSource(os.path.join(test_temp_dir, 'main'), '__main__', None) + return [base] + expand_dir(test_temp_dir) def normalize_messages(messages: List[str]) -> List[str]: diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py index 18fc51d8cbb3..e2e7027faa15 100644 --- a/mypy/test/testmerge.py +++ b/mypy/test/testmerge.py @@ -6,6 +6,7 @@ from mypy import build from mypy.build import BuildManager, BuildSource, BuildResult, State, Graph +from mypy.defaults import PYTHON3_VERSION from mypy.errors import Errors, CompileError from mypy.nodes import ( Node, MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Expression, Var, TypeVarExpr, @@ -107,6 +108,7 @@ def build(self, source: str) -> Optional[BuildResult]: options.fine_grained_incremental = True options.use_builtins_fixtures = True options.show_traceback = True + options.python_version = PYTHON3_VERSION main_path = os.path.join(test_temp_dir, 'main') with open(main_path, 'w') as f: f.write(source) diff --git a/mypy/test/testpythoneval.py b/mypy/test/testpythoneval.py index 222fa6ff32c2..0634442f172a 100644 --- a/mypy/test/testpythoneval.py +++ b/mypy/test/testpythoneval.py @@ -13,15 +13,16 @@ import os import os.path import re -import subprocess import sys import pytest # type: ignore # no pytest in typeshed -from typing import Dict, List, Tuple, Optional +from typing import List + +from mypy.defaults import PYTHON3_VERSION from mypy.test.config import test_temp_dir from mypy.test.data import DataDrivenTestCase, DataSuite -from mypy.test.helpers import assert_string_arrays_equal +from mypy.test.helpers import assert_string_arrays_equal, run_command from mypy.util import try_find_python2_interpreter from mypy import api @@ -60,6 +61,7 @@ def test_python_evaluation(testcase: DataDrivenTestCase) -> None: return else: interpreter = python3_path + mypy_cmdline.append('--python-version={}'.format('.'.join(map(str, PYTHON3_VERSION)))) # Write the program to a file. program = '_' + testcase.name + '.py' @@ -79,7 +81,7 @@ def test_python_evaluation(testcase: DataDrivenTestCase) -> None: output.append(line.rstrip("\r\n")) if returncode == 0: # Execute the program. - returncode, interp_out = run([interpreter, program]) + returncode, interp_out = run_command([interpreter, program]) output.extend(interp_out) # Remove temp file. 
os.remove(program_path) @@ -88,35 +90,7 @@ def test_python_evaluation(testcase: DataDrivenTestCase) -> None: testcase.file, testcase.line)) -def split_lines(*streams: bytes) -> List[str]: - """Returns a single list of string lines from the byte streams in args.""" - return [ - s.rstrip('\n\r') - for stream in streams - for s in str(stream, 'utf8').splitlines() - ] - - def adapt_output(testcase: DataDrivenTestCase) -> List[str]: """Translates the generic _program.py into the actual filename.""" program = '_' + testcase.name + '.py' return [program_re.sub(program, line) for line in testcase.output] - - -def run( - cmdline: List[str], *, env: Optional[Dict[str, str]] = None, timeout: int = 300 -) -> Tuple[int, List[str]]: - """A poor man's subprocess.run() for 3.3 and 3.4 compatibility.""" - process = subprocess.Popen( - cmdline, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=test_temp_dir, - ) - try: - out, err = process.communicate(timeout=timeout) - except subprocess.TimeoutExpired: - out = err = b'' - process.kill() - return process.returncode, split_lines(out, err) diff --git a/mypy/test/testsemanal.py b/mypy/test/testsemanal.py index 98f3ef64b26c..5ab772707ed6 100644 --- a/mypy/test/testsemanal.py +++ b/mypy/test/testsemanal.py @@ -6,6 +6,7 @@ from mypy import build from mypy.build import BuildSource +from mypy.defaults import PYTHON3_VERSION from mypy.test.helpers import ( assert_string_arrays_equal, normalize_error_messages, testfile_pyversion, ) @@ -38,6 +39,7 @@ def get_semanal_options() -> Options: options.use_builtins_fixtures = True options.semantic_analysis_only = True options.show_traceback = True + options.python_version = PYTHON3_VERSION return options diff --git a/mypy/types.py b/mypy/types.py index e697e0e49c2c..ef3b092d3a42 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -255,6 +255,7 @@ def __init__(self, items: List[Type], line: int = -1, column: int = -1) -> None: self.items = items def accept(self, visitor: 'TypeVisitor[T]') -> T: + assert isinstance(visitor, SyntheticTypeVisitor) return visitor.visit_type_list(self) def serialize(self) -> JsonDict: @@ -1506,11 +1507,6 @@ def visit_type_type(self, t: TypeType) -> T: def visit_forwardref_type(self, t: ForwardRef) -> T: raise RuntimeError('Internal error: unresolved forward reference') - def visit_type_list(self, t: TypeList) -> T: - # TODO: Do we need to implement this in more visitors? TypeList objects can - # exist as components of UnboundTypes. - raise self._notimplemented_helper('type_list') - class SyntheticTypeVisitor(TypeVisitor[T]): """A TypeVisitor that also knows how to visit synthetic AST constructs. 
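The new assertion in `TypeList.accept` above enforces that synthetic, pre-semantic-analysis types are only visited by visitors that know about them. A toy standalone version of the pattern (illustrative names, not mypy's real classes)::

    class TypeVisitor:
        def visit_instance(self, t): ...

    class SyntheticTypeVisitor(TypeVisitor):
        # Only the extended visitor knows about synthetic constructs.
        def visit_type_list(self, t):
            return list(t.items)

    class TypeList:
        def __init__(self, items):
            self.items = items

        def accept(self, visitor):
            # Synthetic constructs exist only before semantic analysis,
            # so a plain TypeVisitor must never reach them.
            assert isinstance(visitor, SyntheticTypeVisitor)
            return visitor.visit_type_list(self)

    print(TypeList([1, 2]).accept(SyntheticTypeVisitor()))  # [1, 2]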
diff --git a/mypy/version.py b/mypy/version.py
index 0b2dc9938153..c4e1f6967884 100644
--- a/mypy/version.py
+++ b/mypy/version.py
@@ -1,7 +1,7 @@
 import os
 from mypy import git

-__version__ = '0.570-dev'
+__version__ = '0.580-dev'
 base_version = __version__

 mypy_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
diff --git a/mypy/waiter.py b/mypy/waiter.py
index 7e475d3e61ae..76d6fd335b0b 100644
--- a/mypy/waiter.py
+++ b/mypy/waiter.py
@@ -160,7 +160,7 @@ def load_log_file(self) -> Optional[List[Dict[str, Dict[str, Any]]]]:
             test_log = json.load(fp)
         except FileNotFoundError:
             test_log = []
-        except json.JSONDecodeError:
+        except ValueError:
             print('corrupt test log file {}'.format(self.FULL_LOG_FILENAME),
                   file=sys.stderr)
             test_log = []
         return test_log
diff --git a/runtests.py b/runtests.py
index c2f3361eadf5..a2a24c29a7ca 100755
--- a/runtests.py
+++ b/runtests.py
@@ -78,9 +78,10 @@ def add_mypy_cmd(self, name: str, mypy_args: List[str], cwd: Optional[str] = Non
     def add_mypy(self, name: str, *args: str, cwd: Optional[str] = None) -> None:
         self.add_mypy_cmd(name, list(args), cwd=cwd)

-    def add_mypy_modules(self, name: str, modules: Iterable[str],
-                         cwd: Optional[str] = None) -> None:
-        args = list(itertools.chain(*(['-m', mod] for mod in modules)))
+    def add_mypy_modules(self, name: str, modules: Iterable[str], cwd: Optional[str] = None,
+                         extra_args: Optional[List[str]] = None) -> None:
+        args = extra_args or []
+        args.extend(list(itertools.chain(*(['-m', mod] for mod in modules))))
         self.add_mypy_cmd(name, args, cwd=cwd)

     def add_mypy_package(self, name: str, packagename: str, *flags: str) -> None:
@@ -256,7 +257,8 @@ def add_stubs(driver: Driver) -> None:
             module = file_to_module(f[len(stubdir) + 1:])
             modules.add(module)

-    driver.add_mypy_modules('stubs', sorted(modules))
+    # These require at least Python 3.5; otherwise the check will fail trying to import zipapp.
+    driver.add_mypy_modules('stubs', sorted(modules), extra_args=['--python-version=3.5'])


 def add_stdlibsamples(driver: Driver) -> None:
@@ -276,7 +278,11 @@ def add_stdlibsamples(driver: Driver) -> None:

 def add_samples(driver: Driver) -> None:
     for f in find_files(os.path.join('test-data', 'samples'), suffix='.py'):
-        driver.add_mypy('file %s' % f, f)
+        if f == os.path.join('test-data', 'samples', 'crawl2.py'):
+            # This test requires Python 3.5 for async functions.
+            driver.add_mypy_cmd('file {}'.format(f), ['--python-version=3.5', f])
+        else:
+            driver.add_mypy('file %s' % f, f)


 def usage(status: int) -> None:
diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test
index 86aae2d344f4..4becdb18e23f 100644
--- a/test-data/unit/cmdline.test
+++ b/test-data/unit/cmdline.test
@@ -581,7 +581,7 @@ m.py:6: error: Explicit "Any" is not allowed
 m.py:9: error: Explicit "Any" is not allowed

 [case testDisallowAnyExplicitVarDeclaration]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
@@ -601,7 +601,7 @@ m.py:3: error: Explicit "Any" is not allowed
 m.py:5: error: Explicit "Any" is not allowed

 [case testDisallowAnyExplicitGenericVarDeclaration]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
@@ -785,7 +785,7 @@ N = TypedDict('N', {'x': str, 'y': List})  # no error
 m.py:4: error: Explicit "Any" is not allowed

 [case testDisallowAnyGenericsTupleNoTypeParams]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
 [[mypy-m]
@@ -821,7 +821,7 @@ def g(s: List[Tuple[str, str]]) -> None: pass  # no error
 m.py:3: error: Missing type parameters for generic type
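A note on the `waiter.py` change above: `json.JSONDecodeError` first appeared in Python 3.5, where it subclasses `ValueError`, so catching `ValueError` keeps the behavior on 3.5+ while remaining valid on 3.4. For example::

    import json

    try:
        json.loads('not valid json')
    except ValueError as exc:
        # Prints 'JSONDecodeError' on Python 3.5+, 'ValueError' on 3.4.
        print(type(exc).__name__)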
[case testDisallowAnyGenericsTypeType]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
 [[mypy-m]
@@ -858,7 +858,7 @@ def g(l: L[str]) -> None: pass  # no error
 m.py:5: error: Missing type parameters for generic type

 [case testDisallowAnyGenericsGenericAlias]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
 [[mypy-m]
@@ -882,7 +882,7 @@ m.py:7: error: Missing type parameters for generic type
 m.py:11: error: Missing type parameters for generic type

 [case testDisallowAnyGenericsPlainList]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
 [[mypy-m]
@@ -906,7 +906,7 @@ m.py:8: error: Need type annotation for 'x'
 m.py:9: error: Missing type parameters for generic type

 [case testDisallowAnyGenericsCustomGenericClass]
-# cmd: mypy m.py
+# cmd: mypy --python-version=3.6 m.py
 [file mypy.ini]
 [[mypy]
 [[mypy-m]
diff --git a/test-data/unit/deps-statements.test b/test-data/unit/deps-statements.test
index 7051f5787150..19e192eda882 100644
--- a/test-data/unit/deps-statements.test
+++ b/test-data/unit/deps-statements.test
@@ -655,3 +655,20 @@ class C:
     -> m.C
     -> m
     -> m
+
+[case testNewType]
+from typing import NewType
+from m import C
+
+N = NewType('N', C)
+
+def f(n: N) -> None:
+    pass
+[file m.py]
+class C:
+    x: int
+[out]
+    -> , m, m.f
+    -> 
+    -> 
+    -> m, m.N
diff --git a/test-data/unit/diff.test b/test-data/unit/diff.test
index 841b5648c077..b97310f8097b 100644
--- a/test-data/unit/diff.test
+++ b/test-data/unit/diff.test
@@ -680,3 +680,37 @@ B = Dict[str, S]
 [out]
 __main__.A
 __main__.T
+
+[case testNewType]
+from typing import NewType
+class C: pass
+class D: pass
+N1 = NewType('N1', C)
+N2 = NewType('N2', D)
+N3 = NewType('N3', C)
+class N4(C): pass
+[file next.py]
+from typing import NewType
+class C: pass
+class D(C): pass
+N1 = NewType('N1', C)
+N2 = NewType('N2', D)
+class N3(C): pass
+N4 = NewType('N4', C)
+[out]
+__main__.D
+__main__.N2
+__main__.N3
+__main__.N3.__init__
+__main__.N4
+__main__.N4.__init__
+
+[case testChangeGenericBaseClassOnly]
+from typing import List
+class C(List[int]): pass
+[file next.py]
+from typing import List
+class C(List[str]): pass
+[builtins fixtures/list.pyi]
+[out]
+__main__.C
diff --git a/test-data/unit/fine-grained-modules.test b/test-data/unit/fine-grained-modules.test
index c765af39553c..715aa80419ed 100644
--- a/test-data/unit/fine-grained-modules.test
+++ b/test-data/unit/fine-grained-modules.test
@@ -282,7 +282,9 @@ main:1: error: Cannot find module named 'p.q'
 main:1: note: (Perhaps setting MYPYPATH or using the "--ignore-missing-imports" flag would help)
 ==
-[case testDeletionOfSubmoduleTriggersImportFrom1-skip-nocache]
+-- TODO: Fix this bug. It is a real bug that has been papered over
+-- by the test harness.
+[case testDeletionOfSubmoduleTriggersImportFrom1-skip-nocache-skip]
 -- Different cache/no-cache tests because:
 -- missing module error message mismatch
 from p import q
@@ -971,3 +973,141 @@ x = Foo()
 ==
 ==
 main:2: error: Too few arguments for "foo" of "Foo"
+
+-- This series of tests is designed to test adding a new module that
+-- doesn't appear in the cache, for cache mode. The tests aren't run only
+-- in cache mode, though, because they are still perfectly good
+-- regular tests.
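The cases below drive multi-step runs with the `# cmdN:` comment convention handled by `parse_sources()` earlier in this patch. A self-contained sketch of that parsing, simplified from the test harness::

    import re

    def parse_cmd(text, step):
        m = re.search('# cmd: mypy ([a-zA-Z0-9_./ ]+)$', text, flags=re.MULTILINE)
        alt = re.search('# cmd{}: mypy ([a-zA-Z0-9_./ ]+)$'.format(step),
                        text, flags=re.MULTILINE)
        if alt is not None and step > 1:
            # A step-specific command overrides the default one.
            m = alt
        return m.group(1).strip().split() if m else None

    text = '# cmd: mypy main a.py\n# cmd2: mypy main a.py b.py\n'
    assert parse_cmd(text, 1) == ['main', 'a.py']
    assert parse_cmd(text, 2) == ['main', 'a.py', 'b.py']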
+[case testAddModuleAfterCache1] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py +# cmd3: mypy main a.py b.py +import a +[file a.py] +pass +[file a.py.2] +import b +b.foo(0) +[file b.py.2] +def foo() -> None: pass +[file b.py.3] +def foo(x: int) -> None: pass +[out] +== +a.py:2: error: Too many arguments for "foo" +== + +[case testAddModuleAfterCache2] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py +# cmd3: mypy main a.py b.py +# flags: --ignore-missing-imports --follow-imports=skip +import a +[file a.py] +import b +b.foo(0) +[file b.py.2] +def foo() -> None: pass +[file b.py.3] +def foo(x: int) -> None: pass +[out] +== +a.py:2: error: Too many arguments for "foo" +== + +[case testAddModuleAfterCache3] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py c.py d.py e.py f.py g.py +# flags: --ignore-missing-imports --follow-imports=skip +import a +[file a.py] +import b, c, d, e, f, g +[file b.py.2] +[file c.py.2] +[file d.py.2] +[file e.py.2] +[file f.py.2] +[file g.py.2] +[out] +== + +[case testAddModuleAfterCache4] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py +# cmd3: mypy main a.py b.py +# flags: --ignore-missing-imports --follow-imports=skip +import a +import b +[file a.py] +def foo() -> None: pass +[file b.py.2] +import a +a.foo(10) +[file a.py.3] +def foo(x: int) -> None: pass +[out] +== +b.py:2: error: Too many arguments for "foo" +== + +[case testAddModuleAfterCache5] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py +# cmd3: mypy main a.py b.py +# flags: --ignore-missing-imports --follow-imports=skip +import a +import b +[file a.py] +def foo(x: int) -> None: pass +[file a.py.2] +def foo() -> None: pass +[file b.py.2] +import a +a.foo(10) +[file a.py.3] +def foo(x: int) -> None: pass +[out] +== +b.py:2: error: Too many arguments for "foo" +== + +[case testAddModuleAfterCache6] +# cmd: mypy main a.py +# cmd2: mypy main a.py b.py +# cmd3: mypy main a.py b.py +# flags: --ignore-missing-imports --follow-imports=skip +import a +[file a.py] +import b +b.foo() +[file a.py.2] +import b +b.foo(0) +[file b.py.2] +def foo() -> None: pass +[file b.py.3] +def foo(x: int) -> None: pass +[out] +== +a.py:2: error: Too many arguments for "foo" +== + +[case testRefreshImportIfMypyElse1] +import a +[file a.py] +from b import foo + +MYPY = False +if MYPY: + x = 0 +else: + from fictional import x + +x = 1 +[file b/__init__.py] +[file b/foo.py] +[file b/__init__.py.2] +# Dummy change +[builtins fixtures/bool.pyi] +[out] +== diff --git a/test-data/unit/fine-grained.test b/test-data/unit/fine-grained.test index 10abcdb72084..616e69634221 100644 --- a/test-data/unit/fine-grained.test +++ b/test-data/unit/fine-grained.test @@ -1297,6 +1297,28 @@ class A: pass main:2: error: Module 'a' has no attribute 'A' == +[case testRefreshGenericAndFailInPass3] +# Failure in semantic analysis pass 3 +from a import C +a: C[int] +[file a.py] +from typing import TypeVar, Generic +T = TypeVar('T') +class C(Generic[T]): pass +[file a.py.2] +from typing import TypeVar, Generic +T = TypeVar('T') +S = TypeVar('S') +class C(Generic[T, S]): pass +[file a.py.3] +from typing import TypeVar, Generic +T = TypeVar('T') +class C(Generic[T]): pass +[out] +== +main:3: error: "C" expects 2 type arguments, but 1 given +== + [case testPrintStatement_python2] # flags: --py2 import a @@ -1436,9 +1458,11 @@ class C: pass class D(C): 1() class E(D): pass +# Something needs to change [file b.py.2] import a +# Something needs to change [triggered] 2: a, a @@ -1506,7 +1530,8 @@ import a [file a.py] from typing import Dict, NewType 
-N = NewType('N', int) +class A: pass +N = NewType('N', A) a: Dict[N, int] @@ -1516,7 +1541,8 @@ def f(self, x: N) -> None: [file a.py.2] from typing import Dict, NewType # dummy change -N = NewType('N', int) +class A: pass +N = NewType('N', A) a: Dict[N, int] @@ -1751,7 +1777,7 @@ p = Point(dict(x=42, y=1337)) [file a.py.2] from mypy_extensions import TypedDict Point = TypedDict('Point', {'x': int, 'y': int}) -p = Point(dict(x=42, y=1337)) +p = Point(dict(x=42, y=1337)) # dummy change [out] == @@ -2475,3 +2501,220 @@ else: [builtins fixtures/ops.pyi] [out] == + +[case testNewTypeDependencies1] +from a import N + +def f(x: N) -> None: + x.y = 1 +[file a.py] +from typing import NewType +from b import C + +N = NewType('N', C) +[file b.py] +class C: + y: int +[file b.py.2] +class C: + y: str +[out] +== +main:4: error: Incompatible types in assignment (expression has type "int", variable has type "str") + +[case testNewTypeDependencies2] +from a import N +from b import C, D + +def f(x: C) -> None: pass + +def g(x: N) -> None: + f(x) +[file a.py] +from typing import NewType +from b import D + +N = NewType('N', D) +[file b.py] +class C: pass +class D(C): pass +[file b.py.2] +class C: pass +class D: pass +[out] +== +main:7: error: Argument 1 to "f" has incompatible type "N"; expected "C" + +[case testNewTypeDependencies3] +from a import N + +def f(x: N) -> None: + x.y +[file a.py] +from typing import NewType +from b import C +N = NewType('N', C) +[file a.py.2] +from typing import NewType +from b import D +N = NewType('N', D) +[file b.py] +class C: + y: int +class D: + pass +[out] +== +main:4: error: "N" has no attribute "y" + +[case testNamedTupleWithinFunction] +from typing import NamedTuple +import b +def f() -> None: + b.x + n = NamedTuple('n', []) +[file b.py] +x = 0 +[file b.py.2] +x = '' +[out] +== + +[case testNamedTupleFallback] +# This test will fail without semantic analyzer pass 2 patches +import a +[file a.py] +import b +[file b.py] +from typing import NamedTuple +import c +c.x +class N(NamedTuple): + count: int +[file c.py] +x = 0 +[file c.py.2] +x = '' +[builtins fixtures/tuple.pyi] +[out] +b.py:5: error: Incompatible types in assignment (expression has type "int", base class "tuple" defined the type as "Callable[[Tuple[int, ...], Any], int]") +== +b.py:5: error: Incompatible types in assignment (expression has type "int", base class "tuple" defined the type as "Callable[[Tuple[int, ...], Any], int]") + +[case testReprocessEllipses1] +import a +[file a.py] +from typing import Tuple +def foo(x: Tuple[int, ...]) -> None: pass +[file a.py.2] +from typing import Tuple +def foo(x: Tuple[int, ...]) -> None: pass +[builtins fixtures/tuple.pyi] +[out] +== + +[case testReprocessEllipses2] +import a +[file a.py] +from typing import Callable +def foo(x: Callable[..., int]) -> None: pass +[file a.py.2] +from typing import Callable +def foo(x: Callable[..., int]) -> None: pass +[out] +== + +[case testReprocessCallableArg] +import a +[file a.py] +from typing import Callable +from mypy_extensions import Arg +def a(f: Callable[[Arg(int, 'x')], int]) -> None: pass +[file a.py.2] +from typing import Callable +from mypy_extensions import Arg +def a(f: Callable[[Arg(int, 'x')], int]) -> None: pass +[builtins fixtures/dict.pyi] +[out] +== + +[case testImplicitTuple1] +import a +[file a.py] +# Bogus annotation in nested function masked because outer function +# isn't annotated +def unchecked(): + def inner(): + # type: () -> (str, int) + return 'lol', 10 +[file a.py.2] +# dummy change +def unchecked(): + 
def inner(): + # type: () -> (str, int) + return 'lol', 10 +[out] +== + +[case testImplicitTuple2] +import a +[file a.py] +def inner(): + # type: () -> (str, int) + return 'lol', 10 +[file a.py.2] +# dummy change +def inner(): + # type: () -> (str, int) + return 'lol', 10 +[out] +a.py:1: error: Invalid tuple literal type +== +a.py:2: error: Invalid tuple literal type + +[case testImplicitTuple3] +import a +[file a.py] +(x, y) = 1, 'hi' # type: (int, str) +[file a.py.2] +# dummy change +(x, y) = 1, 'hi' # type: (int, str) +[out] +== + +[case testCastConfusion] +import b +[file a.py] +from typing import cast +class Thing: + def foo(self) -> None: pass + +thing = cast(Thing, Thing()) + +[file b.py] +from typing import Optional +from a import Thing, thing +class User: + def __init__(self, x: Optional[Thing]) -> None: + self.x = x if x else thing + def use(self) -> None: self.x.foo() + +[file a.py.2] +from typing import cast +class Thing: + def foo(self) -> None: pass + +thing = cast(Thing, Thing()) +# update + +[file b.py.2] +from typing import Optional +from a import Thing, thing +class User: + def __init__(self, x: Optional[Thing]) -> None: + self.x = x if x else thing + def use(self) -> None: self.x.foo() +# update +[builtins fixtures/ops.pyi] +[out] +== diff --git a/test-data/unit/lib-stub/mypy_extensions.pyi b/test-data/unit/lib-stub/mypy_extensions.pyi index a604c9684eeb..107608e980b0 100644 --- a/test-data/unit/lib-stub/mypy_extensions.pyi +++ b/test-data/unit/lib-stub/mypy_extensions.pyi @@ -1,3 +1,5 @@ +# NOTE: Requires fixtures/dict.pyi + from typing import Dict, Type, TypeVar, Optional, Any _T = TypeVar('_T') diff --git a/test-data/unit/merge.test b/test-data/unit/merge.test index d755b83d43dd..492f86d6b99e 100644 --- a/test-data/unit/merge.test +++ b/test-data/unit/merge.test @@ -1395,3 +1395,30 @@ TypeInfo<2>( Bases(builtins.type<3>) Mro(target.M<2>, builtins.type<3>, builtins.object<1>) Names()) + +[case testCast_symtable] +import target +[file target.py] +from typing import cast +class Thing: + pass +thing = cast(Thing, Thing()) +[file target.py.next] +from typing import cast +class Thing: + pass +thing = cast(Thing, Thing()) +[out] +__main__: + target: MypyFile<0> +target: + Thing: TypeInfo<1> + cast: Var<2> + thing: Var<3>(target.Thing<1>) +==> +__main__: + target: MypyFile<0> +target: + Thing: TypeInfo<1> + cast: Var<2> + thing: Var<3>(target.Thing<1>) diff --git a/test-data/unit/reports.test b/test-data/unit/reports.test index 8343212fc8df..e69157d20c21 100644 --- a/test-data/unit/reports.test +++ b/test-data/unit/reports.test @@ -281,7 +281,7 @@ Total 0 16 100.00% [case testAnyExpressionsReportTypesOfAny] -# cmd: mypy --any-exprs-report report n.py +# cmd: mypy --python-version=3.6 --any-exprs-report report n.py [file n.py] from typing import Any, List From f71de12811b620a21a57ae8a58b3c258c9a871b7 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Thu, 8 Mar 2018 15:16:43 -0800 Subject: [PATCH 12/16] SQUASHED MERGE COMMIT WITH delete_optimize --- mypy/build.py | 74 ++++++++++++------ mypy/dmypy_server.py | 18 +++-- mypy/fixup.py | 3 +- mypy/server/astdiff.py | 4 +- mypy/server/update.py | 16 +++- mypy/test/helpers.py | 17 +++- mypy/test/testcheck.py | 20 +---- mypy/test/testfinegrained.py | 22 +++++- test-data/unit/fine-grained-modules.test | 99 +++++++++++++++++++++--- test-data/unit/fine-grained.test | 5 ++ 10 files changed, 214 insertions(+), 64 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 326392ee77d0..3080d54fd3c5 100644 --- a/mypy/build.py +++ 
b/mypy/build.py @@ -81,6 +81,7 @@ class BuildResult: manager: The build manager. files: Dictionary from module name to related AST node. types: Dictionary from parse tree node to its inferred type. + used_cache: Whether the build took advantage of a cache errors: List of error messages. """ @@ -89,6 +90,7 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None: self.graph = graph self.files = manager.modules self.types = manager.all_types # Non-empty for tests only or if dumping deps + self.used_cache = manager.cache_enabled self.errors = [] # type: List[str] # Filled in by build if desired @@ -590,6 +592,7 @@ class BuildManager: flush_errors: A function for processing errors after each SCC saved_cache: Dict with saved cache state for coarse-grained dmypy (read-write!) + cache_enabled: Whether cache usage is enabled stats: Dict with various instrumentation numbers fscache: A file system cacher """ @@ -611,7 +614,6 @@ def __init__(self, data_dir: str, self.data_dir = data_dir self.errors = errors self.errors.set_ignore_prefix(ignore_prefix) - self.only_load_from_cache = options.use_fine_grained_cache self.lib_path = tuple(lib_path) self.source_set = source_set self.reports = reports @@ -630,11 +632,15 @@ def __init__(self, data_dir: str, self.rechecked_modules = set() # type: Set[str] self.plugin = plugin self.flush_errors = flush_errors + self.cache_enabled = options.incremental and options.cache_dir != os.devnull self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache self.stats = {} # type: Dict[str, Any] # Values are ints or floats self.fscache = fscache or FileSystemCache(self.options.python_version) self.find_module_cache = FindModuleCache(self.fscache) + def use_fine_grained_cache(self) -> bool: + return self.cache_enabled and self.options.use_fine_grained_cache + def maybe_swap_for_shadow_path(self, path: str) -> str: if (self.options.shadow_file and os.path.samefile(self.options.shadow_file[0], path)): @@ -1078,7 +1084,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], # changed since the cache was generated. We *don't* want to do a # coarse-grained incremental rebuild, so we accept the cache # metadata even if it doesn't match the source file. - if manager.options.use_fine_grained_cache: + if manager.use_fine_grained_cache(): manager.log('Using potentially stale metadata for {}'.format(id)) return meta @@ -1567,7 +1573,7 @@ def __init__(self, self.path = path self.xpath = path or '' self.source = source - if path and source is None and self.options.incremental: + if path and source is None and self.manager.cache_enabled: self.meta = find_cache_meta(self.id, path, manager) # TODO: Get mtime if not cached. if self.meta is not None: @@ -1587,10 +1593,10 @@ def __init__(self, for id, line in zip(self.meta.dependencies, self.meta.dep_lines)} self.child_modules = set(self.meta.child_modules) else: - # In fine-grained cache mode, pretend we only know about modules that - # have cache information and defer handling new modules until the - # fine-grained update. - if manager.only_load_from_cache: + # When doing a fine-grained cache load, pretend we only + # know about modules that have cache information and defer + # handling new modules until the fine-grained update. 
+            if manager.use_fine_grained_cache():
                 manager.log("Deferring module to fine-grained update %s (%s)" % (path, id))
                 raise ModuleNotFound
 
@@ -1707,13 +1713,15 @@ def load_tree(self) -> None:
 
     def fix_cross_refs(self) -> None:
         assert self.tree is not None, "Internal error: method must be called on parsed file only"
+        # We need to set quick_and_dirty when doing a fine-grained
+        # cache load because we need to gracefully handle missing modules.
         fixup_module_pass_one(self.tree, self.manager.modules,
-                              self.manager.options.quick_and_dirty)
+                              self.manager.options.quick_and_dirty or
+                              self.manager.use_fine_grained_cache())
 
     def calculate_mros(self) -> None:
         assert self.tree is not None, "Internal error: method must be called on parsed file only"
-        fixup_module_pass_two(self.tree, self.manager.modules,
-                              self.manager.options.quick_and_dirty)
+        fixup_module_pass_two(self.tree, self.manager.modules)
 
     def patch_dependency_parents(self) -> None:
         """
@@ -1968,7 +1976,7 @@ def valid_references(self) -> Set[str]:
 
     def write_cache(self) -> None:
         assert self.tree is not None, "Internal error: method must be called on parsed file only"
-        if not self.path or self.options.cache_dir == os.devnull:
+        if not self.path or not self.manager.cache_enabled:
             return
         if self.manager.options.quick_and_dirty:
             is_errors = self.manager.errors.is_errors_for_file(self.path)
@@ -2015,9 +2023,12 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
     # This is a kind of unfortunate hack to work around some of fine-grained's
     # fragility: if we have loaded less than 50% of the specified files from
     # cache in fine-grained cache mode, load the graph again honestly.
-    if manager.options.use_fine_grained_cache and len(graph) < 0.50 * len(sources):
-        manager.log("Redoing load_graph because too much was missing")
-        manager.only_load_from_cache = False
+    # In this case, we just turn the cache off entirely: that way we don't
+    # have to worry about a mix of modules loaded from cache and from source,
+    # and fine-grained mode never *writes* to the cache.
+    if manager.use_fine_grained_cache() and len(graph) < 0.50 * len(sources):
+        manager.log("Redoing load_graph without cache because too much was missing")
+        manager.cache_enabled = False
         graph = load_graph(sources, manager)
 
     t1 = time.time()
@@ -2035,7 +2046,13 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
     if manager.options.dump_graph:
         dump_graph(graph)
         return graph
-    process_graph(graph, manager)
+    # If we are loading a fine-grained incremental mode cache, we
+    # don't want to do a real incremental reprocess of the graph; we
+    # just want to load in all of the cache information.
+    if manager.use_fine_grained_cache():
+        process_fine_grained_cache_graph(graph, manager)
+    else:
+        process_graph(graph, manager)
     updated = preserve_cache(graph)
     set_updated = set(updated)
     manager.saved_cache.clear()
@@ -2344,14 +2361,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
                 manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
                 process_stale_scc(graph, scc, manager)
 
-    # If we are running in fine-grained incremental mode with caching,
-    # we always process fresh SCCs so that we have all of the symbol
-    # tables and fine-grained dependencies available.
- if manager.options.use_fine_grained_cache: - for prev_scc in fresh_scc_queue: - process_fresh_scc(graph, prev_scc, manager) - fresh_scc_queue = [] - sccs_left = len(fresh_scc_queue) nodes_left = sum(len(scc) for scc in fresh_scc_queue) manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left) @@ -2363,6 +2372,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.log("No fresh SCCs left in queue") +def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None: + """Finish loading everything for use in the fine-grained incremental cache""" + + # If we are running in fine-grained incremental mode with caching, + # we process all SCCs as fresh SCCs so that we have all of the symbol + # tables and fine-grained dependencies available. + # We fail the loading of any SCC that we can't load a meta for, so we + # don't have anything *but* fresh SCCs. + sccs = sorted_components(graph) + manager.log("Found %d SCCs; largest has %d nodes" % + (len(sccs), max(len(scc) for scc in sccs))) + + for ascc in sccs: + # Order the SCC's nodes using a heuristic. + # Note that ascc is a set, and scc is a list. + scc = order_ascc(graph, ascc) + process_fresh_scc(graph, scc, manager) + + def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index 690aa3c0f954..19e932003020 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -276,9 +276,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict fscache = FileSystemCache(self.options.python_version) self.fswatcher = FileSystemWatcher(fscache) self.update_sources(sources) - if not self.options.use_fine_grained_cache: - # Stores the initial state of sources as a side effect. - self.fswatcher.find_changed() try: result = mypy.build.build(sources=sources, options=self.options, @@ -298,7 +295,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict # If we are using the fine-grained cache, build hasn't actually done # the typechecking on the updated files yet. # Run a fine-grained update starting from the cached data - if self.options.use_fine_grained_cache: + if result.used_cache: # Pull times and hashes out of the saved_cache and stick them into # the fswatcher, so we pick up the changes. for state in self.fine_grained_manager.graph.values(): @@ -310,7 +307,18 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash)) # Run an update - messages = self.fine_grained_manager.update(self.find_changed(sources)) + changed = self.find_changed(sources) + + # Find anything that has had its dependency list change + for state in self.fine_grained_manager.graph.values(): + if not state.is_fresh(): + assert state.path is not None + changed.append((state.id, state.path)) + + messages = self.fine_grained_manager.update(changed) + else: + # Stores the initial state of sources as a side effect. 
+ self.fswatcher.find_changed() fscache.flush() status = 1 if messages else 0 diff --git a/mypy/fixup.py b/mypy/fixup.py index 375ca09c4c3a..8c6324fb4cc7 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -22,8 +22,7 @@ def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile], node_fixer.visit_symbol_table(tree.names) -def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile], - quick_and_dirty: bool) -> None: +def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: compute_all_mros(tree.names, modules) diff --git a/mypy/server/astdiff.py b/mypy/server/astdiff.py index 7fc9b5ca81d1..2cd46b8cb1a8 100644 --- a/mypy/server/astdiff.py +++ b/mypy/server/astdiff.py @@ -136,7 +136,9 @@ def snapshot_symbol_table(name_prefix: str, table: SymbolTable) -> Dict[str, Sna common = (fullname, symbol.kind, symbol.module_public) if symbol.kind == MODULE_REF: # This is a cross-reference to another module. - assert isinstance(node, MypyFile) + # If the reference is busted because the other module is missing, + # the node will be a "stale_info" TypeInfo produced by fixup, + # but that doesn't really matter to us here. result[name] = ('Moduleref', common) elif symbol.kind == TVAR: assert isinstance(node, TypeVarExpr) diff --git a/mypy/server/update.py b/mypy/server/update.py index e666795551d2..315e609ab5a7 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -174,13 +174,17 @@ def __init__(self, result: BuildResult) -> None: # Module that we haven't processed yet but that are known to be stale. self.stale = [] # type: List[Tuple[str, str]] # Disable the cache so that load_graph doesn't try going back to disk - # for the cache. This is kind of a hack and it might be better to have - # this directly reflected in load_graph's interface. - self.options.cache_dir = os.devnull + # for the cache. + self.manager.cache_enabled = False manager.saved_cache = {} - manager.only_load_from_cache = False + + # Some hints to the test suite about what is going on: # Active triggers during the last update self.triggered = [] # type: List[str] + # Modules passed to update during the last update + self.changed_modules = [] # type: List[Tuple[str, str]] + # Modules processed during the last update + self.updated_modules = [] # type: List[str] def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]: """Update previous build result by processing changed modules. @@ -199,6 +203,8 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]: Returns: A list of errors. """ + self.changed_modules = changed_modules + if not changed_modules: self.manager.fscache.flush() return self.previous_messages @@ -207,6 +213,7 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]: self.manager.find_module_cache.clear() self.triggered = [] + self.updated_modules = [] changed_modules = dedupe_modules(changed_modules + self.stale) initial_set = {id for id, _ in changed_modules} self.manager.log_fine_grained('==== update %s ====' % ', '.join( @@ -265,6 +272,7 @@ def update_single(self, module: str, path: str) -> Tuple[List[str], - Whether there was a blocking error in the module """ self.manager.log_fine_grained('--- update single %r ---' % module) + self.updated_modules.append(module) # TODO: If new module brings in other modules, we parse some files multiple times. 
manager = self.manager diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 794499825a1d..4b2fa3a4f131 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -5,7 +5,7 @@ import time import shutil -from typing import List, Dict, Tuple, Callable, Any, Optional +from typing import List, Iterable, Dict, Tuple, Callable, Any, Optional from mypy import defaults from mypy.test.config import test_temp_dir @@ -98,6 +98,21 @@ def assert_string_arrays_equal(expected: List[str], actual: List[str], raise AssertionError(msg) +def assert_module_equivalence(name: str, + expected: Optional[Iterable[str]], actual: Iterable[str]) -> None: + if expected is not None: + expected_normalized = sorted(expected) + actual_normalized = sorted(set(actual).difference({"__main__"})) + assert_string_arrays_equal( + expected_normalized, + actual_normalized, + ('Actual modules ({}) do not match expected modules ({}) ' + 'for "[{} ...]"').format( + ', '.join(actual_normalized), + ', '.join(expected_normalized), + name)) + + def update_testcase_output(testcase: DataDrivenTestCase, output: List[str]) -> None: assert testcase.old_cwd is not None, "test was not properly set up" testcase_path = os.path.join(testcase.old_cwd, testcase.file) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index b97625e63d63..85a510e49814 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -11,7 +11,7 @@ from mypy.test.config import test_temp_dir from mypy.test.data import DataDrivenTestCase, DataSuite from mypy.test.helpers import ( - assert_string_arrays_equal, normalize_error_messages, + assert_string_arrays_equal, normalize_error_messages, assert_module_equivalence, retry_on_error, update_testcase_output, parse_options, copy_and_fudge_mtime ) @@ -189,29 +189,15 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int = 0) self.verify_cache(module_data, a, res.manager) if incremental_step > 1: suffix = '' if incremental_step == 2 else str(incremental_step - 1) - self.check_module_equivalence( + assert_module_equivalence( 'rechecked' + suffix, testcase.expected_rechecked_modules.get(incremental_step - 1), res.manager.rechecked_modules) - self.check_module_equivalence( + assert_module_equivalence( 'stale' + suffix, testcase.expected_stale_modules.get(incremental_step - 1), res.manager.stale_modules) - def check_module_equivalence(self, name: str, - expected: Optional[Set[str]], actual: Set[str]) -> None: - if expected is not None: - expected_normalized = sorted(expected) - actual_normalized = sorted(actual.difference({"__main__"})) - assert_string_arrays_equal( - expected_normalized, - actual_normalized, - ('Actual modules ({}) do not match expected modules ({}) ' - 'for "[{} ...]"').format( - ', '.join(actual_normalized), - ', '.join(expected_normalized), - name)) - def verify_cache(self, module_data: List[Tuple[str, str, str]], a: List[str], manager: build.BuildManager) -> None: # There should be valid cache metadata for each module except diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py index 5b19929aedc1..69bef72eee2a 100644 --- a/mypy/test/testfinegrained.py +++ b/mypy/test/testfinegrained.py @@ -10,7 +10,7 @@ import os import re -from typing import List, Tuple, Optional, cast +from typing import List, Set, Tuple, Optional, cast from mypy import build from mypy.build import BuildManager, BuildSource @@ -21,7 +21,9 @@ from mypy.test.data import ( DataDrivenTestCase, DataSuite, UpdateFile, module_from_path ) -from mypy.test.helpers import 
assert_string_arrays_equal, parse_options, copy_and_fudge_mtime
+from mypy.test.helpers import (
+    assert_string_arrays_equal, parse_options, copy_and_fudge_mtime, assert_module_equivalence,
+)
 from mypy.server.mergecheck import check_consistency
 from mypy.dmypy_server import Server
 from mypy.main import expand_dir
@@ -96,6 +98,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
 
         steps = testcase.find_steps()
         all_triggered = []
+
         for operations in steps:
             step += 1
             for op in operations:
@@ -108,10 +111,25 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
 
             sources = self.parse_sources(main_src, step)
             new_messages = self.run_check(server, sources)
+            updated = []  # type: List[str]
+            changed = []  # type: List[str]
             if server.fine_grained_manager:
                 if CHECK_CONSISTENCY:
                     check_consistency(server.fine_grained_manager)
                 all_triggered.append(server.fine_grained_manager.triggered)
+
+                updated = server.fine_grained_manager.updated_modules
+                changed = [mod for mod, file in server.fine_grained_manager.changed_modules]
+
+            assert_module_equivalence(
+                'stale' + str(step - 1),
+                testcase.expected_stale_modules.get(step - 1),
+                changed)
+            assert_module_equivalence(
+                'rechecked' + str(step - 1),
+                testcase.expected_rechecked_modules.get(step - 1),
+                updated)
+
             new_messages = normalize_messages(new_messages)
 
             a.append('==')
diff --git a/test-data/unit/fine-grained-modules.test b/test-data/unit/fine-grained-modules.test
index 715aa80419ed..ae1fae13f4e5 100644
--- a/test-data/unit/fine-grained-modules.test
+++ b/test-data/unit/fine-grained-modules.test
@@ -975,10 +975,10 @@ x = Foo()
 main:2: error: Too few arguments for "foo" of "Foo"
 
 -- This series of tests is designed to test adding a new module that
--- doesn't appear in the cache, for cache mode. They aren't run only
--- in cache mode, though, because they are still perfectly good
--- regular tests.
-[case testAddModuleAfterCache1]
+-- doesn't appear in the cache, for cache mode. They are run only in
+-- cache mode because stale and rechecked differ heavily between
+-- the modes.
+[case testAddModuleAfterCache1-skip-nocache]
 # cmd: mypy main a.py
 # cmd2: mypy main a.py b.py
 # cmd3: mypy main a.py b.py
@@ -990,14 +990,22 @@ import b
 b.foo(0)
 [file b.py.2]
 def foo() -> None: pass
+
+[stale a, b]
+[rechecked a, b]
+
 [file b.py.3]
 def foo(x: int) -> None: pass
+
+[stale2 b]
+[rechecked2 b]
+
 [out]
 ==
 a.py:2: error: Too many arguments for "foo"
 ==
 
-[case testAddModuleAfterCache2]
+[case testAddModuleAfterCache2-skip-nocache]
 # cmd: mypy main a.py
 # cmd2: mypy main a.py b.py
 # cmd3: mypy main a.py b.py
@@ -1008,30 +1016,51 @@ import b
 b.foo(0)
 [file b.py.2]
 def foo() -> None: pass
+
+[stale b]
+[rechecked a, b]
+
 [file b.py.3]
 def foo(x: int) -> None: pass
+
+[stale2 b]
+
 [out]
 ==
 a.py:2: error: Too many arguments for "foo"
 ==
 
-[case testAddModuleAfterCache3]
+[case testAddModuleAfterCache3-skip-nocache]
 # cmd: mypy main a.py
 # cmd2: mypy main a.py b.py c.py d.py e.py f.py g.py
 # flags: --ignore-missing-imports --follow-imports=skip
 import a
 [file a.py]
 import b, c, d, e, f, g
+b.foo(10)
 [file b.py.2]
+def foo() -> None: pass
 [file c.py.2]
 [file d.py.2]
 [file e.py.2]
 [file f.py.2]
 [file g.py.2]
+
+-- No files should be stale or reprocessed in the first step since the large number
+-- of missing files will force build to give up on cache loading.
+[stale] + +[file b.py.3] +def foo(x: int) -> None: pass +[stale2 b] + [out] == +a.py:2: error: Too many arguments for "foo" +== -[case testAddModuleAfterCache4] + +[case testAddModuleAfterCache4-skip-nocache] # cmd: mypy main a.py # cmd2: mypy main a.py b.py # cmd3: mypy main a.py b.py @@ -1050,7 +1079,7 @@ def foo(x: int) -> None: pass b.py:2: error: Too many arguments for "foo" == -[case testAddModuleAfterCache5] +[case testAddModuleAfterCache5-skip-nocache] # cmd: mypy main a.py # cmd2: mypy main a.py b.py # cmd3: mypy main a.py b.py @@ -1064,14 +1093,20 @@ def foo() -> None: pass [file b.py.2] import a a.foo(10) + +[stale a, b] + [file a.py.3] def foo(x: int) -> None: pass + +[stale2 a] + [out] == b.py:2: error: Too many arguments for "foo" == -[case testAddModuleAfterCache6] +[case testAddModuleAfterCache6-skip-nocache] # cmd: mypy main a.py # cmd2: mypy main a.py b.py # cmd3: mypy main a.py b.py @@ -1080,18 +1115,64 @@ import a [file a.py] import b b.foo() + [file a.py.2] import b b.foo(0) [file b.py.2] def foo() -> None: pass + +[stale a, b] + [file b.py.3] def foo(x: int) -> None: pass + +[stale2 b] + [out] == a.py:2: error: Too many arguments for "foo" == +[case testRenameAndDeleteModuleAfterCache-skip-nocache] +import a +[file a.py] +from b1 import f +f() +[file b1.py] +def f() -> None: pass +[file b2.py.2] +def f() -> None: pass +[delete b1.py.2] +[file a.py.2] +from b2 import f +f() + +-- in no cache mode, there is no way to know about b1 yet +[stale a, b2] + +[out] +== + +[case testDeleteModuleAfterCache-skip-nocache] +import a +[file a.py] +from b import f +f() +[file b.py] +def f() -> None: pass +[delete b.py.2] + +-- in no cache mode, there is no way to know about b yet, +-- but a should still get triggered +[stale a] + +[out] +== +a.py:1: error: Cannot find module named 'b' +a.py:1: note: (Perhaps setting MYPYPATH or using the "--ignore-missing-imports" flag would help) + + [case testRefreshImportIfMypyElse1] import a [file a.py] diff --git a/test-data/unit/fine-grained.test b/test-data/unit/fine-grained.test index ca9db254447b..a911dda66c93 100644 --- a/test-data/unit/fine-grained.test +++ b/test-data/unit/fine-grained.test @@ -15,6 +15,11 @@ -- -- == -- +-- +-- +-- Modules that are expected to be detected as changed can be checked with [stale ...] +-- while modules that are reprocessed by update (which can include cached files +-- that need to be loaded) can be checked with [rechecked ...] [case testReprocessFunction] import m From 5a15f68655d78dd5cec91aef37cfd64042dc9b54 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Fri, 9 Mar 2018 15:22:25 -0800 Subject: [PATCH 13/16] Refactor find_module into the cache class, doc tweaks --- mypy/build.py | 196 +++++++++++++++++++++-------------------- mypy/dmypy_server.py | 2 +- mypy/main.py | 3 +- mypy/server/update.py | 2 - mypy/stubgen.py | 3 +- mypy/test/testcheck.py | 3 +- mypy/test/testdmypy.py | 2 +- 7 files changed, 106 insertions(+), 105 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3da9efd1cf30..e3dadd8cfe89 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -550,23 +550,6 @@ def find_config_file_line_number(path: str, section: str, setting_name: str) -> return -1 -class FindModuleCache: - def __init__(self, fscache: Optional[FileSystemCache] = None) -> None: - self.fscache = fscache or FileSystemCache(None) - # Cache find_module: (id, lib_path) -> result. 
- self.results = {} # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]] - - # Cache some repeated work within distinct find_module calls: finding which - # elements of lib_path have even the subdirectory they'd need for the module - # to exist. This is shared among different module ids when they differ only - # in the last component. - self.dirs = {} # type: Dict[Tuple[str, Tuple[str, ...]], List[str]] - - def clear(self) -> None: - self.results.clear() - self.dirs.clear() - - class BuildManager: """This class holds shared state for building a mypy program. @@ -724,7 +707,7 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str: def is_module(self, id: str) -> bool: """Is there a file in the file system corresponding to module id?""" - return find_module(self.find_module_cache, id, self.lib_path) is not None + return self.find_module_cache.find_module(id, self.lib_path) is not None def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> MypyFile: """Parse the source of a file with the given name. @@ -837,85 +820,108 @@ def remove_cwd_prefix_from_path(p: str) -> str: return p -def find_module(cache: FindModuleCache, id: str, lib_path_arg: Iterable[str]) -> Optional[str]: - """Return the path of the module source file, or None if not found.""" - lib_path = tuple(lib_path_arg) - fscache = cache.fscache - - def find() -> Optional[str]: - # If we're looking for a module like 'foo.bar.baz', it's likely that most of the - # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover - # that only once and cache it for when we look for modules like 'foo.bar.blah' - # that will require the same subdirectory. - components = id.split('.') - dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar' - if (dir_chain, lib_path) not in cache.dirs: - dirs = [] - for pathitem in lib_path: - # e.g., '/usr/lib/python3.4/foo/bar' - dir = os.path.normpath(os.path.join(pathitem, dir_chain)) - if fscache.isdir(dir): - dirs.append(dir) - cache.dirs[dir_chain, lib_path] = dirs - candidate_base_dirs = cache.dirs[dir_chain, lib_path] - - # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now - # contains just the subdirectories 'foo/bar' that actually exist under the - # elements of lib_path. This is probably much shorter than lib_path itself. - # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories. - seplast = os.sep + components[-1] # so e.g. '/baz' - sepinit = os.sep + '__init__' - for base_dir in candidate_base_dirs: - base_path = base_dir + seplast # so e.g. '/usr/lib/python3.4/foo/bar/baz' - # Prefer package over module, i.e. baz/__init__.py* over baz.py*. - for extension in PYTHON_EXTENSIONS: - path = base_path + sepinit + extension - if fscache.isfile_case(path) and verify_module(fscache, id, path): - return path - # No package, look for module. - for extension in PYTHON_EXTENSIONS: - path = base_path + extension - if fscache.isfile_case(path) and verify_module(fscache, id, path): - return path - return None +class FindModuleCache: + """Module finder with integrated cache. - key = (id, lib_path) - if key not in cache.results: - cache.results[key] = find() - return cache.results[key] + Module locations and some intermediate results are cached internally + and can be cleared with the clear() method. + All file system accesses are performed through a FileSystemCache, + which is not ever cleared by this class. If necessary it must be + cleared by client code. 
+ """ -def find_modules_recursive(cache: FindModuleCache, - module: str, lib_path: List[str]) -> List[BuildSource]: - module_path = find_module(cache, module, lib_path) - if not module_path: - return [] - result = [BuildSource(module_path, module, None)] - if module_path.endswith(('__init__.py', '__init__.pyi')): - # Subtle: this code prefers the .pyi over the .py if both - # exists, and also prefers packages over modules if both x/ - # and x.py* exist. How? We sort the directory items, so x - # comes before x.py and x.pyi. But the preference for .pyi - # over .py is encoded in find_module(); even though we see - # x.py before x.pyi, find_module() will find x.pyi first. We - # use hits to avoid adding it a second time when we see x.pyi. - # This also avoids both x.py and x.pyi when x/ was seen first. - hits = set() # type: Set[str] - for item in sorted(os.listdir(os.path.dirname(module_path))): - abs_path = os.path.join(os.path.dirname(module_path), item) - if os.path.isdir(abs_path) and \ - (os.path.isfile(os.path.join(abs_path, '__init__.py')) or - os.path.isfile(os.path.join(abs_path, '__init__.pyi'))): - hits.add(item) - result += find_modules_recursive(cache, module + '.' + item, lib_path) - elif item != '__init__.py' and item != '__init__.pyi' and \ - item.endswith(('.py', '.pyi')): - mod = item.split('.')[0] - if mod not in hits: - hits.add(mod) - result += find_modules_recursive( - cache, module + '.' + mod, lib_path) - return result + def __init__(self, fscache: Optional[FileSystemCache] = None) -> None: + self.fscache = fscache or FileSystemCache(None) + # Cache find_module: (id, lib_path) -> result. + self.results = {} # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]] + + # Cache some repeated work within distinct find_module calls: finding which + # elements of lib_path have even the subdirectory they'd need for the module + # to exist. This is shared among different module ids when they differ only + # in the last component. + self.dirs = {} # type: Dict[Tuple[str, Tuple[str, ...]], List[str]] + + def clear(self) -> None: + self.results.clear() + self.dirs.clear() + + def find_module(self, id: str, lib_path_arg: Iterable[str]) -> Optional[str]: + """Return the path of the module source file, or None if not found.""" + lib_path = tuple(lib_path_arg) + fscache = self.fscache + + def find() -> Optional[str]: + # If we're looking for a module like 'foo.bar.baz', it's likely that most of the + # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover + # that only once and cache it for when we look for modules like 'foo.bar.blah' + # that will require the same subdirectory. + components = id.split('.') + dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar' + if (dir_chain, lib_path) not in self.dirs: + dirs = [] + for pathitem in lib_path: + # e.g., '/usr/lib/python3.4/foo/bar' + dir = os.path.normpath(os.path.join(pathitem, dir_chain)) + if fscache.isdir(dir): + dirs.append(dir) + self.dirs[dir_chain, lib_path] = dirs + candidate_base_dirs = self.dirs[dir_chain, lib_path] + + # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now + # contains just the subdirectories 'foo/bar' that actually exist under the + # elements of lib_path. This is probably much shorter than lib_path itself. + # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories. + seplast = os.sep + components[-1] # so e.g. 
'/baz' + sepinit = os.sep + '__init__' + for base_dir in candidate_base_dirs: + base_path = base_dir + seplast # so e.g. '/usr/lib/python3.4/foo/bar/baz' + # Prefer package over module, i.e. baz/__init__.py* over baz.py*. + for extension in PYTHON_EXTENSIONS: + path = base_path + sepinit + extension + if fscache.isfile_case(path) and verify_module(fscache, id, path): + return path + # No package, look for module. + for extension in PYTHON_EXTENSIONS: + path = base_path + extension + if fscache.isfile_case(path) and verify_module(fscache, id, path): + return path + return None + + key = (id, lib_path) + if key not in self.results: + self.results[key] = find() + return self.results[key] + + def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[BuildSource]: + module_path = self.find_module(module, lib_path) + if not module_path: + return [] + result = [BuildSource(module_path, module, None)] + if module_path.endswith(('__init__.py', '__init__.pyi')): + # Subtle: this code prefers the .pyi over the .py if both + # exists, and also prefers packages over modules if both x/ + # and x.py* exist. How? We sort the directory items, so x + # comes before x.py and x.pyi. But the preference for .pyi + # over .py is encoded in find_module(); even though we see + # x.py before x.pyi, find_module() will find x.pyi first. We + # use hits to avoid adding it a second time when we see x.pyi. + # This also avoids both x.py and x.pyi when x/ was seen first. + hits = set() # type: Set[str] + for item in sorted(os.listdir(os.path.dirname(module_path))): + abs_path = os.path.join(os.path.dirname(module_path), item) + if os.path.isdir(abs_path) and \ + (os.path.isfile(os.path.join(abs_path, '__init__.py')) or + os.path.isfile(os.path.join(abs_path, '__init__.pyi'))): + hits.add(item) + result += self.find_modules_recursive(module + '.' + item, lib_path) + elif item != '__init__.py' and item != '__init__.pyi' and \ + item.endswith(('.py', '.pyi')): + mod = item.split('.')[0] + if mod not in hits: + hits.add(mod) + result += self.find_modules_recursive(module + '.' + mod, lib_path) + return result def verify_module(fscache: FileSystemCache, id: str, path: str) -> bool: @@ -1528,7 +1534,7 @@ def __init__(self, # difference and just assume 'builtins' everywhere, # which simplifies code. file_id = '__builtin__' - path = find_module(manager.find_module_cache, file_id, manager.lib_path) + path = manager.find_module_cache.find_module(file_id, manager.lib_path) if path: # For non-stubs, look at options.follow_imports: # - normal (default) -> fully analyze diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index 8f1210effa1f..16196e9ab3d0 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -319,7 +319,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict state.path, FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash)) - # Run an update changed = self.find_changed(sources) # Find anything that has had its dependency list change @@ -328,6 +327,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict assert state.path is not None changed.append((state.id, state.path)) + # Run an update messages = self.fine_grained_manager.update(changed) else: # Stores the initial state of sources as a side effect. 
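
For orientation outside of diff context: the caching strategy that this patch
encapsulates in FindModuleCache has two levels. The following is a simplified,
illustrative sketch only (FinderSketch is an invented name; it calls os.path
directly instead of going through the FileSystemCache, and it omits .pyi stubs,
isfile_case(), and verify_module()):

import os
from typing import Dict, Iterable, List, Optional, Tuple

class FinderSketch:
    """Toy two-level memoization in the style of FindModuleCache."""

    def __init__(self) -> None:
        # Level 1: full lookups, (module id, search path) -> path or None.
        self.results = {}  # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]]
        # Level 2: per-package directory scans, shared by sibling modules.
        self.dirs = {}  # type: Dict[Tuple[str, Tuple[str, ...]], List[str]]

    def find_module(self, id: str, lib_path: Iterable[str]) -> Optional[str]:
        key = (id, tuple(lib_path))
        if key not in self.results:
            self.results[key] = self._find(id, key[1])
        return self.results[key]

    def _find(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]:
        components = id.split('.')
        dir_chain = os.sep.join(components[:-1])  # 'foo/bar' for 'foo.bar.baz'
        if (dir_chain, lib_path) not in self.dirs:
            # Scanning lib_path is the expensive step; 'foo.bar.baz' and
            # 'foo.bar.quux' share this cache entry.
            self.dirs[dir_chain, lib_path] = [
                d for d in (os.path.join(p, dir_chain) for p in lib_path)
                if os.path.isdir(d)]
        for base in self.dirs[dir_chain, lib_path]:
            for candidate in (os.path.join(base, components[-1], '__init__.py'),
                              os.path.join(base, components[-1] + '.py')):
                if os.path.isfile(candidate):
                    return candidate
        return None

The point of the second cache is that sibling modules in one package reuse the
scan of lib_path, which dominates the cost when lib_path is long.
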
diff --git a/mypy/main.py b/mypy/main.py index 9e2054ac1ff6..ef126b49eae0 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -527,8 +527,7 @@ def add_invertible_flag(flag: str, .format(special_opts.package)) options.build_type = BuildType.MODULE lib_path = [os.getcwd()] + build.mypy_path() - targets = build.find_modules_recursive(build.FindModuleCache(), - special_opts.package, lib_path) + targets = build.FindModuleCache().find_modules_recursive(special_opts.package, lib_path) if not targets: fail("Can't find package '{}'".format(special_opts.package)) return targets, options diff --git a/mypy/server/update.py b/mypy/server/update.py index 315e609ab5a7..b0d9a93b8fb3 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -528,8 +528,6 @@ def get_module_to_path_map(manager: BuildManager) -> Dict[str, str]: def get_sources(fscache: FileSystemCache, modules: Dict[str, str], changed_modules: List[Tuple[str, str]]) -> List[BuildSource]: - # TODO: Race condition when reading from the file system; we should only read each - # bit of external state once during a build to have a consistent view of the world sources = [] for id, path in changed_modules: if fscache.isfile(path): diff --git a/mypy/stubgen.py b/mypy/stubgen.py index 83f0349888d0..bb9112d5dc67 100755 --- a/mypy/stubgen.py +++ b/mypy/stubgen.py @@ -156,8 +156,7 @@ def find_module_path_and_all(module: str, pyversion: Tuple[int, int], module_all = getattr(mod, '__all__', None) else: # Find module by going through search path. - module_path = mypy.build.find_module(mypy.build.FindModuleCache(), - module, ['.'] + search_path) + module_path = mypy.build.FindModuleCache().find_module(module, ['.'] + search_path) if not module_path: raise SystemExit( "Can't find module '{}' (consider using --search-path)".format(module)) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 85a510e49814..d406ff6ade44 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -291,8 +291,7 @@ def parse_module(self, module_names = m.group(1) out = [] for module_name in module_names.split(' '): - path = build.find_module(build.FindModuleCache(), - module_name, [test_temp_dir]) + path = build.FindModuleCache().find_module(module_name, [test_temp_dir]) assert path is not None, "Can't find ad hoc case file" with open(path) as f: program_text = f.read() diff --git a/mypy/test/testdmypy.py b/mypy/test/testdmypy.py index 1205e13106d5..e5bfdf231bc3 100644 --- a/mypy/test/testdmypy.py +++ b/mypy/test/testdmypy.py @@ -260,7 +260,7 @@ def parse_module(self, module_names = m.group(1) out = [] # type: List[Tuple[str, str, Optional[str]]] for module_name in module_names.split(' '): - path = build.find_module(build.FindModuleCache(), module_name, [test_temp_dir]) + path = build.FindModuleCache().find_module(module_name, [test_temp_dir]) if path is None and module_name.startswith(NON_EXISTENT_PREFIX): # This is a special name for a file that we don't want to exist. assert '.' 
not in module_name # TODO: Packages not supported here From 7869dd69c4b601b8e2aa70bec92b29fb4cbaea15 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Tue, 13 Mar 2018 16:57:30 -0700 Subject: [PATCH 14/16] Some cleanups --- mypy/build.py | 31 +++++++++++++++++++------------ mypy/dmypy_server.py | 2 +- mypy/fscache.py | 6 ++++-- mypy/main.py | 3 ++- mypy/test/testgraph.py | 3 +++ 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index e3dadd8cfe89..00dd39a22a68 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -209,6 +209,7 @@ def _build(sources: List[BuildSource], gc.set_threshold(50000) data_dir = default_data_dir(bin_dir) + fscache = fscache or FileSystemCache(options.python_version) # Determine the default module search path. lib_path = default_lib_path(data_dir, @@ -225,7 +226,7 @@ def _build(sources: List[BuildSource], for source in sources: if source.path: # Include directory of the program file in the module search path. - dir = remove_cwd_prefix_from_path(dirname(source.path)) + dir = remove_cwd_prefix_from_path(fscache, dirname(source.path)) if dir not in lib_path: lib_path.insert(0, dir) @@ -592,8 +593,8 @@ def __init__(self, data_dir: str, plugin: Plugin, errors: Errors, flush_errors: Callable[[List[str], bool], None], + fscache: FileSystemCache, saved_cache: Optional[SavedCache] = None, - fscache: Optional[FileSystemCache] = None, ) -> None: self.start_time = time.time() self.data_dir = data_dir @@ -621,7 +622,7 @@ def __init__(self, data_dir: str, not options.fine_grained_incremental or options.use_fine_grained_cache) self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache self.stats = {} # type: Dict[str, Any] # Values are ints or floats - self.fscache = fscache or FileSystemCache(self.options.python_version) + self.fscache = fscache self.find_module_cache = FindModuleCache(self.fscache) def use_fine_grained_cache(self) -> bool: @@ -792,7 +793,7 @@ def stats_summary(self) -> Mapping[str, object]: return self.stats -def remove_cwd_prefix_from_path(p: str) -> str: +def remove_cwd_prefix_from_path(fscache: FileSystemCache, p: str) -> str: """Remove current working directory prefix from p, if present. Also crawl up until a directory without __init__.py is found. @@ -805,8 +806,8 @@ def remove_cwd_prefix_from_path(p: str) -> str: cur += os.sep # Compute root path. while (p and - (os.path.isfile(os.path.join(p, '__init__.py')) or - os.path.isfile(os.path.join(p, '__init__.pyi')))): + (fscache.isfile(os.path.join(p, '__init__.py')) or + fscache.isfile(os.path.join(p, '__init__.pyi')))): dir, base = os.path.split(p) if not base: break @@ -823,12 +824,12 @@ def remove_cwd_prefix_from_path(p: str) -> str: class FindModuleCache: """Module finder with integrated cache. - Module locations and some intermediate results are cached internally - and can be cleared with the clear() method. + Module locations and some intermediate results are cached internally + and can be cleared with the clear() method. - All file system accesses are performed through a FileSystemCache, - which is not ever cleared by this class. If necessary it must be - cleared by client code. + All file system accesses are performed through a FileSystemCache, + which is not ever cleared by this class. If necessary it must be + cleared by client code. 
""" def __init__(self, fscache: Optional[FileSystemCache] = None) -> None: @@ -908,7 +909,7 @@ def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[Build # use hits to avoid adding it a second time when we see x.pyi. # This also avoids both x.py and x.pyi when x/ was seen first. hits = set() # type: Set[str] - for item in sorted(os.listdir(os.path.dirname(module_path))): + for item in sorted(self.fscache.listdir(os.path.dirname(module_path))): abs_path = os.path.join(os.path.dirname(module_path), item) if os.path.isdir(abs_path) and \ (os.path.isfile(os.path.join(abs_path, '__init__.py')) or @@ -1325,6 +1326,10 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: d. from P import M; checks filesystem whether module P.M exists in filesystem. +e. Race conditions, where somebody modifies a file while we're + processing. Solved by using a FileSystemCache. + + Steps ----- @@ -1814,6 +1819,8 @@ def parse_file(self) -> None: except IOError as ioerr: # ioerr.strerror differs for os.stat failures between Windows and # other systems, but os.strerror(ioerr.errno) does not, so we use that. + # (We want the error messages to be platform-independent so that the + # tests have predictable output.) raise CompileError([ "mypy: can't read file '{}': {}".format( self.path, os.strerror(ioerr.errno))]) diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py index 95d277120695..61a3c6656ad8 100644 --- a/mypy/dmypy_server.py +++ b/mypy/dmypy_server.py @@ -302,7 +302,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict out, err = '', output return {'out': out, 'err': err, 'status': 2} messages = result.errors - self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(result) + self.fine_grained_manager = FineGrainedBuildManager(result) self.previous_sources = sources # If we are using the fine-grained cache, build hasn't actually done diff --git a/mypy/fscache.py b/mypy/fscache.py index 8653b8c7fe8c..1a8b0420be73 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -99,9 +99,9 @@ def listdir(self, path: str) -> List[str]: def isfile(self, path: str) -> bool: try: st = self.stat(path) - return stat.S_ISREG(st.st_mode) except OSError: return False + return stat.S_ISREG(st.st_mode) def isfile_case(self, path: str) -> bool: """Return whether path exists and is a file. @@ -109,6 +109,8 @@ def isfile_case(self, path: str) -> bool: On case-insensitive filesystems (like Mac or Windows) this returns False if the case of the path's last component does not exactly match the case found in the filesystem. + TODO: We should maybe check the case for some directory components also, + to avoid permitting wrongly-cased *packages*. 
""" if path in self.isfile_case_cache: return self.isfile_case_cache[path] @@ -127,9 +129,9 @@ def isfile_case(self, path: str) -> bool: def isdir(self, path: str) -> bool: try: st = self.stat(path) - return stat.S_ISDIR(st.st_mode) except OSError: return False + return stat.S_ISDIR(st.st_mode) def exists(self, path: str) -> bool: try: diff --git a/mypy/main.py b/mypy/main.py index 5b93fbcb540c..cdb3cb802118 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -535,6 +535,7 @@ def add_invertible_flag(flag: str, .format(special_opts.package)) options.build_type = BuildType.MODULE lib_path = [os.getcwd()] + build.mypy_path() + # TODO: use the same cache as the BuildManager will targets = build.FindModuleCache().find_modules_recursive(special_opts.package, lib_path) if not targets: fail("Can't find package '{}'".format(special_opts.package)) @@ -547,7 +548,7 @@ def add_invertible_flag(flag: str, targets = create_source_list(special_opts.files, options) return targets, options - +# TODO: use a FileSystemCache for this def create_source_list(files: Sequence[str], options: Options) -> List[BuildSource]: targets = [] for f in files: diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py index 33d10c0ae1ee..d5b738aa3ff9 100644 --- a/mypy/test/testgraph.py +++ b/mypy/test/testgraph.py @@ -10,6 +10,7 @@ from mypy.report import Reports from mypy.plugin import Plugin from mypy.errors import Errors +from mypy.fscache import FileSystemCache class GraphSuite(Suite): @@ -38,6 +39,7 @@ def test_scc(self) -> None: def _make_manager(self) -> BuildManager: errors = Errors() options = Options() + fscache = FileSystemCache(options.python_version) manager = BuildManager( data_dir='', lib_path=[], @@ -49,6 +51,7 @@ def _make_manager(self) -> BuildManager: plugin=Plugin(options), errors=errors, flush_errors=lambda msgs, serious: None, + fscache=fscache, ) return manager From 7fb70557bcf5cd3947ecba93b74bb35984482050 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Tue, 13 Mar 2018 17:00:36 -0700 Subject: [PATCH 15/16] Refactor a bit --- mypy/build.py | 79 ++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 00dd39a22a68..9a841bce50d7 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -847,51 +847,52 @@ def clear(self) -> None: self.results.clear() self.dirs.clear() + def _find_module(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]: + fscache = self.fscache + + # If we're looking for a module like 'foo.bar.baz', it's likely that most of the + # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover + # that only once and cache it for when we look for modules like 'foo.bar.blah' + # that will require the same subdirectory. + components = id.split('.') + dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar' + if (dir_chain, lib_path) not in self.dirs: + dirs = [] + for pathitem in lib_path: + # e.g., '/usr/lib/python3.4/foo/bar' + dir = os.path.normpath(os.path.join(pathitem, dir_chain)) + if fscache.isdir(dir): + dirs.append(dir) + self.dirs[dir_chain, lib_path] = dirs + candidate_base_dirs = self.dirs[dir_chain, lib_path] + + # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now + # contains just the subdirectories 'foo/bar' that actually exist under the + # elements of lib_path. This is probably much shorter than lib_path itself. + # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories. 
+ seplast = os.sep + components[-1] # so e.g. '/baz' + sepinit = os.sep + '__init__' + for base_dir in candidate_base_dirs: + base_path = base_dir + seplast # so e.g. '/usr/lib/python3.4/foo/bar/baz' + # Prefer package over module, i.e. baz/__init__.py* over baz.py*. + for extension in PYTHON_EXTENSIONS: + path = base_path + sepinit + extension + if fscache.isfile_case(path) and verify_module(fscache, id, path): + return path + # No package, look for module. + for extension in PYTHON_EXTENSIONS: + path = base_path + extension + if fscache.isfile_case(path) and verify_module(fscache, id, path): + return path + return None + def find_module(self, id: str, lib_path_arg: Iterable[str]) -> Optional[str]: """Return the path of the module source file, or None if not found.""" lib_path = tuple(lib_path_arg) - fscache = self.fscache - - def find() -> Optional[str]: - # If we're looking for a module like 'foo.bar.baz', it's likely that most of the - # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover - # that only once and cache it for when we look for modules like 'foo.bar.blah' - # that will require the same subdirectory. - components = id.split('.') - dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar' - if (dir_chain, lib_path) not in self.dirs: - dirs = [] - for pathitem in lib_path: - # e.g., '/usr/lib/python3.4/foo/bar' - dir = os.path.normpath(os.path.join(pathitem, dir_chain)) - if fscache.isdir(dir): - dirs.append(dir) - self.dirs[dir_chain, lib_path] = dirs - candidate_base_dirs = self.dirs[dir_chain, lib_path] - - # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now - # contains just the subdirectories 'foo/bar' that actually exist under the - # elements of lib_path. This is probably much shorter than lib_path itself. - # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories. - seplast = os.sep + components[-1] # so e.g. '/baz' - sepinit = os.sep + '__init__' - for base_dir in candidate_base_dirs: - base_path = base_dir + seplast # so e.g. '/usr/lib/python3.4/foo/bar/baz' - # Prefer package over module, i.e. baz/__init__.py* over baz.py*. - for extension in PYTHON_EXTENSIONS: - path = base_path + sepinit + extension - if fscache.isfile_case(path) and verify_module(fscache, id, path): - return path - # No package, look for module. 
- for extension in PYTHON_EXTENSIONS: - path = base_path + extension - if fscache.isfile_case(path) and verify_module(fscache, id, path): - return path - return None key = (id, lib_path) if key not in self.results: - self.results[key] = find() + self.results[key] = self._find_module(id, lib_path) return self.results[key] def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[BuildSource]: From ea76cc78c90ac79653c78e234791466e8278b71c Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Tue, 13 Mar 2018 17:08:46 -0700 Subject: [PATCH 16/16] Refactor fscache into two classes --- mypy/build.py | 8 +++---- mypy/fscache.py | 60 ++++++++++++++++++++++++++++--------------------- mypy/main.py | 1 + 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 9a841bce50d7..acf4041ad2fe 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -53,7 +53,7 @@ from mypy.plugin import Plugin, DefaultPlugin, ChainedPlugin from mypy.defaults import PYTHON3_VERSION_MIN from mypy.server.deps import get_dependencies -from mypy.fscache import FileSystemCache +from mypy.fscache import FileSystemCache, FileSystemMetaCache # Switch to True to produce debug output related to fine-grained incremental @@ -832,8 +832,8 @@ class FindModuleCache: cleared by client code. """ - def __init__(self, fscache: Optional[FileSystemCache] = None) -> None: - self.fscache = fscache or FileSystemCache(None) + def __init__(self, fscache: Optional[FileSystemMetaCache] = None) -> None: + self.fscache = fscache or FileSystemMetaCache() # Cache find_module: (id, lib_path) -> result. self.results = {} # type: Dict[Tuple[str, Tuple[str, ...]], Optional[str]] @@ -926,7 +926,7 @@ def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[Build return result -def verify_module(fscache: FileSystemCache, id: str, path: str) -> bool: +def verify_module(fscache: FileSystemMetaCache, id: str, path: str) -> bool: """Check that all packages containing id have a __init__ file.""" if path.endswith(('__init__.py', '__init__.pyi')): path = dirname(path) diff --git a/mypy/fscache.py b/mypy/fscache.py index 1a8b0420be73..75600dba2951 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -34,42 +34,18 @@ from mypy.util import read_with_python_encoding -class FileSystemCache: - def __init__(self, pyversion: Optional[Tuple[int, int]] = None) -> None: - self.pyversion = pyversion +class FileSystemMetaCache: + def __init__(self) -> None: self.flush() def flush(self) -> None: """Start another transaction and empty all caches.""" self.stat_cache = {} # type: Dict[str, os.stat_result] self.stat_error_cache = {} # type: Dict[str, Exception] - self.read_cache = {} # type: Dict[str, str] - self.read_error_cache = {} # type: Dict[str, Exception] - self.hash_cache = {} # type: Dict[str, str] self.listdir_cache = {} # type: Dict[str, List[str]] self.listdir_error_cache = {} # type: Dict[str, Exception] self.isfile_case_cache = {} # type: Dict[str, bool] - def read_with_python_encoding(self, path: str) -> str: - assert self.pyversion - if path in self.read_cache: - return self.read_cache[path] - if path in self.read_error_cache: - raise self.read_error_cache[path] - - # Need to stat first so that the contents of file are from no - # earlier instant than the mtime reported by self.stat(). 
- self.stat(path) - - try: - data, md5hash = read_with_python_encoding(path, self.pyversion) - except Exception as err: - self.read_error_cache[path] = err - raise - self.read_cache[path] = data - self.hash_cache[path] = md5hash - return data - def stat(self, path: str) -> os.stat_result: if path in self.stat_cache: return self.stat_cache[path] @@ -140,6 +116,38 @@ def exists(self, path: str) -> bool: return False return True + +class FileSystemCache(FileSystemMetaCache): + def __init__(self, pyversion: Tuple[int, int]) -> None: + self.pyversion = pyversion + self.flush() + + def flush(self) -> None: + """Start another transaction and empty all caches.""" + super().flush() + self.read_cache = {} # type: Dict[str, str] + self.read_error_cache = {} # type: Dict[str, Exception] + self.hash_cache = {} # type: Dict[str, str] + + def read_with_python_encoding(self, path: str) -> str: + if path in self.read_cache: + return self.read_cache[path] + if path in self.read_error_cache: + raise self.read_error_cache[path] + + # Need to stat first so that the contents of file are from no + # earlier instant than the mtime reported by self.stat(). + self.stat(path) + + try: + data, md5hash = read_with_python_encoding(path, self.pyversion) + except Exception as err: + self.read_error_cache[path] = err + raise + self.read_cache[path] = data + self.hash_cache[path] = md5hash + return data + def md5(self, path: str) -> str: if path not in self.hash_cache: self.read_with_python_encoding(path) diff --git a/mypy/main.py b/mypy/main.py index cdb3cb802118..6a74f3c9be00 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -548,6 +548,7 @@ def add_invertible_flag(flag: str, targets = create_source_list(special_opts.files, options) return targets, options + # TODO: use a FileSystemCache for this def create_source_list(files: Sequence[str], options: Options) -> List[BuildSource]: targets = []
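
A closing note on how the resulting two-class design is meant to be used: the
module finder only needs metadata, so it takes a FileSystemMetaCache, while
BuildManager's FileSystemCache adds cached file contents and md5 hashes on top.
A small usage sketch of the transaction pattern (check_once and example.py are
invented for illustration; the methods are the ones defined in the diffs above):

from mypy.fscache import FileSystemCache

fscache = FileSystemCache((3, 6))  # pyversion, per the new __init__ signature

def check_once(path: str) -> None:
    # Within one transaction, every access is served from the cache, so the
    # stat result, the decoded contents, and the md5 hash are mutually
    # consistent even if the file changes on disk mid-build.
    st = fscache.stat(path)
    src = fscache.read_with_python_encoding(path)
    digest = fscache.md5(path)
    print(st.st_mtime, len(src), digest)

check_once('example.py')  # hypothetical source file
fscache.flush()           # start the next transaction: drop all cached state
check_once('example.py')  # re-reads from disk, picking up any changes

flush() is the transaction boundary that dmypy relies on between fine-grained
updates.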