stubtest script to compare imported module and stub (python#3036)

* test_stub now returns a list of errors Error defined as NamedTuple(name, error_type, message). stubtest no longer errors when there is no stubfile * Make stubtest runnable from any directory Instead of calling the dumpmodule script, do a relative import and call the function directly. * record line number in Error object, blacklist unittest.mock unittest.mock problem related to https://bugs.python.org/issue25532 * record types in Error object, make verify_node recursive also changed dump['attrs'] to a dict * Remove '--all_stdlib' flag from stubtest. doing this automatically depends on module stdlib_list, which is not suitable for master branch. * Refactored verify_node to single dispatch recursion
JelleZijlstra · Apr 30, 2017 · f07c8be · f07c8be
1 parent eb39308
commit f07c8be
Show file tree

Hide file tree

Showing 2 changed files with 184 additions and 99 deletions.
diff --git a/scripts/dumpmodule.py b/scripts/dumpmodule.py
@@ -12,6 +12,7 @@
 import sys
 import types
 from typing import Text
+from collections import defaultdict
 
 
 if sys.version_info >= (3, 0):
@@ -32,16 +33,26 @@ def module_to_json(m):
     result = {}
     for name, value in m.__dict__.items():
         # Filter out some useless attributes.
+
         if name in ('__file__',
                     '__doc__',
                     '__name__',
                     '__builtins__',
                     '__package__'):
             continue
+
         if name == '__all__':
-            result[name] = sorted(value)
+            result[name] = {'type': 'list', 'values': sorted(value)}
         else:
             result[name] = dump_value(value)
+
+        try:
+            _, line = inspect.getsourcelines(getattr(m, name))
+        except (TypeError, OSError):
+            line = None
+
+        result[name]['line'] = line
+
     return result
 
 
@@ -53,25 +64,25 @@ def dump_value(value, depth=0):
     if inspect.isfunction(value):
         return dump_function(value)
     if callable(value):
-        return 'callable'  # TODO more information
+        return {'type': 'callable'}  # TODO more information
     if isinstance(value, types.ModuleType):
-        return 'module'  # TODO module name
+        return {'type': 'module'}  # TODO module name
     if inspect.isdatadescriptor(value):
-        return 'datadescriptor'
+        return {'type': 'datadescriptor'}
 
     if inspect.ismemberdescriptor(value):
-        return 'memberdescriptor'
+        return {'type': 'memberdescriptor'}
     return dump_simple(value)
 
 
 def dump_simple(value):
     if type(value) in (int, bool, float, str, bytes, Text, long, list, set, dict, tuple):
-        return type(value).__name__
+        return {'type': type(value).__name__}
     if value is None:
-        return 'None'
+        return {'type': 'None'}
     if value is inspect.Parameter.empty:
-        return None
-    return 'unknown'
+        return {'type': None}  # 'None' and None: Ruh-Roh
+    return {'type': 'unknown'}
 
 
 def dump_class(value, depth):
@@ -92,8 +103,9 @@ def dump_class(value, depth):
 ]
 
 
+# Change to return a dict
 def dump_attrs(d, depth):
-    result = []
+    result = {}
     seen = set()
     try:
         mro = d.mro()
@@ -103,11 +115,11 @@ def dump_attrs(d, depth):
         v = vars(base)
         for name, value in v.items():
             if name not in seen:
-                result.append([name, dump_value(value, depth + 1)])
+                result[name] = dump_value(value, depth + 1)
                 seen.add(name)
     for m in special_methods:
         if hasattr(d, m) and m not in seen:
-            result.append([m, dump_value(getattr(d, m), depth + 1)])
+            result[m] = dump_value(getattr(d, m), depth + 1)
     return result
 
 
@@ -133,14 +145,12 @@ def dump_function(value):
         sig = inspect.signature(value)
     except ValueError:
         # The signature call sometimes fails for some reason.
-        return 'invalid_signature'
+        return {'type': 'invalid_signature'}
     params = list(sig.parameters.items())
     return {
         'type': 'function',
-        'args': [(name,
-             param_kind(p),
-             dump_simple(p.default))
-            for name, p in params],
+        'args': [(name, param_kind(p), dump_simple(p.default))
+                 for name, p in params],
     }
 
 

diff --git a/scripts/stubtest.py b/scripts/stubtest.py
@@ -5,126 +5,201 @@
 """
 
 import importlib
-import json
-import subprocess
 import sys
-from typing import Dict, Any
-from collections import defaultdict
+from typing import Dict, Any, List
+from collections import defaultdict, namedtuple
 
 from mypy import build
 from mypy.build import default_data_dir, default_lib_path, find_modules_recursive
 from mypy.errors import CompileError
-from mypy.nodes import MypyFile, TypeInfo, FuncItem
+from mypy import nodes
 from mypy.options import Options
 
+import dumpmodule
 
-skipped = {
+if sys.version_info < (3, 4):
+    from singledispatch import singledispatch
+else:
+    from functools import singledispatch
+
+# TODO: email.contentmanager has a symbol table with a None node.
+#       This seems like it should not be.
+
+skip = {
     '_importlib_modulespec',
     '_subprocess',
     'distutils.command.bdist_msi',
     'distutils.command.bdist_packager',
     'msvcrt',
     'wsgiref.types',
+    'mypy_extensions',
+    'unittest.mock',  # mock.call infinite loops on inspect.getsourcelines
+                      # https://bugs.python.org/issue25532
+                      # TODO: can we filter only call?
+}
+
+messages = {
+    'not_in_runtime': ('{error.stub_type} "{error.name}" defined at line '
+                       ' {error.line} in stub but is not defined at runtime'),
+    'not_in_stub': ('{error.module_type} "{error.name}" defined at line'
+                    ' {error.line} at runtime but is not defined in stub'),
+    'no_stubs': 'could not find typeshed {error.name}',
+    'inconsistent': ('"{error.name}" is {error.stub_type} in stub but'
+                     ' {error.module_type} at runtime'),
 }
 
+Error = namedtuple('Error', (
+    'module',
+    'name',
+    'error_type',
+    'line',
+    'stub_type',
+    'module_type'))
 
-class Errors:
-    def __init__(self, id):
-        self.id = id
-        self.num_errors = 0
 
-    def fail(self, msg):
-        print('{}: {}'.format(self.id, msg))
-        self.num_errors += 1
+def test_stub(name: str):
+    stubs = {
+        mod: stub for mod, stub in build_stubs(name).items()
+        if (mod == name or mod.startswith(name + '.')) and mod not in skip
+    }
 
+    for mod, stub in stubs.items():
+        instance = dump_module(mod)
 
-def test_stub(id: str) -> None:
-    result = build_stubs(id)
-    verify_stubs(result.files, prefix=id)
+        for identifiers, *error in verify(stub, instance):
+            yield Error(mod, '.'.join(identifiers), *error)
 
 
-def verify_stubs(files: Dict[str, MypyFile], prefix: str) -> None:
-    for id, node in files.items():
-        if not (id == prefix or id.startswith(prefix + '.')):
-            # Not one of the target modules
-            continue
-        if id in skipped:
-            # There's some issue with processing this module; skip for now
-            continue
-        dumped = dump_module(id)
-        verify_stub(id, node.names, dumped)
+@singledispatch
+def verify(node, module_node):
+    raise TypeError('unknown mypy node ' + str(node))
 
 
-# symbols is typeshed, dumped is runtime
-def verify_stub(id, symbols, dumped):
-    errors = Errors(id)
-    symbols = defaultdict(lambda: None, symbols)
-    dumped = defaultdict(lambda: None, dumped)
 
-    all_symbols = {
-        name: (symbols[name], dumped[name])
-        for name in (set(symbols) | set(dumped))
-        if not name.startswith('_')  # private attributes
-        and (symbols[name] is None or symbols[name].module_public)
-    }
+@verify.register(nodes.MypyFile)
+def verify_mypyfile(stub, instance):
+    if instance is None:
+        yield [], 'not_in_runtime', stub.line, type(stub), None
+    elif instance['type'] != 'file':
+        yield [], 'inconsistent', stub.line, type(stub), instance['type']
+    else:
+        stub_children = defaultdict(lambda: None, stub.names)
+        instance_children = defaultdict(lambda: None, instance['names'])
+
+        # TODO: I would rather not filter public children here.
+        #       For example, what if the checkersurfaces an inconsistency
+        #       in the typing of a private child
+        public_nodes = {
+            name: (stub_children[name], instance_children[name])
+            for name in set(stub_children) | set(instance_children)
+            if not name.startswith('_')
+            and (stub_children[name] is None or stub_children[name].module_public)
+        }
+
+        for node, (stub_child, instance_child) in public_nodes.items():
+            stub_child = getattr(stub_child, 'node', None)
+            for identifiers, *error in verify(stub_child, instance_child):
+                yield ([node] + identifiers, *error)
+
+@verify.register(nodes.TypeInfo)
+def verify_typeinfo(stub, instance):
+    if not instance:
+        yield [], 'not_in_runtime', stub.line, type(stub), None
+    elif instance['type'] != 'class':
+        yield [], 'inconsistent', stub.line, type(stub), instance['type']
+    else:
+        for attr, attr_node in stub.names.items():
+            subdump = instance['attributes'].get(attr, None)
+            for identifiers, *error in verify(attr_node.node, subdump):
+                yield ([attr] + identifiers, *error)
+
+
+@verify.register(nodes.FuncItem)
+def verify_funcitem(stub, instance):
+    if not instance:
+        yield [], 'not_in_runtime', stub.line, type(stub), None
+    elif 'type' not in instance or instance['type'] not in ('function', 'callable'):
+        yield [], 'inconsistent', stub.line, type(stub), instance['type']
+    # TODO check arguments and return value
+
+
+@verify.register(type(None))
+def verify_none(stub, instance):
+    if instance is None:
+        yield [], 'not_in_stub', None, None, None
+    else:
+        yield [], 'not_in_stub', instance['line'], None, instance['type']
 
-    for name, (typeshed, runtime) in all_symbols.items():
-        if runtime is None:
-            errors.fail('"{}" defined in stub but not at runtime'.format(name))
-        elif typeshed is None:
-            errors.fail('"{}" defined at runtime but not in stub'.format(name))
-        else:
-            verify_node(name, typeshed, runtime, errors)
-
-
-def verify_node(name, node, dump, errors):
-    if isinstance(node.node, TypeInfo):
-        if not isinstance(dump, dict) or dump['type'] != 'class':
-            errors.fail('"{}" is a class in stub but not at runtime'.format(name))
-            return
-        all_attrs = {x[0] for x in dump['attributes']}
-        for attr, attr_node in node.node.names.items():
-            if isinstance(attr_node.node, FuncItem) and attr not in all_attrs:
-                errors.fail(
-                    ('"{}.{}" defined as a method in stub but not defined '
-                     'at runtime in class object').format(
-                        name, attr))
-    # TODO other kinds of nodes
-
-
-def dump_module(id: str) -> Dict[str, Any]:
-    try:
-        o = subprocess.check_output(
-            ['python', 'scripts/dumpmodule.py', id])
-    except subprocess.CalledProcessError:
-        print('Failure to dump module contents of "{}"'.format(id))
-        sys.exit(1)
-    return json.loads(o.decode('ascii'))
 
+@verify.register(nodes.Var)
+def verify_var(node, module_node):
+    if False:
+        yield None
+    # Need to check if types are inconsistent.
+    #if 'type' not in dump or dump['type'] != node.node.type:
+    #    import ipdb; ipdb.set_trace()
+    #    yield name, 'inconsistent', node.node.line, shed_type, module_type
 
-def build_stubs(id):
+
+@verify.register(nodes.OverloadedFuncDef)
+def verify_overloadedfuncdef(node, module_node):
+    # Should check types of the union of the overloaded types.
+    if False:
+        yield None
+
+
+@verify.register(nodes.TypeVarExpr)
+def verify_typevarexpr(node, module_node):
+    if False:
+        yield None
+
+
+@verify.register(nodes.Decorator)
+def verify_decorator(node, module_noode):
+    if False:
+        yield None
+
+
+def dump_module(name: str) -> Dict[str, Any]:
+    mod = importlib.import_module(name)
+    return {'type': 'file', 'names': dumpmodule.module_to_json(mod)}
+
+
+def build_stubs(mod):
     data_dir = default_data_dir(None)
     options = Options()
     options.python_version = (3, 6)
     lib_path = default_lib_path(data_dir,
                                 options.python_version,
                                 custom_typeshed_dir=None)
-    sources = find_modules_recursive(id, lib_path)
-    if not sources:
-        sys.exit('Error: Cannot find module {}'.format(repr(id)))
-    msg = []
+    sources = find_modules_recursive(mod, lib_path)
     try:
         res = build.build(sources=sources,
                           options=options)
-        msg = res.errors
-    except CompileError as e:
-        msg = e.messages
-    if msg:
-        for m in msg:
-            print(m)
+        messages = res.errors
+    except CompileError as error:
+        messages = error.messages
+
+    if messages:
+        for msg in messages:
+            print(msg)
         sys.exit(1)
-    return res
+    return res.files
+
+
+def main(args):
+    if len(args) == 1:
+        print('must provide at least one module to test')
+        sys.exit(1)
+    else:
+        modules = args[1:]
+
+    for module in modules:
+        for error in test_stub(module):
+            yield error
 
 
 if __name__ == '__main__':
-    test_stub(sys.argv[1])
+
+    for err in main(sys.argv):
+        print(messages[err.error_type].format(error=err))