Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stubtest script to compare imported module and stub #3036

Merged
merged 6 commits into from
Apr 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 27 additions & 17 deletions scripts/dumpmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sys
import types
from typing import Text
from collections import defaultdict


if sys.version_info >= (3, 0):
Expand All @@ -32,16 +33,26 @@ def module_to_json(m):
result = {}
for name, value in m.__dict__.items():
# Filter out some useless attributes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary empty line

if name in ('__file__',
'__doc__',
'__name__',
'__builtins__',
'__package__'):
continue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary empty line

if name == '__all__':
result[name] = sorted(value)
result[name] = {'type': 'list', 'values': sorted(value)}
else:
result[name] = dump_value(value)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary empty line

try:
_, line = inspect.getsourcelines(getattr(m, name))
except (TypeError, OSError):
line = None

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary empty line

result[name]['line'] = line

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary empty line

return result


Expand All @@ -53,25 +64,25 @@ def dump_value(value, depth=0):
if inspect.isfunction(value):
return dump_function(value)
if callable(value):
return 'callable' # TODO more information
return {'type': 'callable'} # TODO more information
if isinstance(value, types.ModuleType):
return 'module' # TODO module name
return {'type': 'module'} # TODO module name
if inspect.isdatadescriptor(value):
return 'datadescriptor'
return {'type': 'datadescriptor'}

if inspect.ismemberdescriptor(value):
return 'memberdescriptor'
return {'type': 'memberdescriptor'}
return dump_simple(value)


def dump_simple(value):
if type(value) in (int, bool, float, str, bytes, Text, long, list, set, dict, tuple):
return type(value).__name__
return {'type': type(value).__name__}
if value is None:
return 'None'
return {'type': 'None'}
if value is inspect.Parameter.empty:
return None
return 'unknown'
return {'type': None} # 'None' and None: Ruh-Roh
return {'type': 'unknown'}


def dump_class(value, depth):
Expand All @@ -92,8 +103,9 @@ def dump_class(value, depth):
]


# Change to return a dict
def dump_attrs(d, depth):
result = []
result = {}
seen = set()
try:
mro = d.mro()
Expand All @@ -103,11 +115,11 @@ def dump_attrs(d, depth):
v = vars(base)
for name, value in v.items():
if name not in seen:
result.append([name, dump_value(value, depth + 1)])
result[name] = dump_value(value, depth + 1)
seen.add(name)
for m in special_methods:
if hasattr(d, m) and m not in seen:
result.append([m, dump_value(getattr(d, m), depth + 1)])
result[m] = dump_value(getattr(d, m), depth + 1)
return result


Expand All @@ -133,14 +145,12 @@ def dump_function(value):
sig = inspect.signature(value)
except ValueError:
# The signature call sometimes fails for some reason.
return 'invalid_signature'
return {'type': 'invalid_signature'}
params = list(sig.parameters.items())
return {
'type': 'function',
'args': [(name,
param_kind(p),
dump_simple(p.default))
for name, p in params],
'args': [(name, param_kind(p), dump_simple(p.default))
for name, p in params],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no functional change here, so could you keep the original formatting to reduce the diff?

}


Expand Down
239 changes: 157 additions & 82 deletions scripts/stubtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,126 +5,201 @@
"""

import importlib
import json
import subprocess
import sys
from typing import Dict, Any
from collections import defaultdict
from typing import Dict, Any, List
from collections import defaultdict, namedtuple

from mypy import build
from mypy.build import default_data_dir, default_lib_path, find_modules_recursive
from mypy.errors import CompileError
from mypy.nodes import MypyFile, TypeInfo, FuncItem
from mypy import nodes
from mypy.options import Options

import dumpmodule

skipped = {
if sys.version_info < (3, 4):
from singledispatch import singledispatch
else:
from functools import singledispatch

# TODO: email.contentmanager has a symbol table with a None node.
# This seems like it should not be.

skip = {
'_importlib_modulespec',
'_subprocess',
'distutils.command.bdist_msi',
'distutils.command.bdist_packager',
'msvcrt',
'wsgiref.types',
'mypy_extensions',
'unittest.mock', # mock.call infinite loops on inspect.getsourcelines
# https://bugs.python.org/issue25532
# TODO: can we filter only call?
}

messages = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use module-level constants, e.g. `NOT_IN_RUNTIME = '{error.stub_type} ...'`, instead of a dictionary; that way the IDE can auto-complete the names and the interpreter will flag a typo right away.

'not_in_runtime': ('{error.stub_type} "{error.name}" defined at line '
' {error.line} in stub but is not defined at runtime'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If `line` is None, you might want to suppress the "at line None" part of the output.

'not_in_stub': ('{error.module_type} "{error.name}" defined at line'
' {error.line} at runtime but is not defined in stub'),
'no_stubs': 'could not find typeshed {error.name}',
'inconsistent': ('"{error.name}" is {error.stub_type} in stub but'
' {error.module_type} at runtime'),
}

Error = namedtuple('Error', (
'module',
'name',
'error_type',
'line',
'stub_type',
'module_type'))

class Errors:
def __init__(self, id):
self.id = id
self.num_errors = 0

def fail(self, msg):
print('{}: {}'.format(self.id, msg))
self.num_errors += 1
def test_stub(name: str):
stubs = {
mod: stub for mod, stub in build_stubs(name).items()
if (mod == name or mod.startswith(name + '.')) and mod not in skip
}

for mod, stub in stubs.items():
instance = dump_module(mod)

def test_stub(id: str) -> None:
result = build_stubs(id)
verify_stubs(result.files, prefix=id)
for identifiers, *error in verify(stub, instance):
yield Error(mod, '.'.join(identifiers), *error)


def verify_stubs(files: Dict[str, MypyFile], prefix: str) -> None:
for id, node in files.items():
if not (id == prefix or id.startswith(prefix + '.')):
# Not one of the target modules
continue
if id in skipped:
# There's some issue with processing this module; skip for now
continue
dumped = dump_module(id)
verify_stub(id, node.names, dumped)
@singledispatch
def verify(node, module_node):
    """Fallback verifier used when no handler is registered for type(node).

    Args:
        node: the stub-side node being checked.
        module_node: the runtime-side dump for the same name (unused here).

    Raises:
        TypeError: always; the message names the unhandled node.
    """
    message = 'unknown mypy node ' + str(node)
    raise TypeError(message)


# symbols is typeshed, dumped is runtime
def verify_stub(id, symbols, dumped):
errors = Errors(id)
symbols = defaultdict(lambda: None, symbols)
dumped = defaultdict(lambda: None, dumped)

all_symbols = {
name: (symbols[name], dumped[name])
for name in (set(symbols) | set(dumped))
if not name.startswith('_') # private attributes
and (symbols[name] is None or symbols[name].module_public)
}
@verify.register(nodes.MypyFile)
def verify_mypyfile(stub, instance):
if instance is None:
yield [], 'not_in_runtime', stub.line, type(stub), None
elif instance['type'] != 'file':
yield [], 'inconsistent', stub.line, type(stub), instance['type']
else:
stub_children = defaultdict(lambda: None, stub.names)
instance_children = defaultdict(lambda: None, instance['names'])

# TODO: I would rather not filter public children here.
# For example, what if the checkersurfaces an inconsistency
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

space after 'checker'

# in the typing of a private child
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if private children are not supposed to be used outside the module, is it worth trying to add them to the stub, especially since some of them may be very tricky to type correctly?

public_nodes = {
name: (stub_children[name], instance_children[name])
for name in set(stub_children) | set(instance_children)
if not name.startswith('_')
and (stub_children[name] is None or stub_children[name].module_public)
}

for node, (stub_child, instance_child) in public_nodes.items():
stub_child = getattr(stub_child, 'node', None)
for identifiers, *error in verify(stub_child, instance_child):
yield ([node] + identifiers, *error)

@verify.register(nodes.TypeInfo)
def verify_typeinfo(stub, instance):
    """Compare a class from the stub against its dumped runtime counterpart.

    Args:
        stub: the stub-side class node; its ``line`` and ``names`` are read.
        instance: the runtime dump for the same name, or a falsy value when
            the name does not exist at runtime.

    Yields:
        Tuples of (identifier path, error kind, line, stub type, runtime
        type). Errors found in attributes have the attribute name prepended
        to the identifier path.
    """
    if not instance:
        yield [], 'not_in_runtime', stub.line, type(stub), None
        return

    if instance['type'] != 'class':
        yield [], 'inconsistent', stub.line, type(stub), instance['type']
        return

    # Only names declared in the stub are walked; each one is dispatched
    # on the type of its node, with None standing in for a missing dump.
    for attr_name, symbol in stub.names.items():
        runtime_attr = instance['attributes'].get(attr_name)
        for path, *details in verify(symbol.node, runtime_attr):
            yield ([attr_name] + path, *details)


@verify.register(nodes.FuncItem)
def verify_funcitem(stub, instance):
    """Check that a function declared in the stub is callable at runtime.

    Args:
        stub: the stub-side function node; its ``line`` is read.
        instance: the runtime dump (a dict) for the same name, or a falsy
            value when the name does not exist at runtime.

    Yields:
        A 'not_in_runtime' error when there is no runtime dump, or an
        'inconsistent' error when the dump's type is neither 'function'
        nor 'callable' (including when the dump has no 'type' entry).
    """
    if not instance:
        yield [], 'not_in_runtime', stub.line, type(stub), None
        return

    # Use .get(): a dump without a 'type' key must be reported as
    # inconsistent, not raise KeyError while the report is being built.
    runtime_type = instance.get('type')
    if runtime_type not in ('function', 'callable'):
        yield [], 'inconsistent', stub.line, type(stub), runtime_type
    # TODO check arguments and return value


@verify.register(type(None))
def verify_none(stub, instance):
    """Report a name that has no node on the stub side.

    Args:
        stub: always None (this handler is registered for NoneType).
        instance: the runtime dump (a dict) for the name, or None.

    Yields:
        A single 'not_in_stub' error carrying the runtime line and type
        when the dump provides them, and None for whichever is missing.
    """
    if instance is None:
        yield [], 'not_in_stub', None, None, None
    else:
        # Use .get(): only module-level dumps are given a 'line' entry, so
        # indexing directly would raise KeyError for other dumps.
        yield [], 'not_in_stub', instance.get('line'), None, instance.get('type')

for name, (typeshed, runtime) in all_symbols.items():
if runtime is None:
errors.fail('"{}" defined in stub but not at runtime'.format(name))
elif typeshed is None:
errors.fail('"{}" defined at runtime but not in stub'.format(name))
else:
verify_node(name, typeshed, runtime, errors)


def verify_node(name, node, dump, errors):
if isinstance(node.node, TypeInfo):
if not isinstance(dump, dict) or dump['type'] != 'class':
errors.fail('"{}" is a class in stub but not at runtime'.format(name))
return
all_attrs = {x[0] for x in dump['attributes']}
for attr, attr_node in node.node.names.items():
if isinstance(attr_node.node, FuncItem) and attr not in all_attrs:
errors.fail(
('"{}.{}" defined as a method in stub but not defined '
'at runtime in class object').format(
name, attr))
# TODO other kinds of nodes


def dump_module(id: str) -> Dict[str, Any]:
try:
o = subprocess.check_output(
['python', 'scripts/dumpmodule.py', id])
except subprocess.CalledProcessError:
print('Failure to dump module contents of "{}"'.format(id))
sys.exit(1)
return json.loads(o.decode('ascii'))

@verify.register(nodes.Var)
def verify_var(node, module_node):
    """Placeholder verifier for variables; currently reports nothing.

    This has to stay a generator function because callers iterate over the
    result of ``verify(...)``; a plain ``pass`` body would return None.
    """
    # TODO: yield an 'inconsistent' error when the type declared in the stub
    # disagrees with the type recorded in the runtime dump.
    yield from ()

def build_stubs(id):

@verify.register(nodes.OverloadedFuncDef)
def verify_overloadedfuncdef(node, module_node):
    """Placeholder verifier for overloaded functions; currently reports nothing.

    Kept as a generator function so that callers can iterate over the
    result of ``verify(...)``.
    """
    # TODO: check against the union of the overloaded signatures.
    yield from ()


@verify.register(nodes.TypeVarExpr)
def verify_typevarexpr(node, module_node):
if False:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need `if False: yield None` here and in several other places? Why not just `pass`?

yield None


@verify.register(nodes.Decorator)
def verify_decorator(node, module_noode):
    """Placeholder verifier for decorated definitions; currently reports nothing.

    Kept as a generator function so that callers can iterate over the
    result of ``verify(...)``.
    """
    # NOTE(review): `module_noode` looks like a typo for `module_node`; the
    # name is left as-is here so the signature stays unchanged.
    yield from ()


def dump_module(name: str) -> Dict[str, Any]:
    """Import module *name* and return its runtime dump as a 'file' node.

    The returned dict has 'type' set to 'file' and 'names' set to the
    result of ``dumpmodule.module_to_json`` for the imported module.
    """
    runtime_module = importlib.import_module(name)
    dumped_names = dumpmodule.module_to_json(runtime_module)
    return {'type': 'file', 'names': dumped_names}


def build_stubs(mod):
data_dir = default_data_dir(None)
options = Options()
options.python_version = (3, 6)
lib_path = default_lib_path(data_dir,
options.python_version,
custom_typeshed_dir=None)
sources = find_modules_recursive(id, lib_path)
if not sources:
sys.exit('Error: Cannot find module {}'.format(repr(id)))
Copy link
Contributor

@pkch pkch Apr 11, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it no longer necessary to check for empty sources?

msg = []
sources = find_modules_recursive(mod, lib_path)
try:
res = build.build(sources=sources,
options=options)
msg = res.errors
except CompileError as e:
msg = e.messages
if msg:
for m in msg:
print(m)
messages = res.errors
except CompileError as error:
messages = error.messages

if messages:
for msg in messages:
print(msg)
sys.exit(1)
return res
return res.files


def main(args):
    """Yield every error found for the modules named in *args*.

    Args:
        args: argv-style list; args[0] is the program name and every
            following item is a module name to test.

    Yields:
        Each item produced by ``test_stub`` for each module, in order.

    Raises:
        SystemExit: with status 1 when no module name is given. Because
            this is a generator, the check runs when iteration starts,
            not when ``main`` is called.
    """
    # `< 2` rather than `== 1`, so that an empty list is rejected as well
    # instead of silently testing nothing.
    if len(args) < 2:
        print('must provide at least one module to test')
        sys.exit(1)

    for module in args[1:]:
        yield from test_stub(module)


if __name__ == '__main__':
test_stub(sys.argv[1])

for err in main(sys.argv):
print(messages[err.error_type].format(error=err))