From ef9254221f1aaad61b188affe946b3f223adab4c Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 30 Jan 2019 00:26:42 +0000 Subject: [PATCH] Improve CLI, refactor and document stubgen (#6256) This PR does several "infrastructure" changes to `stubgen` tool: * Update CLI and source discovery/collection to match the `mypy` CLI * Refactor the logic in `stubgen.main()` into independent functions * Use semantically analyzed Python ASTs * Separate inference of signatures from docs to a separate module `stubdoc.py` * Move some functions from `stubgen.py` to `stubutil.py` and reorganize the latter * Clean-up the test runner to make more use of `DataSuite` * Add documentation for the `stubgen` script This also does few smaller things: * Minimize number of repeated hard-coded constants * Removed duplicate TODOs * Added dosctrings to `stubgenc.py` and (new) `stubdoc.py` * Avoid mutable defaults This is not a pure refactoring, turning the semantic analysis on required some (although relatively small) changes in logic (because the sources should be semantically analyzed as a whole). It also required couple minor changes in `semanal.py` and `build.py`. --- docs/source/index.rst | 1 + docs/source/stubgen.rst | 143 +++++++ mypy/build.py | 3 +- mypy/newsemanal/semanal.py | 8 +- mypy/nodes.py | 3 + mypy/semanal.py | 8 +- mypy/stubdoc.py | 312 +++++++++++++++ mypy/stubgen.py | 741 +++++++++++++++++++++--------------- mypy/stubgenc.py | 77 ++-- mypy/stubutil.py | 398 +++++++------------ mypy/test/teststubgen.py | 338 ++++++++-------- test-data/unit/stubgen.test | 205 +++++++++- 12 files changed, 1458 insertions(+), 779 deletions(-) create mode 100644 docs/source/stubgen.rst create mode 100644 mypy/stubdoc.py diff --git a/docs/source/index.rst b/docs/source/index.rst index 78f401d2d438..efd0d7d41ff3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -53,6 +53,7 @@ Mypy is a static type checker for Python 3 and Python 2.7. 
mypy_daemon installed_packages extending_mypy + stubgen .. toctree:: :maxdepth: 2 diff --git a/docs/source/stubgen.rst b/docs/source/stubgen.rst new file mode 100644 index 000000000000..6bcbdc2b49d5 --- /dev/null +++ b/docs/source/stubgen.rst @@ -0,0 +1,143 @@ +.. _stugen: + +Automatic stub generation +========================= + +Stub files (see `PEP 484 `_) +are files containing only type hints not the actual runtime implementation. +They can be useful for C extension modules, third-party modules whose authors +have not yet added type hints, etc. + +Mypy comes with a ``stubgen`` tool for automatic generation of +stub files (``.pyi`` files) from Python source files. For example, +this source file: + +.. code-block:: python + + from other_module import dynamic + + BORDER_WIDTH = 15 + + class Window: + parent = dynamic() + def __init__(self, width, hight): + self.width = width + self.hight = hight + + def create_empty() -> Window: + return Window(0, 0) + +will be transformed into the following stub file: + +.. code-block:: python + + from typing import Any + + BORDER_WIDTH: int = ... + + class Window: + parent: Any = ... + width: Any = ... + height: Any: ... + def __init__(self, width, height) -> None: ... + + def create_empty() -> Window: ... + +In most cases, the auto-generated stub files require manual check for +completeness. This section documents stubgen's command line interface. +You can view a quick summary of the available flags by running +``stubgen --help``. + +.. note:: + + Stubgen tool is still experimental and will evolve. Command line flags + are liable to change between releases. + +Specifying what to stub +*********************** + +By default, you can specify for what code you want to generate +stub files by passing in the paths to the sources:: + + $ stubgen foo.py bar.py some_directory + +Note that directories are checked recursively. + +Stubgen also lets you specify modules for stub generation in two +other ways. 
The relevant flags are: + +``-m MODULE``, ``--module MODULE`` + Asks stubgen to generate stub file for the provided module. This flag + may be repeated multiple times. + + Stubgen *will not* recursively generate stubs for any submodules of + the provided module. + +``-p PACKAGE``, ``--package PACKAGE`` + Asks stubgen to generate stubs for the provided package. This flag may + be repeated multiple times. + + Stubgen *will* recursively generate stubs for all submodules of + the provided package. This flag is identical to ``--module`` apart from + this behavior. + +.. note:: + + You can use either module/package mode or source code mode, these two + can't be mixed together in the same stubgen invocation. + +Specifying how to generate stubs +******************************** + +By default stubgen will try to import the modules and packages given. +This has an advantage of possibility to discover and stub also C modules. +By default stubgen will use mypy to semantically analyze the Python +sources found. To alter this behavior, you can use following flags: + +``--no-import`` + Don't try to import modules, instead use mypy's normal mechanisms to find + sources. This will not find any C extension modules. Stubgen also uses + runtime introspection to find actual value of ``__all__``, so with this flag + the set of re-exported names may be incomplete. This flag will be useful if + importing the module causes an error. + +``--parse-only`` + Don't perform mypy semantic analysis of source files. This may generate + worse stubs: in particular some module, class, and function aliases may + be typed as variables with ``Any`` type. This can be useful if semantic + analysis causes a critical mypy error. + +``--doc-dir PATH`` + Try to infer function and class signatures by parsing .rst documentation + in ``PATH``. This may result in better stubs, but currently only works for + C modules. 
+ +Additional flags +**************** + +``--py2`` + Run stubgen in Python 2 mode (the default is Python 3 mode). + +``--ignore-errors`` + Ignore any errors when trying to generate stubs for modules and packages. + This may be useful for C modules where runtime introspection is used + intensively. + +``--include-private`` + Generate stubs for objects and members considered private (with single + leading underscore and no trailing underscores). + +``--search-path PATH`` + Specify module search directories, separated by colons (currently only + used if ``--no-import`` is given). + +``--python-executable PATH`` + Use Python interpreter at ``PATH`` for module finding and runtime + introspection (has no effect with ``--no-import``). Currently only works + for Python 2. In Python 3 mode only the default interpreter will be used. + +``-o PATH``, ``--output PATH`` + Change the output directory. By default the stubs are written in + ``./out`` directory. The output directory will be created if it didn't + exist. Existing stubs in the output directory will be overwritten without + warning. diff --git a/mypy/build.py b/mypy/build.py index 17ff2a8687f2..0201cd2c2186 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1463,7 +1463,8 @@ def __init__(self, # as error reporting should be avoided. 
temporary: bool = False, ) -> None: - assert id or path or source is not None, "Neither id, path nor source given" + if not temporary: + assert id or path or source is not None, "Neither id, path nor source given" self.manager = manager State.order_counter += 1 self.order = State.order_counter diff --git a/mypy/newsemanal/semanal.py b/mypy/newsemanal/semanal.py index 06fec7473d78..03bf9ec4994e 100644 --- a/mypy/newsemanal/semanal.py +++ b/mypy/newsemanal/semanal.py @@ -245,7 +245,9 @@ def __init__(self, self.incomplete_namespaces = incomplete_namespaces self.postpone_nested_functions_stack = [FUNCTION_BOTH_PHASES] self.postponed_functions_stack = [] - self.all_exports = set() # type: Set[str] + self.all_exports = [] # type: List[str] + # Map from module id to list of explicitly exported names (i.e. names in __all__). + self.export_map = {} # type: Dict[str, List[str]] self.plugin = plugin # If True, process function definitions. If False, don't. This is used # for processing module top levels in fine-grained incremental mode. 
@@ -317,6 +319,8 @@ def visit_file(self, file_node: MypyFile, fnam: str, options: Options, if name not in self.all_exports: g.module_public = False + self.export_map[self.cur_mod_id] = self.all_exports + self.all_exports = [] del self.options del self.patches del self.cur_mod_node @@ -3899,7 +3903,7 @@ def add_exports(self, exp_or_exps: Union[Iterable[Expression], Expression]) -> N exps = [exp_or_exps] if isinstance(exp_or_exps, Expression) else exp_or_exps for exp in exps: if isinstance(exp, StrExpr): - self.all_exports.add(exp.value) + self.all_exports.append(exp.value) def check_no_global(self, n: str, ctx: Context, is_overloaded_func: bool = False) -> None: diff --git a/mypy/nodes.py b/mypy/nodes.py index 1990440c00d0..66ca58714f8b 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -652,6 +652,8 @@ class Decorator(SymbolNode, Statement): func = None # type: FuncDef # Decorated function decorators = None # type: List[Expression] # Decorators (may be empty) + # Some decorators are removed by semanal, keep the original here. + original_decorators = None # type: List[Expression] # TODO: This is mostly used for the type; consider replacing with a 'type' attribute var = None # type: Var # Represents the decorated function obj is_overload = False @@ -661,6 +663,7 @@ def __init__(self, func: FuncDef, decorators: List[Expression], super().__init__() self.func = func self.decorators = decorators + self.original_decorators = decorators.copy() self.var = var self.is_overload = False diff --git a/mypy/semanal.py b/mypy/semanal.py index bb2ef45a5531..50ffcf53ef2e 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -242,7 +242,9 @@ def __init__(self, self.missing_modules = missing_modules self.postpone_nested_functions_stack = [FUNCTION_BOTH_PHASES] self.postponed_functions_stack = [] - self.all_exports = set() # type: Set[str] + self.all_exports = [] # type: List[str] + # Map from module id to list of explicitly exported names (i.e. names in __all__). 
+ self.export_map = {} # type: Dict[str, List[str]] self.plugin = plugin # If True, process function definitions. If False, don't. This is used # for processing module top levels in fine-grained incremental mode. @@ -314,6 +316,8 @@ def visit_file(self, file_node: MypyFile, fnam: str, options: Options, if name not in self.all_exports: g.module_public = False + self.export_map[self.cur_mod_id] = self.all_exports + self.all_exports = [] del self.options del self.patches del self.cur_mod_node @@ -3707,7 +3711,7 @@ def add_exports(self, exp_or_exps: Union[Iterable[Expression], Expression]) -> N exps = [exp_or_exps] if isinstance(exp_or_exps, Expression) else exp_or_exps for exp in exps: if isinstance(exp, StrExpr): - self.all_exports.add(exp.value) + self.all_exports.append(exp.value) def check_no_global(self, n: str, ctx: Context, is_overloaded_func: bool = False) -> None: diff --git a/mypy/stubdoc.py b/mypy/stubdoc.py new file mode 100644 index 000000000000..e590821ee441 --- /dev/null +++ b/mypy/stubdoc.py @@ -0,0 +1,312 @@ +"""Parsing/inferring signatures from documentation. + +This module provides several functions to generate better stubs using +docstrings and Sphinx docs (.rst files). +""" +import re +import io +import contextlib +import tokenize + +from typing import ( + Optional, MutableMapping, MutableSequence, List, Sequence, Tuple, NamedTuple, Any +) + +MYPY = False +if MYPY: + from typing_extensions import Final + +# Type alias for signatures strings in format ('func_name', '(arg, opt_arg=False)'). +Sig = Tuple[str, str] + + +class ArgSig: + """Signature info for a single argument.""" + def __init__(self, name: str, type: Optional[str] = None, default: bool = False): + self.name = name + self.type = type + # Does this argument have a default value? 
+ self.default = default + + def __repr__(self) -> str: + return "ArgSig(name={}, type={}, default={})".format(repr(self.name), repr(self.type), + repr(self.default)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, ArgSig): + return (self.name == other.name and self.type == other.type and + self.default == other.default) + return False + + +FunctionSig = NamedTuple('FunctionSig', [ + ('name', str), + ('args', List[ArgSig]), + ('ret_type', str) +]) + + +# States of the docstring parser. +STATE_INIT = 1 # type: Final +STATE_FUNCTION_NAME = 2 # type: Final +STATE_ARGUMENT_LIST = 3 # type: Final +STATE_ARGUMENT_TYPE = 4 # type: Final +STATE_ARGUMENT_DEFAULT = 5 # type: Final +STATE_RETURN_VALUE = 6 # type: Final +STATE_OPEN_BRACKET = 7 # type: Final # For generic types. + + +class DocStringParser: + """Parse function signstures in documentation.""" + def __init__(self, function_name: str) -> None: + # Only search for signatures of function with this name. + self.function_name = function_name + self.state = [STATE_INIT] + self.accumulator = "" + self.arg_type = None # type: Optional[str] + self.arg_name = "" + self.arg_default = None # type: Optional[str] + self.ret_type = "Any" + self.found = False + self.args = [] # type: List[ArgSig] + # Valid signatures found so far. + self.signatures = [] # type: List[FunctionSig] + + def add_token(self, token: tokenize.TokenInfo) -> None: + """Process next token fro the token stream.""" + if (token.type == tokenize.NAME and token.string == self.function_name and + self.state[-1] == STATE_INIT): + self.state.append(STATE_FUNCTION_NAME) + + elif (token.type == tokenize.OP and token.string == '(' and + self.state[-1] == STATE_FUNCTION_NAME): + self.state.pop() + self.accumulator = "" + self.found = True + self.state.append(STATE_ARGUMENT_LIST) + + elif self.state[-1] == STATE_FUNCTION_NAME: + # Reset state, function name not followed by '('. 
+ self.state.pop() + + elif (token.type == tokenize.OP and token.string in ('[', '(', '{') and + self.state[-1] != STATE_INIT): + self.accumulator += token.string + self.state.append(STATE_OPEN_BRACKET) + + elif (token.type == tokenize.OP and token.string in (']', ')', '}') and + self.state[-1] == STATE_OPEN_BRACKET): + self.accumulator += token.string + self.state.pop() + + elif (token.type == tokenize.OP and token.string == ':' and + self.state[-1] == STATE_ARGUMENT_LIST): + self.arg_name = self.accumulator + self.accumulator = "" + self.state.append(STATE_ARGUMENT_TYPE) + + elif (token.type == tokenize.OP and token.string == '=' and + self.state[-1] in (STATE_ARGUMENT_LIST, STATE_ARGUMENT_TYPE)): + if self.state[-1] == STATE_ARGUMENT_TYPE: + self.arg_type = self.accumulator + self.state.pop() + else: + self.arg_name = self.accumulator + self.accumulator = "" + self.state.append(STATE_ARGUMENT_DEFAULT) + + elif (token.type == tokenize.OP and token.string in (',', ')') and + self.state[-1] in (STATE_ARGUMENT_LIST, STATE_ARGUMENT_DEFAULT, + STATE_ARGUMENT_TYPE)): + if self.state[-1] == STATE_ARGUMENT_DEFAULT: + self.arg_default = self.accumulator + self.state.pop() + elif self.state[-1] == STATE_ARGUMENT_TYPE: + self.arg_type = self.accumulator + self.state.pop() + elif self.state[-1] == STATE_ARGUMENT_LIST: + self.arg_name = self.accumulator + + if token.string == ')': + self.state.pop() + self.args.append(ArgSig(name=self.arg_name, type=self.arg_type, + default=bool(self.arg_default))) + self.arg_name = "" + self.arg_type = None + self.arg_default = None + self.accumulator = "" + + elif token.type == tokenize.OP and token.string == '->' and self.state[-1] == STATE_INIT: + self.accumulator = "" + self.state.append(STATE_RETURN_VALUE) + + # ENDMAKER is necessary for python 3.4 and 3.5. 
+ elif (token.type in (tokenize.NEWLINE, tokenize.ENDMARKER) and + self.state[-1] in (STATE_INIT, STATE_RETURN_VALUE)): + if self.state[-1] == STATE_RETURN_VALUE: + self.ret_type = self.accumulator + self.accumulator = "" + self.state.pop() + + if self.found: + self.signatures.append(FunctionSig(name=self.function_name, args=self.args, + ret_type=self.ret_type)) + self.found = False + self.args = [] + self.ret_type = 'Any' + # Leave state as INIT. + else: + self.accumulator += token.string + + def get_signatures(self) -> List[FunctionSig]: + """Return sorted copy of the list of signatures found so far.""" + def has_arg(name: str, signature: FunctionSig) -> bool: + return any(x.name == name for x in signature.args) + + def args_kwargs(signature: FunctionSig) -> bool: + return has_arg('*args', signature) and has_arg('**kwargs', signature) + + # Move functions with (*args, **kwargs) in their signature to last place. + return list(sorted(self.signatures, key=lambda x: 1 if args_kwargs(x) else 0)) + + +def infer_sig_from_docstring(docstr: str, name: str) -> Optional[List[FunctionSig]]: + """Concert function signature to list of TypedFunctionSig + + Look for function signatures of function in docstring. Signature is a string of + the format () -> or perhaps without + the return type. + + Returns empty list, when no signature is found, one signature in typical case, + multiple signatures, if docstring specifies multiple signatures for overload functions. + Return None if the docstring is empty. + + Arguments: + * docstr: docstring + * name: name of function for which signatures are to be found + """ + if not docstr: + return None + + state = DocStringParser(name) + # Return all found signatures, even if there is a parse error after some are found. 
+ with contextlib.suppress(tokenize.TokenError): + for token in tokenize.tokenize(io.BytesIO(docstr.encode('utf-8')).readline): + state.add_token(token) + return state.get_signatures() + + +def infer_arg_sig_from_docstring(docstr: str) -> List[ArgSig]: + """Convert signature in form of "(self: TestClass, arg0: str='ada')" to List[TypedArgList].""" + ret = infer_sig_from_docstring("stub" + docstr, "stub") + if ret: + return ret[0].args + return [] + + +def parse_signature(sig: str) -> Optional[Tuple[str, + List[str], + List[str]]]: + """Split function signature into its name, positional an optional arguments. + + The expected format is "func_name(arg, opt_arg=False)". Return the name of function + and lists of positional and optional argument names. + """ + m = re.match(r'([.a-zA-Z0-9_]+)\(([^)]*)\)', sig) + if not m: + return None + name = m.group(1) + name = name.split('.')[-1] + arg_string = m.group(2) + if not arg_string.strip(): + # Simple case -- no arguments. + return name, [], [] + + args = [arg.strip() for arg in arg_string.split(',')] + positional = [] + optional = [] + i = 0 + while i < len(args): + # Accept optional arguments as in both formats: x=None and [x]. + if args[i].startswith('[') or '=' in args[i]: + break + positional.append(args[i].rstrip('[')) + i += 1 + if args[i - 1].endswith('['): + break + while i < len(args): + arg = args[i] + arg = arg.strip('[]') + arg = arg.split('=')[0] + optional.append(arg) + i += 1 + return name, positional, optional + + +def build_signature(positional: Sequence[str], + optional: Sequence[str]) -> str: + """Build function signature from lists of positional and optional argument names.""" + args = [] # type: MutableSequence[str] + args.extend(positional) + for arg in optional: + if arg.startswith('*'): + args.append(arg) + else: + args.append('%s=...' % arg) + sig = '(%s)' % ', '.join(args) + # Ad-hoc fixes. 
+ sig = sig.replace('(self)', '') + return sig + + +def parse_all_signatures(lines: Sequence[str]) -> Tuple[List[Sig], + List[Sig]]: + """Parse all signatures in a given reST document. + + Return lists of found signatures for functions and classes. + """ + sigs = [] + class_sigs = [] + for line in lines: + line = line.strip() + m = re.match(r'\.\. *(function|method|class) *:: *[a-zA-Z_]', line) + if m: + sig = line.split('::')[1].strip() + parsed = parse_signature(sig) + if parsed: + name, fixed, optional = parsed + if m.group(1) != 'class': + sigs.append((name, build_signature(fixed, optional))) + else: + class_sigs.append((name, build_signature(fixed, optional))) + + return sorted(sigs), sorted(class_sigs) + + +def find_unique_signatures(sigs: Sequence[Sig]) -> List[Sig]: + """Remove names with duplicate found signatures.""" + sig_map = {} # type: MutableMapping[str, List[str]] + for name, sig in sigs: + sig_map.setdefault(name, []).append(sig) + + result = [] + for name, name_sigs in sig_map.items(): + if len(set(name_sigs)) == 1: + result.append((name, name_sigs[0])) + return sorted(result) + + +def infer_prop_type_from_docstring(docstr: str) -> Optional[str]: + """Check for Google/Numpy style docstring type annotation for a property. + + The docstring has the format ": ". + In the type string, we allow the following characters: + * dot: because sometimes classes are annotated using full path + * brackets: to allow type hints like List[int] + * comma/space: things like Tuple[int, int] + """ + if not docstr: + return None + test_str = r'^([a-zA-Z0-9_, \.\[\]]*): ' + m = re.match(test_str, docstr) + return m.group(1) if m else None diff --git a/mypy/stubgen.py b/mypy/stubgen.py index a97ded77cf6b..6bf5c2e683f0 100755 --- a/mypy/stubgen.py +++ b/mypy/stubgen.py @@ -1,34 +1,43 @@ """Generator of dynamically typed draft stubs for arbitrary modules. 
+The logic of this script can be split in three steps: +* parsing options and finding sources: + - use runtime imports be default (to find also C modules) + - or use mypy's mechanisms, if importing is prohibited +* (optionally) semantically analysing the sources using mypy (as a single set) +* emitting the stubs text: + - for Python modules: from ASTs using StubGenerator + - for C modules using runtime introspection and (optionally) Sphinx docs + +During first and third steps some problematic files can be skipped, but any +blocking error during second step will cause the whole program to stop. + Basic usage: - $ mkdir out - $ stubgen urllib.parse + $ stubgen foo.py bar.py some_directory + => Generate out/foo.pyi, out/bar.pyi, and stubs for some_directory (recursively). + $ stubgen -m urllib.parse => Generate out/urllib/parse.pyi. + $ stubgen -p urllib + => Generate stubs for whole urlib package (recursively). + For Python 2 mode, use --py2: - $ stubgen --py2 textwrap + $ stubgen --py2 -m textwrap For C modules, you can get more precise function signatures by parsing .rst (Sphinx) documentation for extra information. For this, use the --doc-dir option: - $ scripts/stubgen --doc-dir /Python-3.4.2/Doc/library curses - - => Generate out/curses.py. - -Use "stubgen -h" for more help. + $ stubgen --doc-dir /Python-3.4.2/Doc/library -m curses -Note: You should verify the generated stubs manually. +Note: The generated stubs should be verified manually. TODO: - - support stubs for C modules in Python 2 mode - - support non-default Python interpreters in Python 3 mode - - if using --no-import, look for __all__ in the AST - - infer some return types, such as no return statement with value -> None - detect 'if PY2 / is_py2' etc. 
and either preserve those or only include Python 2 or 3 case + - maybe use .rst docs also for Python modules - maybe export more imported names if there is no __all__ (this affects ssl.SSLError, for example) - a quick and dirty heuristic would be to turn this on if a module has something like 'from x import y as _y' @@ -36,21 +45,15 @@ """ import glob -import importlib -import json import os import os.path -import pkgutil -import inspect -import subprocess import sys -import textwrap import traceback import argparse from collections import defaultdict from typing import ( - Any, List, Dict, Tuple, Iterable, Iterator, Mapping, Optional, NamedTuple, Set, cast + Any, List, Dict, Tuple, Iterable, Mapping, Optional, Set, cast ) import mypy.build @@ -59,184 +62,72 @@ import mypy.traverser import mypy.util from mypy import defaults -from mypy.modulefinder import FindModuleCache, SearchPaths +from mypy.modulefinder import FindModuleCache, SearchPaths, BuildSource, default_lib_path from mypy.nodes import ( - Expression, IntExpr, UnaryExpr, StrExpr, BytesExpr, NameExpr, FloatExpr, MemberExpr, TupleExpr, - ListExpr, ComparisonExpr, CallExpr, IndexExpr, EllipsisExpr, - ClassDef, MypyFile, Decorator, AssignmentStmt, + Expression, IntExpr, UnaryExpr, StrExpr, BytesExpr, NameExpr, FloatExpr, MemberExpr, + TupleExpr, ListExpr, ComparisonExpr, CallExpr, IndexExpr, EllipsisExpr, + ClassDef, MypyFile, Decorator, AssignmentStmt, TypeInfo, IfStmt, ReturnStmt, ImportAll, ImportFrom, Import, FuncDef, FuncBase, TempNode, - ARG_POS, ARG_STAR, ARG_STAR2, ARG_NAMED, ARG_NAMED_OPT, + ARG_POS, ARG_STAR, ARG_STAR2, ARG_NAMED, ARG_NAMED_OPT ) from mypy.stubgenc import generate_stub_for_c_module -from mypy.stubutil import is_c_module, write_header, parse_all_signatures, find_unique_signatures +from mypy.stubutil import ( + write_header, default_py2_interpreter, CantImport, generate_guarded, + walk_packages, find_module_path_and_all_py2, find_module_path_and_all_py3, + report_missing, fail_missing 
+) +from mypy.stubdoc import parse_all_signatures, find_unique_signatures, Sig from mypy.options import Options as MypyOptions from mypy.types import ( Type, TypeStrVisitor, CallableType, UnboundType, NoneTyp, TupleType, TypeList, ) from mypy.visitor import NodeVisitor +from mypy.find_sources import create_source_list, InvalidSourceList +from mypy.build import build +from mypy.errors import CompileError MYPY = False if MYPY: from typing_extensions import Final -Options = NamedTuple('Options', [('pyversion', Tuple[int, int]), - ('no_import', bool), - ('doc_dir', str), - ('search_path', List[str]), - ('interpreter', str), - ('modules', List[str]), - ('ignore_errors', bool), - ('recursive', bool), - ('include_private', bool), - ('output_dir', str), - ]) - - -class CantImport(Exception): - pass - - -def generate_stub_for_module(module: str, output_dir: str, quiet: bool = False, - add_header: bool = False, sigs: Dict[str, str] = {}, - class_sigs: Dict[str, str] = {}, - pyversion: Tuple[int, int] = defaults.PYTHON3_VERSION, - no_import: bool = False, - search_path: List[str] = [], - interpreter: str = sys.executable, - include_private: bool = False) -> None: - target = module.replace('.', '/') - try: - result = find_module_path_and_all(module=module, - pyversion=pyversion, - no_import=no_import, - search_path=search_path, - interpreter=interpreter) - except CantImport: - if not quiet: - traceback.print_exc() - print('Failed to import %s; skipping it' % module) - return - - if not result: - # C module - target = os.path.join(output_dir, target + '.pyi') - generate_stub_for_c_module(module_name=module, - target=target, - add_header=add_header, - sigs=sigs, - class_sigs=class_sigs) - else: - # Python module - module_path, module_all = result - if os.path.basename(module_path) == '__init__.py': - target += '/__init__.pyi' - else: - target += '.pyi' - target = os.path.join(output_dir, target) - - generate_stub(module_path, output_dir, module_all, - target=target, 
add_header=add_header, module=module, - pyversion=pyversion, include_private=include_private) - if not quiet: - print('Created %s' % target) +class Options: + """Represents stubgen options. -def find_module_path_and_all(module: str, pyversion: Tuple[int, int], - no_import: bool, - search_path: List[str], - interpreter: str) -> Optional[Tuple[str, - Optional[List[str]]]]: - """Find module and determine __all__. - - Return None if the module is a C module. Return (module_path, __all__) if - Python module. Raise an exception or exit if failed. + This class is mutable to simplify testing. """ - module_path = None # type: Optional[str] - if not no_import: - if pyversion[0] == 2: - module_path, module_all = load_python_module_info(module, interpreter) - else: - # TODO: Support custom interpreters. - try: - mod = importlib.import_module(module) - except Exception: - raise CantImport(module) - if is_c_module(mod): - return None - module_path = mod.__file__ - module_all = getattr(mod, '__all__', None) - else: - # Find module by going through search path. - search_paths = SearchPaths(('.',) + tuple(search_path), (), (), ()) - module_path = FindModuleCache(search_paths).find_module(module) - if not module_path: - raise SystemExit( - "Can't find module '{}' (consider using --search-path)".format(module)) - module_all = None - return module_path, module_all - - -def load_python_module_info(module: str, interpreter: str) -> Tuple[str, Optional[List[str]]]: - """Return tuple (module path, module __all__) for a Python 2 module. - - The path refers to the .py/.py[co] file. The second tuple item is - None if the module doesn't define __all__. - - Exit if the module can't be imported or if it's a C extension module. 
+ def __init__(self, pyversion: Tuple[int, int], no_import: bool, doc_dir: str, + search_path: List[str], interpreter: str, parse_only: bool, ignore_errors: bool, + include_private: bool, output_dir: str, modules: List[str], packages: List[str], + files: List[str]) -> None: + # See parse_options for descriptions of the flags. + self.pyversion = pyversion + self.no_import = no_import + self.doc_dir = doc_dir + self.search_path = search_path + self.interpreter = interpreter + self.decointerpreter = interpreter + self.parse_only = parse_only + self.ignore_errors = ignore_errors + self.include_private = include_private + self.output_dir = output_dir + self.modules = modules + self.packages = packages + self.files = files + + +class StubSource(BuildSource): + """A single source for stub: can be a Python or C module. + + A simple extension of BuildSource that also carries the AST and + the value of __all__ detected at runtime. """ - cmd_template = '{interpreter} -c "%s"'.format(interpreter=interpreter) - code = ("import importlib, json; mod = importlib.import_module('%s'); " - "print(mod.__file__); print(json.dumps(getattr(mod, '__all__', None)))") % module - try: - output_bytes = subprocess.check_output(cmd_template % code, shell=True) - except subprocess.CalledProcessError: - print("Can't import module %s" % module, file=sys.stderr) - sys.exit(1) - output = output_bytes.decode('ascii').strip().splitlines() - module_path = output[0] - if not module_path.endswith(('.py', '.pyc', '.pyo')): - raise SystemExit('%s looks like a C module; they are not supported for Python 2' % - module) - if module_path.endswith(('.pyc', '.pyo')): - module_path = module_path[:-1] - module_all = json.loads(output[1]) - return module_path, module_all - - -def generate_stub(path: str, - output_dir: str, - _all_: Optional[List[str]] = None, - target: Optional[str] = None, - add_header: bool = False, - module: Optional[str] = None, - pyversion: Tuple[int, int] = defaults.PYTHON3_VERSION, - 
include_private: bool = False - ) -> None: - with open(path, 'rb') as f: - data = f.read() - source = mypy.util.decode_python_encoding(data, pyversion) - options = MypyOptions() - options.python_version = pyversion - try: - ast = mypy.parse.parse(source, fnam=path, module=module, errors=None, options=options) - except mypy.errors.CompileError as e: - # Syntax error! - for m in e.messages: - sys.stderr.write('%s\n' % m) - sys.exit(1) - - gen = StubGenerator(_all_, pyversion=pyversion, include_private=include_private) - ast.accept(gen) - if not target: - target = os.path.join(output_dir, os.path.basename(path)) - subdir = os.path.dirname(target) - if subdir and not os.path.isdir(subdir): - os.makedirs(subdir) - with open(target, 'w') as file: - if add_header: - write_header(file, module, pyversion=pyversion) - file.write(''.join(gen.output())) + def __init__(self, module: str, path: Optional[str] = None, + runtime_all: Optional[List[str]] = None) -> None: + super().__init__(path, module, None) + self.runtime_all = runtime_all + self.ast = None # type: Optional[MypyFile] # What was generated previously in the stub file. We keep track of these to generate @@ -248,18 +139,35 @@ def generate_stub(path: str, VAR = 'VAR' # type: Final NOT_IN_ALL = 'NOT_IN_ALL' # type: Final +# Indicates that we failed to generate a reasonable output +# for a given node. These should be manually replaced by a user. + +ERROR_MARKER = '' # type: Final + class AnnotationPrinter(TypeStrVisitor): + """Visitor used to print existing annotations in a file. + The main difference from TypeStrVisitor is a better treatment of + unbound types. + + Notes: + * This visitor doesn't add imports necessary for annotations, this is done separately + by ImportTracker. + * It can print all kinds of types, but the generated strings may not be valid (notably + callable types) since it prints the same string that reveal_type() does. + * For Instance types it prints the fully qualified names. 
+ """ + # TODO: Generate valid string representation for callable types. + # TODO: Use short names for Instances. def __init__(self, stubgen: 'StubGenerator') -> None: super().__init__() self.stubgen = stubgen def visit_unbound_type(self, t: UnboundType) -> str: s = t.name - base = s.split('.')[0] - self.stubgen.import_tracker.require_name(base) - if t.args != []: + self.stubgen.import_tracker.require_name(s) + if t.args: s += '[{}]'.format(self.list_str(t.args)) return s @@ -271,7 +179,10 @@ def visit_type_list(self, t: TypeList) -> str: class AliasPrinter(NodeVisitor[str]): + """Visitor used to collect type aliases _and_ type variable definitions. + Visit r.h.s of the definition to get the string representation of type alias. + """ def __init__(self, stubgen: 'StubGenerator') -> None: self.stubgen = stubgen super().__init__() @@ -298,6 +209,17 @@ def visit_name_expr(self, node: NameExpr) -> str: self.stubgen.import_tracker.require_name(node.name) return node.name + def visit_member_expr(self, o: MemberExpr) -> str: + node = o # type: Expression + trailer = '' + while isinstance(node, MemberExpr): + trailer = '.' + node.name + trailer + node = node.expr + if not isinstance(node, NameExpr): + return ERROR_MARKER + self.stubgen.import_tracker.require_name(node.name) + return node.name + trailer + def visit_str_expr(self, node: StrExpr) -> str: return repr(node.value) @@ -317,6 +239,7 @@ def visit_ellipsis(self, node: EllipsisExpr) -> str: class ImportTracker: + """Record necessary imports during stub generation.""" def __init__(self) -> None: # module_for['foo'] has the module name where 'foo' was imported from, or None if @@ -360,19 +283,16 @@ def require_name(self, name: str) -> None: self.required_names.add(name.split('.')[0]) def reexport(self, name: str) -> None: - """ - Mark a given non qualified name as needed in __all__. This means that in case it - comes from a module, it should be imported with an alias even is the alias is the same - as the name. 
+ """Mark a given non qualified name as needed in __all__. + This means that in case it comes from a module, it should be + imported with an alias even is the alias is the same as the name. """ self.require_name(name) self.reexports.add(name) def import_lines(self) -> List[str]: - """ - The list of required import lines (as strings with python code) - """ + """The list of required import lines (as strings with python code).""" result = [] # To summarize multiple names imported from a same module, we collect those @@ -414,17 +334,23 @@ def import_lines(self) -> List[str]: class StubGenerator(mypy.traverser.TraverserVisitor): def __init__(self, _all_: Optional[List[str]], pyversion: Tuple[int, int], - include_private: bool = False) -> None: + include_private: bool = False, analyzed: bool = False) -> None: + # Best known value of __all__. self._all_ = _all_ self._output = [] # type: List[str] self._import_lines = [] # type: List[str] + # Current indent level (indent is hardcoded to 4 spaces). self._indent = '' + # Stack of defined variables (per scope). self._vars = [[]] # type: List[List[str]] + # What was generated previously in the stub file. self._state = EMPTY self._toplevel_names = [] # type: List[str] self._pyversion = pyversion self._include_private = include_private self.import_tracker = ImportTracker() + # Was the tree semantically analysed before? 
+ self.analyzed = analyzed # Add imports that could be implicitly generated self.import_tracker.add_import_from("collections", [("namedtuple", None)]) typing_imports = "Any Optional TypeVar".split() @@ -444,7 +370,7 @@ def visit_mypy_file(self, o: MypyFile) -> None: for name in sorted(undefined_names): self.add('# %s\n' % name) - def visit_func_def(self, o: FuncDef) -> None: + def visit_func_def(self, o: FuncDef, is_abstract: bool = False) -> None: if self.is_private_name(o.name()): return if self.is_not_in_all(o.name()): @@ -466,7 +392,8 @@ def visit_func_def(self, o: FuncDef) -> None: var = arg_.variable kind = arg_.kind name = var.name() - annotated_type = o.type.arg_types[i] if isinstance(o.type, CallableType) else None + annotated_type = (o.unanalyzed_type.arg_types[i] + if isinstance(o.unanalyzed_type, CallableType) else None) is_self_arg = i == 0 and name == 'self' is_cls_arg = i == 0 and name == 'cls' if (annotated_type is None @@ -498,9 +425,13 @@ def visit_func_def(self, o: FuncDef) -> None: arg = name + annotation args.append(arg) retname = None - if isinstance(o.type, CallableType): - retname = self.print_annotation(o.type.ret_type) - elif o.name() == '__init__' or not has_return_statement(o): + if isinstance(o.unanalyzed_type, CallableType): + retname = self.print_annotation(o.unanalyzed_type.ret_type) + elif isinstance(o, FuncDef) and o.is_abstract: + # Always assume abstract methods return Any unless explicitly annotated. 
+ retname = 'Any' + self.add_typing_import("Any") + elif o.name() == '__init__' or not has_return_statement(o) and not is_abstract: retname = 'None' retfield = '' if retname is not None: @@ -513,7 +444,8 @@ def visit_func_def(self, o: FuncDef) -> None: def visit_decorator(self, o: Decorator) -> None: if self.is_private_name(o.func.name()): return - for decorator in o.decorators: + is_abstract = False + for decorator in o.original_decorators: if isinstance(decorator, NameExpr): if decorator.name in ('property', 'staticmethod', @@ -523,9 +455,22 @@ def visit_decorator(self, o: Decorator) -> None: 'asyncio.coroutines', 'types'): self.add_coroutine_decorator(o.func, decorator.name, decorator.name) + elif (self.import_tracker.module_for.get(decorator.name) == 'abc' and + (decorator.name == 'abstractmethod' or + self.import_tracker.reverse_alias.get(decorator.name) == 'abstractmethod')): + self.add('%s@%s\n' % (self._indent, decorator.name)) + self.import_tracker.require_name(decorator.name) + is_abstract = True elif isinstance(decorator, MemberExpr): if decorator.name == 'setter' and isinstance(decorator.expr, NameExpr): self.add('%s@%s.setter\n' % (self._indent, decorator.expr.name)) + elif (isinstance(decorator.expr, NameExpr) and + (decorator.expr.name == 'abc' or + self.import_tracker.reverse_alias.get('abc')) and + decorator.name == 'abstractmethod'): + self.import_tracker.require_name(decorator.expr.name) + self.add('%s@%s.%s\n' % (self._indent, decorator.expr.name, decorator.name)) + is_abstract = True elif decorator.name == 'coroutine': if (isinstance(decorator.expr, MemberExpr) and decorator.expr.name == 'coroutines' and @@ -544,7 +489,7 @@ def visit_decorator(self, o: Decorator) -> None: self.add_coroutine_decorator(o.func, decorator.expr.name + '.coroutine', decorator.expr.name) - super().visit_decorator(o) + self.visit_func_def(o.func, is_abstract=is_abstract) def visit_class_def(self, o: ClassDef) -> None: sep = None # type: Optional[int] @@ -555,9 +500,17 
@@ def visit_class_def(self, o: ClassDef) -> None: self.record_name(o.name) base_types = self.get_base_types(o) if base_types: - self.add('(%s)' % ', '.join(base_types)) for base in base_types: self.import_tracker.require_name(base) + if isinstance(o.metaclass, (NameExpr, MemberExpr)): + meta = o.metaclass.accept(AliasPrinter(self)) + base_types.append('metaclass=' + meta) + elif self.analyzed and o.info.is_abstract: + base_types.append('metaclass=abc.ABCMeta') + self.import_tracker.add_import('abc') + self.import_tracker.require_name('abc') + if base_types: + self.add('(%s)' % ', '.join(base_types)) self.add(':\n') n = len(self._output) self._indent += ' ' @@ -575,6 +528,7 @@ def visit_class_def(self, o: ClassDef) -> None: self._state = CLASS def get_base_types(self, cdef: ClassDef) -> List[str]: + """Get list of base classes for a class.""" base_types = [] # type: List[str] for base in cdef.base_type_exprs: if isinstance(base, NameExpr): @@ -597,18 +551,20 @@ def visit_assignment_stmt(self, o: AssignmentStmt) -> None: self.process_namedtuple(lvalue, o.rvalue) continue if (self.is_top_level() and - isinstance(lvalue, NameExpr) and self.is_type_expression(o.rvalue)): + isinstance(lvalue, NameExpr) and not self.is_private_name(lvalue.name) and + # it is never an alias with explicit annotation + not o.unanalyzed_type and self.is_alias_expression(o.rvalue)): self.process_typealias(lvalue, o.rvalue) continue if isinstance(lvalue, TupleExpr) or isinstance(lvalue, ListExpr): items = lvalue.items - if isinstance(o.type, TupleType): - annotations = o.type.items # type: Iterable[Optional[Type]] + if isinstance(o.unanalyzed_type, TupleType): + annotations = o.unanalyzed_type.items # type: Iterable[Optional[Type]] else: annotations = [None] * len(items) else: items = [lvalue] - annotations = [o.type] + annotations = [o.unanalyzed_type] sep = False found = False for item, annotation in zip(items, annotations): @@ -638,21 +594,22 @@ def process_namedtuple(self, lvalue: 
NameExpr, rvalue: CallExpr) -> None: self.import_tracker.require_name('namedtuple') if self._state != EMPTY: self.add('\n') - name = repr(getattr(rvalue.args[0], 'value', '')) + name = repr(getattr(rvalue.args[0], 'value', ERROR_MARKER)) if isinstance(rvalue.args[1], StrExpr): items = repr(rvalue.args[1].value) elif isinstance(rvalue.args[1], (ListExpr, TupleExpr)): list_items = cast(List[StrExpr], rvalue.args[1].items) items = '[%s]' % ', '.join(repr(item.value) for item in list_items) else: - items = '' + items = ERROR_MARKER self.add('%s = namedtuple(%s, %s)\n' % (lvalue.name, name, items)) self._state = CLASS - def is_type_expression(self, expr: Expression, top_level: bool = True) -> bool: - """Return True for things that look like type expressions + def is_alias_expression(self, expr: Expression, top_level: bool = True) -> bool: + """Return True for things that look like target for an alias. - Used to know if assignments look like typealiases + Used to know if assignments look like type aliases, function alias, + or module alias. """ # Assignment of TypeVar(...) are passed through if (isinstance(expr, CallExpr) and @@ -667,8 +624,14 @@ def is_type_expression(self, expr: Expression, top_level: bool = True) -> bool: elif expr.name == 'None': return not top_level else: - return True - elif isinstance(expr, IndexExpr) and isinstance(expr.base, NameExpr): + return not self.is_private_name(expr.name) + elif isinstance(expr, MemberExpr) and self.analyzed: + # Also add function and module aliases. 
+ return ((top_level and isinstance(expr.node, (FuncDef, Decorator, MypyFile)) + or isinstance(expr.node, TypeInfo)) and + not self.is_private_member(expr.node.fullname())) + elif (isinstance(expr, IndexExpr) and isinstance(expr.base, NameExpr) and + not self.is_private_name(expr.base.name)): if isinstance(expr.index, TupleExpr): indices = expr.index.items else: @@ -681,7 +644,7 @@ def is_type_expression(self, expr: Expression, top_level: bool = True) -> bool: indices = args.items + [ret] else: return False - return all(self.is_type_expression(i, top_level=False) for i in indices) + return all(self.is_alias_expression(i, top_level=False) for i in indices) else: return False @@ -752,6 +715,12 @@ def get_init(self, lvalue: str, rvalue: Expression, self._vars[-1].append(lvalue) if annotation is not None: typename = self.print_annotation(annotation) + if (isinstance(annotation, UnboundType) and not annotation.args and + annotation.name == 'Final' and + self.import_tracker.module_for.get('Final') in ('typing', 'typing_extensions')): + # Final without type argument is invalid in stubs. + final_arg = self.get_str_type_of_node(rvalue) + typename += '[{}]'.format(final_arg) else: typename = self.get_str_type_of_node(rvalue) has_rhs = not (isinstance(rvalue, TempNode) and rvalue.no_rhs) @@ -811,6 +780,13 @@ def is_private_name(self, name: str) -> bool: '__setstate__', '__slots__')) + def is_private_member(self, fullname: str) -> bool: + parts = fullname.split('.') + for part in parts: + if self.is_private_name(part): + return True + return False + def get_str_type_of_node(self, rvalue: Expression, can_infer_optional: bool = False) -> str: if isinstance(rvalue, IntExpr): @@ -867,6 +843,10 @@ def visit_assignment_stmt(self, o: AssignmentStmt) -> None: def find_self_initializers(fdef: FuncBase) -> List[Tuple[str, Expression]]: + """Find attribute initializers in a method. + + Return a list of pairs (attribute name, r.h.s. expression). 
+ """ traverser = SelfTraverser() fdef.accept(traverser) return traverser.results @@ -877,10 +857,16 @@ def __init__(self) -> None: self.found = False def visit_return_stmt(self, o: ReturnStmt) -> None: + if o.expr is None or isinstance(o.expr, NameExpr) and o.expr.name == 'None': + return self.found = True def has_return_statement(fdef: FuncBase) -> bool: + """Find if a function has a non-trivial return statement. + + Plain 'return' and 'return None' don't count. + """ seeker = ReturnSeeker() fdef.accept(seeker) return seeker.found @@ -892,86 +878,233 @@ def get_qualified_name(o: Expression) -> str: elif isinstance(o, MemberExpr): return '%s.%s' % (get_qualified_name(o.expr), o.name) else: - return '' + return ERROR_MARKER -def walk_packages(packages: List[str]) -> Iterator[str]: - """Iterates through all packages and sub-packages in the given list. +def collect_build_targets(options: Options, mypy_opts: MypyOptions) -> Tuple[List[StubSource], + List[StubSource]]: + """Collect files for which we need to generate stubs. - Python packages have a __path__ attribute defined, which pkgutil uses to determine - the package hierarchy. However, packages in C extensions do not have this attribute, - so we have to roll out our own. + Return list of Python modules and C modules. """ - for package_name in packages: - package = importlib.import_module(package_name) - yield package.__name__ - # get the path of the object (needed by pkgutil) - path = getattr(package, '__path__', None) - if path is None: - # object has no path; this means it's either a module inside a package - # (and thus no sub-packages), or it could be a C extension package. - if is_c_module(package): - # This is a C extension module, now get the list of all sub-packages - # using the inspect module - subpackages = [package.__name__ + "." + name - for name, val in inspect.getmembers(package) - if inspect.ismodule(val) - and val.__name__ == package.__name__ + "." 
+ name] - # recursively iterate through the subpackages - for submodule in walk_packages(subpackages): - yield submodule - # It's a module inside a package. There's nothing else to walk/yield. + if options.packages or options.modules: + if options.no_import: + py_modules = find_module_paths_using_search(options.modules, + options.packages, + options.search_path, + options.pyversion) + c_modules = [] # type: List[StubSource] else: - all_packages = pkgutil.walk_packages(path, prefix=package.__name__ + ".", - onerror=lambda r: None) - for importer, qualified_name, ispkg in all_packages: - yield qualified_name + # Using imports is the default, since we can also find C modules. + py_modules, c_modules = find_module_paths_using_imports(options.modules, + options.packages, + options.interpreter, + options.pyversion) + else: + # Use mypy native source collection for files and directories. + try: + source_list = create_source_list(options.files, mypy_opts) + except InvalidSourceList as e: + raise SystemExit(str(e)) + py_modules = [StubSource(m.module, m.path) for m in source_list] + c_modules = [] + return py_modules, c_modules -def main() -> None: - # Make sure that the current directory is in sys.path so that - # stubgen can be run on packages in the current directory. 
- if '' not in sys.path: - sys.path.insert(0, '') - options = parse_options(sys.argv[1:]) - if not os.path.isdir(options.output_dir): - raise SystemExit('Directory "{}" does not exist'.format(options.output_dir)) - if options.recursive and options.no_import: - raise SystemExit('recursive stub generation without importing is not currently supported') - sigs = {} # type: Any - class_sigs = {} # type: Any - if options.doc_dir: - all_sigs = [] # type: Any - all_class_sigs = [] # type: Any - for path in glob.glob('%s/*.rst' % options.doc_dir): - with open(path) as f: - func_sigs, class_sigs = parse_all_signatures(f.readlines()) - all_sigs += func_sigs - all_class_sigs += class_sigs - sigs = dict(find_unique_signatures(all_sigs)) - class_sigs = dict(find_unique_signatures(all_class_sigs)) - for module in (options.modules if not options.recursive else walk_packages(options.modules)): +def find_module_paths_using_imports(modules: List[str], packages: List[str], + interpreter: str, + pyversion: Tuple[int, int], + quiet: bool = True) -> Tuple[List[StubSource], + List[StubSource]]: + """Find path and runtime value of __all__ (if possible) for modules and packages. + + This function uses runtime Python imports to get the information. 
+ """ + py_modules = [] # type: List[StubSource] + c_modules = [] # type: List[StubSource] + modules = modules + list(walk_packages(packages)) + for mod in modules: try: - generate_stub_for_module(module, - output_dir=options.output_dir, - add_header=True, - sigs=sigs, - class_sigs=class_sigs, - pyversion=options.pyversion, - no_import=options.no_import, - search_path=options.search_path, - interpreter=options.interpreter, - include_private=options.include_private) - except Exception as e: - if not options.ignore_errors: - raise e + if pyversion[0] == 2: + result = find_module_path_and_all_py2(mod, interpreter) else: - print("Stub generation failed for", module, file=sys.stderr) + result = find_module_path_and_all_py3(mod) + except CantImport: + if not quiet: + traceback.print_exc() + report_missing(mod) + continue + if not result: + c_modules.append(StubSource(mod)) + else: + path, runtime_all = result + py_modules.append(StubSource(mod, path, runtime_all)) + return py_modules, c_modules -HEADER = """%(prog)s [--py2] [--no-import] [--doc-dir PATH] - [--search-path PATH] [--python-executable PATH] [-o PATH] MODULE ...""" +def find_module_paths_using_search(modules: List[str], packages: List[str], + search_path: List[str], + pyversion: Tuple[int, int]) -> List[StubSource]: + """Find sources for modules and packages requested. + + This function just looks for source files at the file system level. + This is used if user passes --no-import, and will not find C modules. + Exit if some of the modules or packages can't be found. 
+ """ + result = [] # type: List[StubSource] + typeshed_path = default_lib_path(mypy.build.default_data_dir(), pyversion, None) + search_paths = SearchPaths(('.',) + tuple(search_path), (), (), tuple(typeshed_path)) + cache = FindModuleCache(search_paths) + for module in modules: + module_path = cache.find_module(module) + if not module_path: + fail_missing(module) + result.append(StubSource(module, module_path)) + for package in packages: + p_result = cache.find_modules_recursive(package) + if not p_result: + fail_missing(package) + sources = [StubSource(m.module, m.path) for m in p_result] + result.extend(sources) + return result + + +def mypy_options(stubgen_options: Options) -> MypyOptions: + """Generate mypy options using the flag passed by user.""" + options = MypyOptions() + options.follow_imports = 'skip' + options.incremental = False + options.ignore_errors = True + options.semantic_analysis_only = True + options.python_version = stubgen_options.pyversion + return options + + +def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None: + """Parse a source file. + + On success, store AST in the corresponding attribute of the stub source. + If there are syntax errors, print them and exit. + """ + assert mod.path is not None, "Not found module was not skipped" + with open(mod.path, 'rb') as f: + data = f.read() + source = mypy.util.decode_python_encoding(data, mypy_options.python_version) + try: + mod.ast = mypy.parse.parse(source, fnam=mod.path, module=mod.module, + errors=None, options=mypy_options) + except mypy.errors.CompileError as e: + # Syntax error! 
+ for m in e.messages: + sys.stderr.write('%s\n' % m) + sys.exit(1) + + +def generate_asts_for_modules(py_modules: List[StubSource], + parse_only: bool, mypy_options: MypyOptions) -> None: + """Use mypy to parse (and optionally analyze) source files.""" + if parse_only: + for mod in py_modules: + parse_source_file(mod, mypy_options) + return + # Perform full semantic analysis of the source set. + try: + res = build(list(py_modules), mypy_options) + except CompileError as e: + raise SystemExit("Critical error during semantic analysis: {}".format(e)) + + for mod in py_modules: + mod.ast = res.graph[mod.module].tree + # Use statically inferred __all__ if there is no runtime one. + if mod.runtime_all is None: + mod.runtime_all = res.manager.semantic_analyzer.export_map[mod.module] + + +def generate_stub_from_ast(mod: StubSource, + target: str, + parse_only: bool = False, + pyversion: Tuple[int, int] = defaults.PYTHON3_VERSION, + include_private: bool = False, + add_header: bool = True) -> None: + """Use analysed (or just parsed) AST to generate type stub for single file. + + If directory for target doesn't exist it will be created. Existing stub + will be overwritten. + """ + gen = StubGenerator(mod.runtime_all, + pyversion=pyversion, + include_private=include_private, + analyzed=not parse_only) + assert mod.ast is not None, "This function must be used only with analyzed modules" + mod.ast.accept(gen) + + # Write output to file. + subdir = os.path.dirname(target) + if subdir and not os.path.isdir(subdir): + os.makedirs(subdir) + with open(target, 'w') as file: + if add_header: + write_header(file, mod.module, pyversion=pyversion) + file.write(''.join(gen.output())) + + +def collect_docs_signatures(doc_dir: str) -> Tuple[Dict[str, str], Dict[str, str]]: + """Gather all function and class signatures in the docs. + + Return a tuple (function signatures, class signatures). + Currently only used for C modules. 
+ """ + all_sigs = [] # type: List[Sig] + all_class_sigs = [] # type: List[Sig] + for path in glob.glob('%s/*.rst' % doc_dir): + with open(path) as f: + loc_sigs, loc_class_sigs = parse_all_signatures(f.readlines()) + all_sigs += loc_sigs + all_class_sigs += loc_class_sigs + sigs = dict(find_unique_signatures(all_sigs)) + class_sigs = dict(find_unique_signatures(all_class_sigs)) + return sigs, class_sigs + + +def generate_stubs(options: Options, + # additional args for testing + quiet: bool = False, add_header: bool = True) -> None: + """Main entry point for the program.""" + mypy_opts = mypy_options(options) + py_modules, c_modules = collect_build_targets(options, mypy_opts) + + # Collect info from docs (if given): + sigs = class_sigs = None # type: Optional[Dict[str, str]] + if options.doc_dir: + sigs, class_sigs = collect_docs_signatures(options.doc_dir) + + # Use parsed sources to generate stubs for Python modules. + generate_asts_for_modules(py_modules, options.parse_only, mypy_opts) + for mod in py_modules: + assert mod.path is not None, "Not found module was not skipped" + target = mod.module.replace('.', '/') + if os.path.basename(mod.path) == '__init__.py': + target += '/__init__.pyi' + else: + target += '.pyi' + target = os.path.join(options.output_dir, target) + with generate_guarded(mod.module, target, options.ignore_errors, quiet): + generate_stub_from_ast(mod, target, + options.parse_only, options.pyversion, + options.include_private, add_header) + + # Separately analyse C modules using different logic. 
+ for mod in c_modules: + target = mod.module.replace('.', '/') + '.pyi' + target = os.path.join(options.output_dir, target) + with generate_guarded(mod.module, target, options.ignore_errors, quiet): + generate_stub_for_c_module(mod.module, target, sigs=sigs, class_sigs=class_sigs, + add_header=add_header) + + +HEADER = """%(prog)s [-h] [--py2] [more options, see -h] + [-m MODULE] [-p PACKAGE] [files ...]""" DESCRIPTION = """ Generate draft stubs for modules. @@ -988,14 +1121,15 @@ def parse_options(args: List[str]) -> Options: parser.add_argument('--py2', action='store_true', help="run in Python 2 mode (default: Python 3 mode)") - parser.add_argument('--recursive', action='store_true', - help="traverse listed modules to generate inner package modules as well") parser.add_argument('--ignore-errors', action='store_true', help="ignore errors when trying to generate stubs for modules") parser.add_argument('--no-import', action='store_true', help="don't import the modules, just parse and analyze them " - "(doesn't work with C extension modules and doesn't " + "(doesn't work with C extension modules and might not " "respect __all__)") + parser.add_argument('--parse-only', action='store_true', + help="don't perform semantic analysis of sources, just parse them " + "(only applies to Python modules, might affect quality of stubs)") parser.add_argument('--include-private', action='store_true', help="generate stubs for objects and members considered private " "(single leading underscore and no trailing underscores)") @@ -1009,40 +1143,51 @@ def parse_options(args: List[str]) -> Options: parser.add_argument('--python-executable', metavar='PATH', dest='interpreter', default='', help="use Python interpreter at PATH (only works for " "Python 2 right now)") - parser.add_argument('-o', metavar='PATH', dest='output_dir', default='out', - help="Change the output folder [default: %(default)s]") - parser.add_argument(metavar='modules', nargs='+', dest='modules') + 
parser.add_argument('-o', '--output', metavar='PATH', dest='output_dir', default='out', + help="change the output directory [default: %(default)s]") + parser.add_argument('-m', '--module', action='append', metavar='MODULE', + dest='modules', default=[], + help="generate stub for module; can repeat for more modules") + parser.add_argument('-p', '--package', action='append', metavar='PACKAGE', + dest='packages', default=[], + help="generate stubs for package recursively; can be repeated") + parser.add_argument(metavar='files', nargs='*', dest='files', + help="generate stubs for given files or directories") ns = parser.parse_args(args) pyversion = defaults.PYTHON2_VERSION if ns.py2 else defaults.PYTHON3_VERSION if not ns.interpreter: - ns.interpreter = sys.executable if pyversion[0] == 3 else default_python2_interpreter() + ns.interpreter = sys.executable if pyversion[0] == 3 else default_py2_interpreter() + if ns.modules + ns.packages and ns.files: + parser.error("May only specify one of: modules/packages or files.") + # Create the output folder if it doesn't already exist. if not os.path.exists(ns.output_dir): os.makedirs(ns.output_dir) + return Options(pyversion=pyversion, no_import=ns.no_import, doc_dir=ns.doc_dir, search_path=ns.search_path.split(':'), interpreter=ns.interpreter, - modules=ns.modules, ignore_errors=ns.ignore_errors, - recursive=ns.recursive, + parse_only=ns.parse_only, include_private=ns.include_private, - output_dir=ns.output_dir) + output_dir=ns.output_dir, + modules=ns.modules, + packages=ns.packages, + files=ns.files) -def default_python2_interpreter() -> str: - # TODO: Make this do something reasonable in Windows. 
- for candidate in ('/usr/bin/python2', '/usr/bin/python'): - if not os.path.exists(candidate): - continue - output = subprocess.check_output([candidate, '--version'], - stderr=subprocess.STDOUT).strip() - if b'Python 2' in output: - return candidate - raise SystemExit("Can't find a Python 2 interpreter -- please use the -p option") +def main() -> None: + # Make sure that the current directory is in sys.path so that + # stubgen can be run on packages in the current directory. + if not ('' in sys.path or '.' in sys.path): + sys.path.insert(0, '') + + options = parse_options(sys.argv[1:]) + generate_stubs(options) if __name__ == '__main__': diff --git a/mypy/stubgenc.py b/mypy/stubgenc.py index 6e87c5b1f96c..b9663459639f 100644 --- a/mypy/stubgenc.py +++ b/mypy/stubgenc.py @@ -10,18 +10,26 @@ from typing import List, Dict, Tuple, Optional, Mapping, Any, Set from types import ModuleType -from mypy.stubutil import ( - is_c_module, write_header, infer_sig_from_docstring, infer_prop_type_from_docstring, - ArgSig, infer_arg_sig_from_docstring, FunctionSig +from mypy.stubutil import write_header, is_c_module +from mypy.stubdoc import ( + infer_sig_from_docstring, infer_prop_type_from_docstring, ArgSig, + infer_arg_sig_from_docstring, FunctionSig ) def generate_stub_for_c_module(module_name: str, target: str, add_header: bool = True, - sigs: Dict[str, str] = {}, - class_sigs: Dict[str, str] = {}, - ) -> None: + sigs: Optional[Dict[str, str]] = None, + class_sigs: Optional[Dict[str, str]] = None) -> None: + """Generate stub for C module. + + This combines simple runtime introspection (looking for docstrings and attributes + with simple builtin types) and signatures inferred from .rst documentation (if given). + + If directory for target doesn't exist it will be created. Existing stub + will be overwritten. 
+ """ module = importlib.import_module(module_name) assert is_c_module(module), '%s is not a C module' % module_name subdir = os.path.dirname(target) @@ -74,6 +82,7 @@ def generate_stub_for_c_module(module_name: str, def add_typing_import(output: List[str]) -> List[str]: + """Add typing imports for collections/types that occur in the generated stub.""" names = [] for name in ['Any', 'Union', 'Tuple', 'Optional', 'List', 'Dict']: if any(re.search(r'\b%s\b' % name, line) for line in output): @@ -117,10 +126,21 @@ def generate_c_function_stub(module: ModuleType, output: List[str], imports: List[str], self_var: Optional[str] = None, - sigs: Dict[str, str] = {}, + sigs: Optional[Dict[str, str]] = None, class_name: Optional[str] = None, - class_sigs: Dict[str, str] = {}, - ) -> None: + class_sigs: Optional[Dict[str, str]] = None) -> None: + """Generate stub for a single function or method. + + The result (always a single line) will be appended to 'output'. + If necessary, any required names will be added to 'imports'. + The 'class_name' is used to find signature of __init__ or __new__ in + 'class_sigs'. + """ + if sigs is None: + sigs = {} + if class_sigs is None: + class_sigs = {} + ret_type = 'None' if name == '__init__' and class_name else 'Any' if (name in ('__new__', '__init__') and name not in sigs and class_name and @@ -166,16 +186,15 @@ def generate_c_function_stub(module: ModuleType, def strip_or_import(typ: str, module: ModuleType, imports: List[str]) -> str: - """ - Strips unnecessary module names from typ. + """Strips unnecessary module names from typ. - If typ represents a type that is inside module or is a type comming from builtins, remove - module declaration from it + If typ represents a type that is inside module or is a type coming from builtins, remove + module declaration from it. Return stripped name of the type. - :param typ: name of the type - :param module: in which this type is used - :param imports: list of import statements. 
May be modified during the call - :return: stripped name of the type + Arguments: + typ: name of the type + module: in which this type is used + imports: list of import statements (may be modified during the call) """ arg_type = typ if module and typ.startswith(module.__name__): @@ -190,6 +209,10 @@ def strip_or_import(typ: str, module: ModuleType, imports: List[str]) -> str: def generate_c_property_stub(name: str, obj: object, output: List[str], readonly: bool) -> None: + """Generate property stub using introspection of 'obj'. + + Try to infer type from docstring, append resulting lines to 'output'. + """ docstr = getattr(obj, '__doc__', None) inferred = infer_prop_type_from_docstring(docstr) if not inferred: @@ -207,9 +230,13 @@ def generate_c_type_stub(module: ModuleType, obj: type, output: List[str], imports: List[str], - sigs: Dict[str, str] = {}, - class_sigs: Dict[str, str] = {}, - ) -> None: + sigs: Optional[Dict[str, str]] = None, + class_sigs: Optional[Dict[str, str]] = None) -> None: + """Generate stub for a single class using runtime introspection. + + The result lines will be appended to 'output'. If necessary, any + required names will be added to 'imports'. + """ # typeshed gives obj.__dict__ the not quite correct type Dict[str, Any] # (it could be a mappingproxy!), which makes mypyc mad, so obfuscate it. obj_dict = getattr(obj, '__dict__') # type: Mapping[str, Any] @@ -285,11 +312,15 @@ def generate_c_type_stub(module: ModuleType, def method_name_sort_key(name: str) -> Tuple[int, str]: + """Sort methods in classes in a typical order. + + I.e.: constructor, normal methods, special methods. 
+ """ if name in ('__new__', '__init__'): - return (0, name) + return 0, name if name.startswith('__') and name.endswith('__'): - return (2, name) - return (1, name) + return 2, name + return 1, name def is_skipped_attribute(attr: str) -> bool: diff --git a/mypy/stubutil.py b/mypy/stubutil.py index be27c23f5317..155b5b97d201 100644 --- a/mypy/stubutil.py +++ b/mypy/stubutil.py @@ -1,239 +1,20 @@ -import contextlib -import io -import re -import sys -import os -import tokenize +"""Utilities for mypy.stubgen, mypy.stubgenc, and mypy.stubdoc modules.""" -from typing import (Optional, Tuple, Sequence, MutableSequence, List, MutableMapping, IO, - NamedTuple, Any) +import sys +import os.path +import inspect +import json +import pkgutil +import importlib +import subprocess from types import ModuleType +from contextlib import contextmanager -MYPY = False -if MYPY: - from typing_extensions import Final - -# Type Alias for Signatures -Sig = Tuple[str, str] - - -class ArgSig: - def __init__(self, name: str, type: Optional[str] = None, default: bool = False): - self.name = name - self.type = type - self.default = default - - def __repr__(self) -> str: - return "ArgSig(name={}, type={}, default={})".format(repr(self.name), repr(self.type), - repr(self.default)) - - def __eq__(self, other: Any) -> bool: - if isinstance(other, ArgSig): - return (self.name == other.name and self.type == other.type and - self.default == other.default) - return False - - -FunctionSig = NamedTuple('FunctionSig', [ - ('name', str), - ('args', List[ArgSig]), - ('ret_type', str) -]) - - -STATE_INIT = 1 # type: Final -STATE_FUNCTION_NAME = 2 # type: Final -STATE_ARGUMENT_LIST = 3 # type: Final -STATE_ARGUMENT_TYPE = 4 # type: Final -STATE_ARGUMENT_DEFAULT = 5 # type: Final -STATE_RETURN_VALUE = 6 # type: Final -STATE_OPEN_BRACKET = 7 # type: Final - - -class DocStringParser: - def __init__(self, function_name: str) -> None: - self.function_name = function_name - self.state = [STATE_INIT] - 
self.accumulator = "" - self.arg_type = None # type: Optional[str] - self.arg_name = "" - self.arg_default = None # type: Optional[str] - self.ret_type = "Any" - self.found = False - self.args = [] # type: List[ArgSig] - self.signatures = [] # type: List[FunctionSig] +from typing import Optional, Tuple, List, IO, Iterator - def add_token(self, token: tokenize.TokenInfo) -> None: - if (token.type == tokenize.NAME and token.string == self.function_name and - self.state[-1] == STATE_INIT): - self.state.append(STATE_FUNCTION_NAME) - - elif (token.type == tokenize.OP and token.string == '(' and - self.state[-1] == STATE_FUNCTION_NAME): - self.state.pop() - self.accumulator = "" - self.found = True - self.state.append(STATE_ARGUMENT_LIST) - - elif self.state[-1] == STATE_FUNCTION_NAME: - # reset state, function name not followed by '(' - self.state.pop() - - elif (token.type == tokenize.OP and token.string in ('[', '(', '{') and - self.state[-1] != STATE_INIT): - self.accumulator += token.string - self.state.append(STATE_OPEN_BRACKET) - - elif (token.type == tokenize.OP and token.string in (']', ')', '}') and - self.state[-1] == STATE_OPEN_BRACKET): - self.accumulator += token.string - self.state.pop() - - elif (token.type == tokenize.OP and token.string == ':' and - self.state[-1] == STATE_ARGUMENT_LIST): - self.arg_name = self.accumulator - self.accumulator = "" - self.state.append(STATE_ARGUMENT_TYPE) - - elif (token.type == tokenize.OP and token.string == '=' and - self.state[-1] in (STATE_ARGUMENT_LIST, STATE_ARGUMENT_TYPE)): - if self.state[-1] == STATE_ARGUMENT_TYPE: - self.arg_type = self.accumulator - self.state.pop() - else: - self.arg_name = self.accumulator - self.accumulator = "" - self.state.append(STATE_ARGUMENT_DEFAULT) - - elif (token.type == tokenize.OP and token.string in (',', ')') and - self.state[-1] in (STATE_ARGUMENT_LIST, STATE_ARGUMENT_DEFAULT, - STATE_ARGUMENT_TYPE)): - if self.state[-1] == STATE_ARGUMENT_DEFAULT: - self.arg_default = 
self.accumulator - self.state.pop() - elif self.state[-1] == STATE_ARGUMENT_TYPE: - self.arg_type = self.accumulator - self.state.pop() - elif self.state[-1] == STATE_ARGUMENT_LIST: - self.arg_name = self.accumulator - - if token.string == ')': - self.state.pop() - self.args.append(ArgSig(name=self.arg_name, type=self.arg_type, - default=bool(self.arg_default))) - self.arg_name = "" - self.arg_type = None - self.arg_default = None - self.accumulator = "" - - elif token.type == tokenize.OP and token.string == '->' and self.state[-1] == STATE_INIT: - self.accumulator = "" - self.state.append(STATE_RETURN_VALUE) - - # ENDMAKER is necessary for python 3.4 and 3.5 - elif (token.type in (tokenize.NEWLINE, tokenize.ENDMARKER) and - self.state[-1] in (STATE_INIT, STATE_RETURN_VALUE)): - if self.state[-1] == STATE_RETURN_VALUE: - self.ret_type = self.accumulator - self.accumulator = "" - self.state.pop() - - if self.found: - self.signatures.append(FunctionSig(name=self.function_name, args=self.args, - ret_type=self.ret_type)) - self.found = False - self.args = [] - self.ret_type = 'Any' - # leave state as INIT - else: - self.accumulator += token.string - def get_signatures(self) -> List[FunctionSig]: - def has_arg(name: str, signature: FunctionSig) -> bool: - return any(x.name == name for x in signature.args) - - def args_kwargs(signature: FunctionSig) -> bool: - return has_arg('*args', signature) and has_arg('**kwargs', signature) - - # Move functions with (*args, **kwargs) in their signature to last place - return list(sorted(self.signatures, key=lambda x: 1 if args_kwargs(x) else 0)) - - -def parse_signature(sig: str) -> Optional[Tuple[str, - List[str], - List[str]]]: - m = re.match(r'([.a-zA-Z0-9_]+)\(([^)]*)\)', sig) - if not m: - return None - name = m.group(1) - name = name.split('.')[-1] - arg_string = m.group(2) - if not arg_string.strip(): - return (name, [], []) - args = [arg.strip() for arg in arg_string.split(',')] - fixed = [] - optional = [] - i = 0 - while i 
< len(args): - if args[i].startswith('[') or '=' in args[i]: - break - fixed.append(args[i].rstrip('[')) - i += 1 - if args[i - 1].endswith('['): - break - while i < len(args): - arg = args[i] - arg = arg.strip('[]') - arg = arg.split('=')[0] - optional.append(arg) - i += 1 - return (name, fixed, optional) - - -def build_signature(fixed: Sequence[str], - optional: Sequence[str]) -> str: - args = [] # type: MutableSequence[str] - args.extend(fixed) - for arg in optional: - if arg.startswith('*'): - args.append(arg) - else: - args.append('%s=...' % arg) - sig = '(%s)' % ', '.join(args) - # Ad-hoc fixes. - sig = sig.replace('(self)', '') - return sig - - -def parse_all_signatures(lines: Sequence[str]) -> Tuple[List[Sig], - List[Sig]]: - sigs = [] - class_sigs = [] - for line in lines: - line = line.strip() - m = re.match(r'\.\. *(function|method|class) *:: *[a-zA-Z_]', line) - if m: - sig = line.split('::')[1].strip() - parsed = parse_signature(sig) - if parsed: - name, fixed, optional = parsed - if m.group(1) != 'class': - sigs.append((name, build_signature(fixed, optional))) - else: - class_sigs.append((name, build_signature(fixed, optional))) - - return sorted(sigs), sorted(class_sigs) - - -def find_unique_signatures(sigs: Sequence[Sig]) -> List[Sig]: - sig_map = {} # type: MutableMapping[str, List[str]] - for name, sig in sigs: - sig_map.setdefault(name, []).append(sig) - result = [] - for name, name_sigs in sig_map.items(): - if len(set(name_sigs)) == 1: - result.append((name, name_sigs[0])) - return sorted(result) +class CantImport(Exception): + pass def is_c_module(module: ModuleType) -> bool: @@ -243,58 +24,137 @@ def is_c_module(module: ModuleType) -> bool: def write_header(file: IO[str], module_name: Optional[str] = None, pyversion: Tuple[int, int] = (3, 5)) -> None: + """Write a header to file indicating this file is auto-generated by stubgen.""" if module_name: - if pyversion[0] >= 3: - version = '%d.%d' % (sys.version_info.major, - sys.version_info.minor) 
- else: - version = '2' - file.write('# Stubs for %s (Python %s)\n' % (module_name, version)) + file.write('# Stubs for %s (Python %s)\n' % (module_name, pyversion[0])) file.write( '#\n' '# NOTE: This dynamically typed stub was automatically generated by stubgen.\n\n') -def infer_sig_from_docstring(docstr: str, name: str) -> Optional[List[FunctionSig]]: - """Concert function signature to list of TypedFunctionSig +def default_py2_interpreter() -> str: + """Find a system Python 2 interpreter. + + Return full path or exit if failed. + """ + # TODO: Make this do something reasonable in Windows. + for candidate in ('/usr/bin/python2', '/usr/bin/python'): + if not os.path.exists(candidate): + continue + output = subprocess.check_output([candidate, '--version'], + stderr=subprocess.STDOUT).strip() + if b'Python 2' in output: + return candidate + raise SystemExit("Can't find a Python 2 interpreter -- " + "please use the --python-executable option") + + +def walk_packages(packages: List[str]) -> Iterator[str]: + """Iterates through all packages and sub-packages in the given list. + + This uses runtime imports to find both Python and C modules. For Python packages + we simply pass the __path__ attribute to pkgutil.walk_packages() to get the content + of the package (all subpackages and modules). However, packages in C extensions + do not have this attribute, so we have to roll out our own logic: recursively find + all modules imported in the package that have matching names. + """ + for package_name in packages: + try: + package = importlib.import_module(package_name) + except Exception: + report_missing(package_name) + continue + yield package.__name__ + # get the path of the object (needed by pkgutil) + path = getattr(package, '__path__', None) + if path is None: + # Object has no path; this means it's either a module inside a package + # (and thus no sub-packages), or it could be a C extension package. 
+ if is_c_module(package): + # This is a C extension module, now get the list of all sub-packages + # using the inspect module + subpackages = [package.__name__ + "." + name + for name, val in inspect.getmembers(package) + if inspect.ismodule(val) + and val.__name__ == package.__name__ + "." + name] + # Recursively iterate through the subpackages + for submodule in walk_packages(subpackages): + yield submodule + # It's a module inside a package. There's nothing else to walk/yield. + else: + all_packages = pkgutil.walk_packages(path, prefix=package.__name__ + ".", + onerror=lambda r: None) + for importer, qualified_name, ispkg in all_packages: + yield qualified_name + + +def find_module_path_and_all_py2(module: str, + interpreter: str) -> Optional[Tuple[str, + Optional[List[str]]]]: + """Return tuple (module path, module __all__) for a Python 2 module. - Looks for function signatures of function in docstring. Returns empty list, when no signature - is found, one signature in typical case, multiple signatures, if docstring specifies multiple - signatures for overload functions. + The path refers to the .py/.py[co] file. The second tuple item is + None if the module doesn't define __all__. - Arguments: - * docstr: docstring - * name: name of function for which signatures are to be found + Raise CantImport if the module can't be imported, or exit if it's a C extension module. 
+ """ + cmd_template = '{interpreter} -c "%s"'.format(interpreter=interpreter) + code = ("import importlib, json; mod = importlib.import_module('%s'); " + "print(mod.__file__); print(json.dumps(getattr(mod, '__all__', None)))") % module + try: + output_bytes = subprocess.check_output(cmd_template % code, shell=True) + except subprocess.CalledProcessError: + raise CantImport(module) + output = output_bytes.decode('ascii').strip().splitlines() + module_path = output[0] + if not module_path.endswith(('.py', '.pyc', '.pyo')): + raise SystemExit('%s looks like a C module; they are not supported for Python 2' % + module) + if module_path.endswith(('.pyc', '.pyo')): + module_path = module_path[:-1] + module_all = json.loads(output[1]) + return module_path, module_all + + +def find_module_path_and_all_py3(module: str) -> Optional[Tuple[str, Optional[List[str]]]]: + """Find module and determine __all__ for a Python 3 module. + + Return None if the module is a C module. Return (module_path, __all__) if + it is a Python module. Raise CantImport if import failed. """ - if not docstr: + # TODO: Support custom interpreters. + try: + mod = importlib.import_module(module) + except Exception: + raise CantImport(module) + if is_c_module(mod): return None + return mod.__file__, getattr(mod, '__all__', None) - state = DocStringParser(name) - with contextlib.suppress(tokenize.TokenError): - for token in tokenize.tokenize(io.BytesIO(docstr.encode('utf-8')).readline): - state.add_token(token) - return state.get_signatures() +@contextmanager +def generate_guarded(mod: str, target: str, + ignore_errors: bool = True, quiet: bool = False) -> Iterator[None]: + """Ignore or report errors during stub generation. -def infer_arg_sig_from_docstring(docstr: str) -> List[ArgSig]: - """Convert signature in form of "(self: TestClass, arg0: str='ada')" to List[TypedArgList].""" - ret = infer_sig_from_docstring("stub" + docstr, "stub") - if ret: - return ret[0].args + Optionally report success. 
+ """ + try: + yield + except Exception as e: + if not ignore_errors: + raise e + else: + # --ignore-errors was passed + print("Stub generation failed for", mod, file=sys.stderr) + else: + if not quiet: + print('Created %s' % target) - return [] +def report_missing(mod: str) -> None: + print('Failed to import {}; skipping it'.format(mod)) -def infer_prop_type_from_docstring(docstr: str) -> Optional[str]: - if not docstr: - return None - # check for Google/Numpy style docstring type annotation - # the docstring has the format ": " - # in the type string, we allow the following characters - # dot: because something classes are annotated using full path, - # brackets: to allow type hints like List[int] - # comma/space: things like Tuple[int, int] - test_str = r'^([a-zA-Z0-9_, \.\[\]]*): ' - m = re.match(test_str, docstr) - return m.group(1) if m else None +def fail_missing(mod: str) -> None: + raise SystemExit("Can't find module '{}' (consider using --search-path)".format(mod)) diff --git a/mypy/test/teststubgen.py b/mypy/test/teststubgen.py index f4083c8cef38..d46d0a4ae681 100644 --- a/mypy/test/teststubgen.py +++ b/mypy/test/teststubgen.py @@ -12,16 +12,63 @@ from mypy.test.data import DataSuite, DataDrivenTestCase from mypy.errors import CompileError from mypy.stubgen import ( - generate_stub, generate_stub_for_module, parse_options, walk_packages, Options + generate_stubs, parse_options, walk_packages, Options, collect_build_targets, + mypy_options ) from mypy.stubgenc import generate_c_type_stub, infer_method_sig, generate_c_function_stub -from mypy.stubutil import ( +from mypy.stubdoc import ( parse_signature, parse_all_signatures, build_signature, find_unique_signatures, infer_sig_from_docstring, infer_prop_type_from_docstring, FunctionSig, ArgSig, infer_arg_sig_from_docstring ) +class StubgenCmdLineSuite(Suite): + def test_files_found(self) -> None: + current = os.getcwd() + with tempfile.TemporaryDirectory() as tmp: + try: + os.chdir(tmp) + os.mkdir('subdir') 
+ self.make_file('subdir', 'a.py') + self.make_file('subdir', 'b.py') + os.mkdir(os.path.join('subdir', 'pack')) + self.make_file('subdir', 'pack', '__init__.py') + opts = parse_options(['subdir']) + py_mods, c_mods = collect_build_targets(opts, mypy_options(opts)) + assert_equal(c_mods, []) + files = {mod.path for mod in py_mods} + assert_equal(files, {os.path.join('subdir', 'pack', '__init__.py'), + os.path.join('subdir', 'a.py'), + os.path.join('subdir', 'b.py')}) + finally: + os.chdir(current) + + def test_packages_found(self) -> None: + current = os.getcwd() + with tempfile.TemporaryDirectory() as tmp: + try: + os.chdir(tmp) + os.mkdir('pack') + self.make_file('pack', '__init__.py', content='from . import a, b') + self.make_file('pack', 'a.py') + self.make_file('pack', 'b.py') + opts = parse_options(['-p', 'pack']) + py_mods, c_mods = collect_build_targets(opts, mypy_options(opts)) + assert_equal(c_mods, []) + files = {os.path.relpath(mod.path or 'FAIL') for mod in py_mods} + assert_equal(files, {os.path.join('pack', '__init__.py'), + os.path.join('pack', 'a.py'), + os.path.join('pack', 'b.py')}) + finally: + os.chdir(current) + + def make_file(self, *path: str, content: str = '') -> None: + file = os.path.join(*path) + with open(file, 'w') as f: + f.write(content) + + class StubgenCliParseSuite(Suite): def test_walk_packages(self) -> None: assert_equal( @@ -104,114 +151,82 @@ def test_find_unique_signatures(self) -> None: ('func3', '(arg, arg2)')]) def test_infer_sig_from_docstring(self) -> None: - assert_equal( - infer_sig_from_docstring('\nfunc(x) - y', 'func'), - [FunctionSig(name='func', args=[ArgSig(name='x')], ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x) - y', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x')], ret_type='Any')]) - assert_equal( - infer_sig_from_docstring('\nfunc(x, Y_a=None)', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], - ret_type='Any')] - ) - 
assert_equal( - infer_sig_from_docstring('\nfunc(x, Y_a=3)', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], - ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x, Y_a=None)', 'func'), + [FunctionSig(name='func', + args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], + ret_type='Any')]) - assert_equal( - infer_sig_from_docstring('\nfunc(x, Y_a=[1, 2, 3])', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], - ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x, Y_a=3)', 'func'), + [FunctionSig(name='func', + args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], + ret_type='Any')]) + + assert_equal(infer_sig_from_docstring('\nfunc(x, Y_a=[1, 2, 3])', 'func'), + [FunctionSig(name='func', + args=[ArgSig(name='x'), ArgSig(name='Y_a', default=True)], + ret_type='Any')]) assert_equal(infer_sig_from_docstring('\nafunc(x) - y', 'func'), []) assert_equal(infer_sig_from_docstring('\nfunc(x, y', 'func'), []) - assert_equal( - infer_sig_from_docstring('\nfunc(x=z(y))', 'func'), - [FunctionSig(name='func', args=[ArgSig(name='x', default=True)], ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x=z(y))', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', default=True)], + ret_type='Any')]) + assert_equal(infer_sig_from_docstring('\nfunc x', 'func'), []) - # try to infer signature from type annotation - assert_equal( - infer_sig_from_docstring('\nfunc(x: int)', 'func'), - [FunctionSig(name='func', args=[ArgSig(name='x', type='int')], ret_type='Any')] - ) - assert_equal( - infer_sig_from_docstring('\nfunc(x: int=3)', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='int', default=True)], - ret_type='Any')] - ) - assert_equal( - infer_sig_from_docstring('\nfunc(x: int=3) -> int', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='int', default=True)], - ret_type='int')] - ) - 
assert_equal( - infer_sig_from_docstring('\nfunc(x: int=3) -> int \n', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='int', default=True)], - ret_type='int')] - ) - assert_equal( - infer_sig_from_docstring('\nfunc(x: Tuple[int, str]) -> str', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='Tuple[int,str]')], - ret_type='str')] - ) + # Try to infer signature from type annotation. + assert_equal(infer_sig_from_docstring('\nfunc(x: int)', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='int')], + ret_type='Any')]) + assert_equal(infer_sig_from_docstring('\nfunc(x: int=3)', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='int', default=True)], + ret_type='Any')]) + + assert_equal(infer_sig_from_docstring('\nfunc(x: int=3) -> int', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='int', default=True)], + ret_type='int')]) + + assert_equal(infer_sig_from_docstring('\nfunc(x: int=3) -> int \n', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='int', default=True)], + ret_type='int')]) + + assert_equal(infer_sig_from_docstring('\nfunc(x: Tuple[int, str]) -> str', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='Tuple[int,str]')], + ret_type='str')]) + assert_equal( infer_sig_from_docstring('\nfunc(x: Tuple[int, Tuple[str, int], str], y: int) -> str', 'func'), [FunctionSig(name='func', args=[ArgSig(name='x', type='Tuple[int,Tuple[str,int],str]'), ArgSig(name='y', type='int')], - ret_type='str')] - ) - assert_equal( - infer_sig_from_docstring('\nfunc(x: foo.bar)', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='foo.bar')], - ret_type='Any')] - ) + ret_type='str')]) - assert_equal( - infer_sig_from_docstring('\nfunc(x: list=[1,2,[3,4]])', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='list', default=True)], - ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x: foo.bar)', 'func'), + 
[FunctionSig(name='func', args=[ArgSig(name='x', type='foo.bar')], + ret_type='Any')]) - assert_equal( - infer_sig_from_docstring('\nfunc(x: str="nasty[")', 'func'), - [FunctionSig(name='func', - args=[ArgSig(name='x', type='str', default=True)], - ret_type='Any')] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x: list=[1,2,[3,4]])', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='list', default=True)], + ret_type='Any')]) - assert_equal( - infer_sig_from_docstring('\nfunc[(x: foo.bar, invalid]', 'func'), - [] - ) + assert_equal(infer_sig_from_docstring('\nfunc(x: str="nasty[")', 'func'), + [FunctionSig(name='func', args=[ArgSig(name='x', type='str', default=True)], + ret_type='Any')]) + + assert_equal(infer_sig_from_docstring('\nfunc[(x: foo.bar, invalid]', 'func'), []) def test_infer_arg_sig_from_docstring(self) -> None: - assert_equal( - infer_arg_sig_from_docstring("(*args, **kwargs)"), - [ArgSig(name='*args'), ArgSig(name='**kwargs')] - ) + assert_equal(infer_arg_sig_from_docstring("(*args, **kwargs)"), + [ArgSig(name='*args'), ArgSig(name='**kwargs')]) assert_equal( infer_arg_sig_from_docstring( - "(x: Tuple[int, Tuple[str, int], str]=(1, ('a', 2), 'y'), y: int=4)" - ), + "(x: Tuple[int, Tuple[str, int], str]=(1, ('a', 2), 'y'), y: int=4)"), [ArgSig(name='x', type='Tuple[int,Tuple[str,int],str]', default=True), - ArgSig(name='y', type='int', default=True)] - ) + ArgSig(name='y', type='int', default=True)]) def test_infer_prop_type_from_docstring(self) -> None: assert_equal(infer_prop_type_from_docstring('str: A string.'), 'str') @@ -227,85 +242,49 @@ class StubgenPythonSuite(DataSuite): files = ['stubgen.test'] def run_case(self, testcase: DataDrivenTestCase) -> None: - test_stubgen(testcase) - - -def parse_flags(program_text: str) -> Options: - flags = re.search('# flags: (.*)$', program_text, flags=re.MULTILINE) - if flags: - flag_list = flags.group(1).split() - else: - flag_list = [] - return parse_options(flag_list + ['dummy.py']) - 
- -def test_stubgen(testcase: DataDrivenTestCase) -> None: - if 'stubgen-test-path' not in sys.path: - sys.path.insert(0, 'stubgen-test-path') - os.mkdir('stubgen-test-path') - source = '\n'.join(testcase.input) - options = parse_flags(source) - handle = tempfile.NamedTemporaryFile(prefix='prog_', suffix='.py', dir='stubgen-test-path', - delete=False) - assert os.path.isabs(handle.name) - path = os.path.basename(handle.name) - name = path[:-3] - path = os.path.join('stubgen-test-path', path) - out_dir = '_out' - os.mkdir(out_dir) - try: - handle.write(bytes(source, 'ascii')) - handle.close() - # Without this we may sometimes be unable to import the module below, as importlib - # caches os.listdir() results in Python 3.3+ (Guido explained this to me). - reset_importlib_cache('stubgen-test-path') + extra = [] + mods = [] + source = '\n'.join(testcase.input) + for file, content in testcase.files + [('./main.py', source)]: + mod = os.path.basename(file)[:-3] + mods.append(mod) + extra.extend(['-m', mod]) + with open(file, 'w') as f: + f.write(content) + + options = self.parse_flags(source, extra) + out_dir = 'out' try: - if testcase.name.endswith('_import'): - generate_stub_for_module(name, out_dir, quiet=True, - no_import=options.no_import, - include_private=options.include_private) - else: - generate_stub(path, out_dir, include_private=options.include_private) - a = load_output(out_dir) - except CompileError as e: - a = e.messages - assert_string_arrays_equal(testcase.output, a, - 'Invalid output ({}, line {})'.format( - testcase.file, testcase.line)) - finally: - handle.close() - os.unlink(handle.name) - shutil.rmtree(out_dir) - - -def reset_importlib_cache(entry: str) -> None: - # importlib.invalidate_caches() is insufficient, since it doesn't - # clear cache entries that indicate that a directory on the path - # does not exist, which can cause failures. Just directly clear - # the sys.path_importer_cache entry ourselves. 
Other possible - # workarounds include always using different paths in the sys.path - # (perhaps by using the full path name) or removing the entry from - # sys.path after each run. - if entry in sys.path_importer_cache: - del sys.path_importer_cache[entry] - - -def load_output(dirname: str) -> List[str]: - result = [] # type: List[str] - entries = glob.glob('%s/*' % dirname) - assert entries, 'No files generated' - if len(entries) == 1: - add_file(entries[0], result) - else: - for entry in entries: - result.append('## %s ##' % entry) - add_file(entry, result) - return result - - -def add_file(path: str, result: List[str]) -> None: - with open(path, encoding='utf8') as file: - result.extend(file.read().splitlines()) + try: + if not testcase.name.endswith('_import'): + options.no_import = True + if not testcase.name.endswith('_semanal'): + options.parse_only = True + generate_stubs(options, quiet=True, add_header=False) + a = [] # type: List[str] + self.add_file(os.path.join(out_dir, 'main.pyi'), a) + except CompileError as e: + a = e.messages + assert_string_arrays_equal(testcase.output, a, + 'Invalid output ({}, line {})'.format( + testcase.file, testcase.line)) + finally: + for mod in mods: + if mod in sys.modules: + del sys.modules[mod] + shutil.rmtree(out_dir) + + def parse_flags(self, program_text: str, extra: List[str]) -> Options: + flags = re.search('# flags: (.*)$', program_text, flags=re.MULTILINE) + if flags: + flag_list = flags.group(1).split() + else: + flag_list = [] + return parse_options(flag_list + extra) + + def add_file(self, path: str, result: List[str]) -> None: + with open(path, encoding='utf8') as file: + result.extend(file.read().splitlines()) class StubgencSuite(Suite): @@ -418,12 +397,11 @@ def test(arg0: str) -> None: assert_equal(imports, ['import argparse']) def test_generate_c_function_same_module_arg(self) -> None: + """Test that if argument references type from same module but using full path, no module + will be imported, and type 
 specification will be stripped to local reference. """ - # provide different type in python spec than in docstring to make sure, that docstring - # information is used + # Provide different type in python spec than in docstring to make sure, that docstring + # information is used. def test(arg0: str) -> None: """ test(arg0: argparse.Action) @@ -437,9 +415,7 @@ def test(arg0: str) -> None: assert_equal(imports, []) def test_generate_c_function_other_module_ret(self) -> None: - """ - Test that if return type references type from other module, module will be imported. - """ + """Test that if return type references type from other module, module will be imported.""" def test(arg0: str) -> None: """ test(arg0: str) -> argparse.Action @@ -453,9 +429,8 @@ def test(arg0: str) -> None: assert_equal(imports, ['import argparse']) def test_generate_c_function_same_module_ret(self) -> None: - """ - Test that if return type references type from same module but using full path, no module - will be imported, and type specification will be striped to local reference. + """Test that if return type references type from same module but using full path, + no module will be imported, and type specification will be stripped to local reference. 
""" def test(arg0: str) -> None: """ @@ -492,11 +467,8 @@ def __init__(self, arg0: str) -> None: '@overload', 'def __init__(self, arg0: str, arg1: str) -> None: ...', '@overload', - 'def __init__(*args, **kwargs) -> Any: ...', - ]) - assert_equal(set(imports), { - 'from typing import overload' - }) + 'def __init__(*args, **kwargs) -> Any: ...']) + assert_equal(set(imports), {'from typing import overload'}) class ArgSigSuite(Suite): diff --git a/test-data/unit/stubgen.test b/test-data/unit/stubgen.test index d84ec2accbdd..ce8e918521b0 100644 --- a/test-data/unit/stubgen.test +++ b/test-data/unit/stubgen.test @@ -379,6 +379,9 @@ class A: class A: def __eq__(self) -> None: ... +-- Tests that will perform runtime imports of modules. +-- Don't use `_import` suffix if there are unquoted forward references. + [case testOmitDefsNotInAll_import] __all__ = [] + ['f'] def f(): ... @@ -904,7 +907,7 @@ def g(): return [out] def f(): ... -def g(): ... +def g() -> None: ... [case testFunctionEllipsisInfersReturnNone] def f(): ... @@ -1232,3 +1235,203 @@ class F: @t.coroutine def g(): ... + +-- Tests for stub generation from semantically analyzed trees. +-- These tests are much slower, so use the `_semanal` suffix only when needed. + +[case testNestedClass_semanal] +class Outer: + class Inner: + pass + +A = Outer.Inner +[out] +class Outer: + class Inner: ... +A = Outer.Inner + +[case testFunctionAlias_semanal] +from asyncio import coroutine + +@coroutine +def start_server(): + ... + +start = start_server +[out] +from asyncio import coroutine + +@coroutine +def start_server() -> None: ... +start = start_server + +[case testModuleAlias_semanal] +import a + +b = a +[file a.py] +x = 1 +[out] +import a + +b = a + +[case testBadAliasNested_semanal] +import a + +x = registry[a.f] +[file a.py] +def f(): ... +[out] +from typing import Any + +x: Any + +[case testCrossModuleClass_semanal] +import a + +class C: + x: A + def f(self) -> A: ... +A = a.A +[file a.py] +class A: ... 
+[out] +import a + +class C: + x: A + def f(self) -> A: ... +A = a.A + +[case testCrossModuleFunction_semanal] +import a +g = a.f +[file a.py] +def f(): ... +[out] +import a + +g = a.f + +[case testPrivateAliasesExcluded_semanal] +import a, _a + +class C: ... + +A = a._A +B = _a.f +_C = C +[file a.py] +class _A: ... +[file _a.py] +def f(): ... +[out] +from typing import Any + +class C: ... + +A: Any +B: Any + +[case testPrivateAliasesIncluded_semanal] +# flags: --include-private +import a, _a + +class C: ... + +A = a._A +B = _a.f +_C = C +[file a.py] +class _A: ... +[file _a.py] +def f(): ... +[out] +import _a +import a + +class C: ... +A = a._A +B = _a.f +_C = C + +[case testFinalWrapped_semanal] +from typing import Final + +x: Final = 1 +y: Final = x +z: Final[object] +t: Final +[out] +from typing import Any, Final + +x: Final[int] +y: Final[Any] +z: Final[object] +t: Final[Any] + +[case testNoFunctionNested_semanal] +import a +from typing import Dict, Any + +funcs: Dict[Any, Any] +f = funcs[a.f] +[out] +from typing import Any, Dict + +funcs: Dict[Any, Any] +f: Any + +[case testAbstractMethodNameExpr] +from abc import ABCMeta, abstractmethod + +class A(metaclass=ABCMeta): + @abstractmethod + def meth(self): + pass +[out] +from abc import ABCMeta, abstractmethod + +class A(metaclass=ABCMeta): + @abstractmethod + def meth(self): ... + +[case testAbstractMethodMemberExpr] +import abc + +class A(metaclass=abc.ABCMeta): + @abc.abstractmethod + def meth(self): + pass +[out] +import abc + +class A(metaclass=abc.ABCMeta): + @abc.abstractmethod + def meth(self): ... 
+ +[case testABCMeta_semanal] +from base import base +from abc import abstractmethod + +class C(Base): + @abstractmethod + def other(self): + pass + +[file base.py] +from abc import abstractmethod, ABCMeta + +class Base(metaclass=ABCMeta): + @abstractmethod + def meth(self): + pass +[out] +import abc +from abc import abstractmethod +from typing import Any + +class C(Base, metaclass=abc.ABCMeta): + @abstractmethod + def other(self) -> Any: ...