# Reconstructed from the mypy/stubutil.py hunk of the flattened patch
# "Use tokenize to parse function declarations in docstr".
import enum
import io
import tokenize


class State(enum.Enum):
    """Parser states used by infer_sig_from_docstring (kept on a stack)."""

    INIT = 1  # not inside a signature
    FUNCTION_NAME = 2  # the function name was just seen; '(' must follow
    ARGUMENT_LIST = 3  # inside the signature's parentheses, reading an argument name
    ARGUMENT_TYPE = 4  # after the ':' of an argument
    ARGUMENT_DEFAULT = 5  # after the '=' of an argument
    RETURN_VALUE = 6  # after '->'
    OPEN_BRACKET = 7  # inside a nested '(', '[' or '{'


def infer_sig_from_docstring(docstr: str, name: str) -> Optional[List[TypedFunctionSig]]:
    """Extract the signatures of function `name` that are written in a docstring.

    The docstring is run through the Python tokenizer and the tokens drive a
    small stack-based state machine. Every logical line containing
    ``name(...)``, optionally followed by ``-> type``, produces one
    TypedFunctionSig. Token strings are concatenated without the original
    whitespace, so a type written as ``Tuple[int, str]`` is reported as
    ``Tuple[int,str]``.

    Args:
        docstr: the docstring to scan.
        name: the function name to look for.

    Returns:
        None if `docstr` is empty; otherwise the list of signatures found, in
        order of appearance (possibly empty). If the tokenizer gives up in the
        middle of the text, the signatures completed so far are returned.
    """
    if not docstr:
        return None

    state = [State.INIT]
    accumulator = ""  # text of the piece (name / type / default / return type) being read
    arg_type = None  # type: Optional[str]
    arg_name = ""
    arg_default = None  # type: Optional[str]
    ret_type = "Any"
    found = False  # True once 'name(' was seen on the current logical line
    args = []  # type: List[TypedArgSig]
    signatures = []  # type: List[TypedFunctionSig]
    try:
        for token in tokenize.tokenize(io.BytesIO(docstr.encode('utf-8')).readline):
            # Conditions below never mutate the stack, so one snapshot per token is enough.
            top = state[-1]
            is_op = token.type == tokenize.OP
            text = token.string

            if token.type == tokenize.NAME and text == name and top == State.INIT:
                state.append(State.FUNCTION_NAME)

            elif is_op and text == '(' and top == State.FUNCTION_NAME:
                state.pop()
                accumulator = ""
                found = True
                state.append(State.ARGUMENT_LIST)

            elif top == State.FUNCTION_NAME:
                # reset state, function name not followed by '('
                state.pop()

            elif is_op and text in ('[', '(', '{'):
                accumulator += text
                state.append(State.OPEN_BRACKET)

            elif is_op and text in (']', ')', '}') and top == State.OPEN_BRACKET:
                accumulator += text
                state.pop()

            elif is_op and text == ':' and top == State.ARGUMENT_LIST:
                arg_name = accumulator
                accumulator = ""
                state.append(State.ARGUMENT_TYPE)

            elif is_op and text == '=' and top in (State.ARGUMENT_LIST, State.ARGUMENT_TYPE):
                if top == State.ARGUMENT_TYPE:
                    arg_type = accumulator
                    state.pop()
                else:
                    arg_name = accumulator
                accumulator = ""
                state.append(State.ARGUMENT_DEFAULT)

            elif is_op and text in (',', ')') and top in (
                    State.ARGUMENT_LIST, State.ARGUMENT_DEFAULT, State.ARGUMENT_TYPE):
                # End of one argument: store whichever piece was being read.
                if top == State.ARGUMENT_DEFAULT:
                    arg_default = accumulator
                    state.pop()
                elif top == State.ARGUMENT_TYPE:
                    arg_type = accumulator
                    state.pop()
                else:
                    arg_name = accumulator

                if text == ')':
                    # leave ARGUMENT_LIST
                    state.pop()

                # arg_name is empty for 'func()' and after a trailing comma;
                # do not record a nameless argument in those cases.
                if arg_name:
                    args.append(TypedArgSig(name=arg_name, type=arg_type, default=arg_default))
                arg_name = ""
                arg_type = None
                arg_default = None
                accumulator = ""

            elif is_op and text == '->':
                accumulator = ""
                state.append(State.RETURN_VALUE)

            elif token.type == tokenize.NEWLINE and top in (State.INIT, State.RETURN_VALUE):
                if top == State.RETURN_VALUE:
                    ret_type = accumulator
                    accumulator = ""
                    state.pop()

                if found:
                    signatures.append(TypedFunctionSig(name=name, args=args, ret_type=ret_type))
                    found = False
                    args = []
                # Reset unconditionally so that a '->' on a line without a
                # signature cannot leak into the next signature's return type.
                ret_type = 'Any'
                # leave state as INIT
            else:
                accumulator += text

        return signatures
    except (tokenize.TokenError, IndentationError):
        # Docstrings are free text, not Python: the tokenizer may stop on
        # unbalanced brackets (TokenError) or on inconsistent dedents
        # (IndentationError). Return as much as was collected.
        return signatures