Use tokenize to parse function declarations in docstrings

python · Jan 12, 2019 · 38872dc · 38872dc
1 parent cf4de08
commit 38872dc
Show file tree

Hide file tree

Showing 3 changed files with 125 additions and 63 deletions.
diff --git a/mypy/stubgenc.py b/mypy/stubgenc.py
@@ -156,7 +156,9 @@ def generate_c_function_stub(module: ModuleType,
                     sig.append(arg.name)
                 else:
                     # type info
-                    sig.append('{}: {}'.format(arg.name, strip_or_import(arg.type, module, imports)))
+                    sig.append('{}: {}'.format(arg.name, strip_or_import(arg.type,
+                                                                         module,
+                                                                         imports)))
 
             if is_overloaded:
                 output.append('@overload')

diff --git a/mypy/stubutil.py b/mypy/stubutil.py
@@ -1,6 +1,9 @@
+import enum
+import io
 import re
 import sys
 import os
+import tokenize
 
 from typing import Optional, Tuple, Sequence, MutableSequence, List, MutableMapping, IO, NamedTuple
 from types import ModuleType
@@ -120,64 +123,112 @@ def write_header(file: IO[str], module_name: Optional[str] = None,
         '# NOTE: This dynamically typed stub was automatically generated by stubgen.\n\n')
 
 
+class State(enum.Enum):
+    INIT = 1
+    FUNCTION_NAME = 2
+    ARGUMENT_LIST = 3
+    ARGUMENT_TYPE = 4
+    ARGUMENT_DEFAULT = 5
+    RETURN_VALUE = 6
+    OPEN_BRACKET = 7
+
+
 def infer_sig_from_docstring(docstr: str, name: str) -> Optional[List[TypedFunctionSig]]:
     if not docstr:
         return None
-    docstr = docstr.lstrip()
-    is_overloaded = any(('Overloaded function.' == x.strip()) for x in docstr.split('\n'))
-    # look for function signature, which is any string of the format
-    # <function_name>(<signature>) -> <return type>
-    # or perhaps without the return type
-
-    # in the signature, we allow the following characters:
-    # colon/equal: to match default values, like "a: int=1"
-    # comma/space/brackets: for type hints like "a: Tuple[int, float]"
-    # dot: for classes annotating using full path, like "a: foo.bar.baz"
-    # to capture return type,
-    sig_str = r'\([a-zA-Z0-9_=:, \[\]\.]*\)'
-    sig_match = r'%s(%s)' % (name, sig_str)
-    sig_match_ret = sig_match + ' -> ([a-zA-Z].*)$'
-
-    if is_overloaded:
-        def find_sig_ret() -> List[Tuple[str, str]]:
-            return re.findall(sig_match_ret, docstr, re.MULTILINE)
-
-        def find_sig() -> List[str]:
-            return re.findall(sig_match, docstr, re.MULTILINE)
-    else:
-        def find_sig_ret() -> List[Tuple[str, str]]:
-            m = re.match(sig_match_ret, docstr, re.MULTILINE)
-            if m:
-                return [(m.group(1), m.group(2))]
-            return []
-
-        def find_sig() -> List[str]:
-            m = re.match(sig_match, docstr)
-            if m:
-                return [m.group(1)]
-            return []
-
-    sig_match_ret_res = find_sig_ret()
-    if sig_match_ret_res:
-        ret = []
-        for match_ret in sig_match_ret_res:
-            ret.append(TypedFunctionSig(
-                name=name,
-                args=infer_arg_sig_from_docstring(match_ret[0]),
-                ret_type=match_ret[1].rstrip()
-            ))
-        return ret
-    sig_match_res = find_sig()
-    if sig_match_res:
-        ret = []
-        for match in sig_match_res:
-            ret.append(TypedFunctionSig(
-                name=name,
-                args=infer_arg_sig_from_docstring(match),
-                ret_type='Any'
-            ))
-        return ret
-    return None
+
+    state = [State.INIT, ]
+    accumulator = ""
+    arg_type = None
+    arg_name = ""
+    arg_default = None
+    ret_type = "Any"
+    found = False
+    args = []  # type: List[TypedArgSig]
+    signatures = []  # type: List[TypedFunctionSig]
+    try:
+        for token in tokenize.tokenize(io.BytesIO(docstr.encode('utf-8')).readline):
+            if token.type == tokenize.NAME and token.string == name and state[-1] == State.INIT:
+                state.append(State.FUNCTION_NAME)
+
+            elif token.type == tokenize.OP and token.string == '(' and state[-1] == \
+                    State.FUNCTION_NAME:
+                state.pop()
+                accumulator = ""
+                found = True
+                state.append(State.ARGUMENT_LIST)
+
+            elif state[-1] == State.FUNCTION_NAME:
+                # reset state, function name not followed by '('
+                state.pop()
+
+            elif token.type == tokenize.OP and token.string in ('[', '(', '{'):
+                accumulator += token.string
+                state.append(State.OPEN_BRACKET)
+
+            elif token.type == tokenize.OP and token.string in (']', ')', '}') and \
+                    state[-1] == State.OPEN_BRACKET:
+                accumulator += token.string
+                state.pop()
+
+            elif token.type == tokenize.OP and token.string == ':' and \
+                    state[-1] == State.ARGUMENT_LIST:
+                arg_name = accumulator
+                accumulator = ""
+                state.append(State.ARGUMENT_TYPE)
+
+            elif token.type == tokenize.OP and token.string == '=' and state[-1] in (
+                    State.ARGUMENT_LIST, State.ARGUMENT_TYPE):
+                if state[-1] == State.ARGUMENT_TYPE:
+                    arg_type = accumulator
+                    state.pop()
+                else:
+                    arg_name = accumulator
+                accumulator = ""
+                state.append(State.ARGUMENT_DEFAULT)
+
+            elif token.type == tokenize.OP and token.string in (',', ')') and state[-1] in (
+                    State.ARGUMENT_LIST, State.ARGUMENT_DEFAULT, State.ARGUMENT_TYPE):
+                if state[-1] == State.ARGUMENT_DEFAULT:
+                    arg_default = accumulator
+                    state.pop()
+                elif state[-1] == State.ARGUMENT_TYPE:
+                    arg_type = accumulator
+                    state.pop()
+                elif state[-1] == State.ARGUMENT_LIST:
+                    arg_name = accumulator
+
+                if token.string == ')':
+                    state.pop()
+                args.append(TypedArgSig(name=arg_name, type=arg_type, default=arg_default))
+                arg_name = ""
+                arg_type = None
+                arg_default = None
+                accumulator = ""
+
+            elif token.type == tokenize.OP and token.string == '->':
+                accumulator = ""
+                state.append(State.RETURN_VALUE)
+
+            elif token.type == tokenize.NEWLINE and state[-1] in (State.INIT, State.RETURN_VALUE):
+                if state[-1] == State.RETURN_VALUE:
+                    ret_type = accumulator
+                    accumulator = ""
+                    state.pop()
+
+                if found:
+                    signatures.append(TypedFunctionSig(name=name, args=args, ret_type=ret_type))
+                    found = False
+                args = []
+                ret_type = 'Any'
+                # leave state as INIT
+            else:
+                accumulator += token.string
+
+        return signatures
+    except tokenize.TokenError:
+        # return as much as collected
+        return signatures
 
 
 def infer_arg_sig_from_docstring(docstr: str) -> ArgList:

diff --git a/mypy/test/teststubgen.py b/mypy/test/teststubgen.py
@@ -141,16 +141,23 @@ def test_infer_sig_from_docstring(self) -> None:
                 name='func',
                 args=[
                     TypedArgSig(name='x', type=None, default=None),
-                    TypedArgSig(name='Y_a', type=None, default='[1, 2, 3]')
+                    TypedArgSig(name='Y_a', type=None, default='[1,2,3]')
                 ],
                 ret_type='Any'
             )]
         )
 
-        assert_equal(infer_sig_from_docstring('\nafunc(x) - y', 'func'), None)
-        assert_equal(infer_sig_from_docstring('\nfunc(x, y', 'func'), None)
-        assert_equal(infer_sig_from_docstring('\nfunc(x=z(y))', 'func'), None)
-        assert_equal(infer_sig_from_docstring('\nfunc x', 'func'), None)
+        assert_equal(infer_sig_from_docstring('\nafunc(x) - y', 'func'), [])
+        assert_equal(infer_sig_from_docstring('\nfunc(x, y', 'func'), [])
+        assert_equal(
+            infer_sig_from_docstring('\nfunc(x=z(y))', 'func'),
+            [TypedFunctionSig(
+                name='func',
+                args=[TypedArgSig(name='x', type=None, default='z(y)')],
+                ret_type='Any'
+            )]
+        )
+        assert_equal(infer_sig_from_docstring('\nfunc x', 'func'), [])
         # try to infer signature from type annotation
         assert_equal(
             infer_sig_from_docstring('\nfunc(x: int)', 'func'),
@@ -188,7 +195,7 @@ def test_infer_sig_from_docstring(self) -> None:
             infer_sig_from_docstring('\nfunc(x: Tuple[int, str]) -> str', 'func'),
             [TypedFunctionSig(
                 name='func',
-                args=[TypedArgSig(name='x', type='Tuple[int, str]', default=None)],
+                args=[TypedArgSig(name='x', type='Tuple[int,str]', default=None)],
                 ret_type='str'
             )]
         )
@@ -198,7 +205,7 @@ def test_infer_sig_from_docstring(self) -> None:
             [TypedFunctionSig(
                 name='func',
                 args=[
-                    TypedArgSig(name='x', type='Tuple[int, Tuple[str, int], str]', default=None),
+                    TypedArgSig(name='x', type='Tuple[int,Tuple[str,int],str]', default=None),
                     TypedArgSig(name='y', type='int', default=None),
                 ],
                 ret_type='str'
@@ -509,6 +516,8 @@ def __init__(self, arg0: str) -> None:
         generate_c_function_stub(mod, '__init__', TestClass.__init__, output, imports,
                                  self_var='self', class_name='TestClass')
         assert_equal(output, [
+            '@overload',
+            'def __init__(*args, **kwargs) -> Any: ...',
             '@overload',
             'def __init__(self, arg0: str) -> None: ...',
             '@overload',