Skip to content

Commit

Permalink
Use tokenize to parse function declarations in docstr
Browse files Browse the repository at this point in the history
  • Loading branch information
wiktorn committed Jan 12, 2019
1 parent cf4de08 commit 38872dc
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 63 deletions.
4 changes: 3 additions & 1 deletion mypy/stubgenc.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ def generate_c_function_stub(module: ModuleType,
sig.append(arg.name)
else:
# type info
sig.append('{}: {}'.format(arg.name, strip_or_import(arg.type, module, imports)))
sig.append('{}: {}'.format(arg.name, strip_or_import(arg.type,
module,
imports)))

if is_overloaded:
output.append('@overload')
Expand Down
161 changes: 106 additions & 55 deletions mypy/stubutil.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import enum
import io
import re
import sys
import os
import tokenize

from typing import Optional, Tuple, Sequence, MutableSequence, List, MutableMapping, IO, NamedTuple
from types import ModuleType
Expand Down Expand Up @@ -120,64 +123,112 @@ def write_header(file: IO[str], module_name: Optional[str] = None,
'# NOTE: This dynamically typed stub was automatically generated by stubgen.\n\n')


@enum.unique
class State(enum.Enum):
    """Parser states used while scanning a docstring for function signatures.

    The parser keeps a stack of these states; the top of the stack decides
    how the next token is interpreted.
    """

    # Outside of any signature; waiting for the function name.
    INIT = 1
    # The function name was just seen; an opening '(' must follow.
    FUNCTION_NAME = 2
    # Inside the parentheses of the signature, reading an argument name.
    ARGUMENT_LIST = 3
    # After ':' in an argument, reading its type annotation.
    ARGUMENT_TYPE = 4
    # After '=' in an argument, reading its default value.
    ARGUMENT_DEFAULT = 5
    # After '->', reading the return type up to the end of the line.
    RETURN_VALUE = 6
    # Inside a nested '[', '(' or '{'; separators are not interpreted here.
    OPEN_BRACKET = 7

def infer_sig_from_docstring(docstr: str, name: str) -> Optional[List[TypedFunctionSig]]:
    """Infer the signature(s) of function `name` from its docstring.

    The docstring is run through the `tokenize` module and a small stack-based
    state machine looks for occurrences of::

        <name>(<arg>[: <type>][= <default>], ...) [-> <return type>]

    Every occurrence that is terminated by a logical end of line produces one
    signature, so docstrings listing several overloads yield several entries.

    Args:
        docstr: The docstring to scan.
        name: The function name to look for.

    Returns:
        None if `docstr` is empty; otherwise the list of signatures found,
        which is empty when there is no match. Argument types, default values
        and return types are the concatenated token strings, so they contain
        no whitespace (e.g. 'Tuple[int,str]'). The return type is 'Any' when
        no '->' annotation is present. If the tokenizer fails part-way
        through, the signatures collected up to that point are returned.
    """
    if not docstr:
        return None

    state = [State.INIT]
    accumulator = ""
    arg_type = None  # type: Optional[str]
    arg_name = ""
    arg_default = None  # type: Optional[str]
    ret_type = "Any"
    found = False
    args = []  # type: List[TypedArgSig]
    signatures = []  # type: List[TypedFunctionSig]
    try:
        for token in tokenize.tokenize(io.BytesIO(docstr.encode('utf-8')).readline):
            is_op = token.type == tokenize.OP
            text = token.string
            # State on top of the stack *before* this token is processed.
            top = state[-1]

            if token.type == tokenize.NAME and text == name and top == State.INIT:
                state.append(State.FUNCTION_NAME)

            elif is_op and text == '(' and top == State.FUNCTION_NAME:
                # Start of the argument list of a candidate signature.
                state.pop()
                accumulator = ""
                found = True
                state.append(State.ARGUMENT_LIST)

            elif top == State.FUNCTION_NAME:
                # Reset state, function name not followed by '('.
                state.pop()

            elif is_op and text in ('[', '(', '{'):
                # Nested bracket: keep its text and stop interpreting ',', ':'
                # and '=' until the matching closing bracket.
                accumulator += text
                state.append(State.OPEN_BRACKET)

            elif is_op and text in (']', ')', '}') and top == State.OPEN_BRACKET:
                accumulator += text
                state.pop()

            elif is_op and text == ':' and top == State.ARGUMENT_LIST:
                arg_name = accumulator
                accumulator = ""
                state.append(State.ARGUMENT_TYPE)

            elif is_op and text == '=' and top in (State.ARGUMENT_LIST, State.ARGUMENT_TYPE):
                if top == State.ARGUMENT_TYPE:
                    arg_type = accumulator
                    state.pop()
                else:
                    arg_name = accumulator
                accumulator = ""
                state.append(State.ARGUMENT_DEFAULT)

            elif is_op and text in (',', ')') and top in (
                    State.ARGUMENT_LIST, State.ARGUMENT_DEFAULT, State.ARGUMENT_TYPE):
                # End of one argument: store whatever part was being read.
                if top == State.ARGUMENT_DEFAULT:
                    arg_default = accumulator
                    state.pop()
                elif top == State.ARGUMENT_TYPE:
                    arg_type = accumulator
                    state.pop()
                else:
                    arg_name = accumulator

                if text == ')':
                    # Leave ARGUMENT_LIST; the signature's arguments are done.
                    state.pop()
                # arg_name is empty for 'func()' or after a trailing comma;
                # do not record a nameless argument in that case.
                if arg_name:
                    args.append(TypedArgSig(name=arg_name, type=arg_type,
                                            default=arg_default))
                arg_name = ""
                arg_type = None
                arg_default = None
                accumulator = ""

            elif is_op and text == '->':
                accumulator = ""
                state.append(State.RETURN_VALUE)

            elif token.type == tokenize.NEWLINE and top in (State.INIT, State.RETURN_VALUE):
                if top == State.RETURN_VALUE:
                    ret_type = accumulator
                    accumulator = ""
                    state.pop()

                if found:
                    signatures.append(TypedFunctionSig(name=name, args=args,
                                                       ret_type=ret_type))
                    found = False
                    args = []
                    ret_type = 'Any'
                # leave state as INIT
            else:
                accumulator += text
    except (tokenize.TokenError, IndentationError):
        # Docstrings are free-form text, not Python source: unbalanced
        # brackets raise TokenError and inconsistent dedents raise
        # IndentationError. Return as much as was collected.
        pass
    return signatures


def infer_arg_sig_from_docstring(docstr: str) -> ArgList:
Expand Down
23 changes: 16 additions & 7 deletions mypy/test/teststubgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,23 @@ def test_infer_sig_from_docstring(self) -> None:
name='func',
args=[
TypedArgSig(name='x', type=None, default=None),
TypedArgSig(name='Y_a', type=None, default='[1, 2, 3]')
TypedArgSig(name='Y_a', type=None, default='[1,2,3]')
],
ret_type='Any'
)]
)

assert_equal(infer_sig_from_docstring('\nafunc(x) - y', 'func'), None)
assert_equal(infer_sig_from_docstring('\nfunc(x, y', 'func'), None)
assert_equal(infer_sig_from_docstring('\nfunc(x=z(y))', 'func'), None)
assert_equal(infer_sig_from_docstring('\nfunc x', 'func'), None)
assert_equal(infer_sig_from_docstring('\nafunc(x) - y', 'func'), [])
assert_equal(infer_sig_from_docstring('\nfunc(x, y', 'func'), [])
assert_equal(
infer_sig_from_docstring('\nfunc(x=z(y))', 'func'),
[TypedFunctionSig(
name='func',
args=[TypedArgSig(name='x', type=None, default='z(y)')],
ret_type='Any'
)]
)
assert_equal(infer_sig_from_docstring('\nfunc x', 'func'), [])
# try to infer signature from type annotation
assert_equal(
infer_sig_from_docstring('\nfunc(x: int)', 'func'),
Expand Down Expand Up @@ -188,7 +195,7 @@ def test_infer_sig_from_docstring(self) -> None:
infer_sig_from_docstring('\nfunc(x: Tuple[int, str]) -> str', 'func'),
[TypedFunctionSig(
name='func',
args=[TypedArgSig(name='x', type='Tuple[int, str]', default=None)],
args=[TypedArgSig(name='x', type='Tuple[int,str]', default=None)],
ret_type='str'
)]
)
Expand All @@ -198,7 +205,7 @@ def test_infer_sig_from_docstring(self) -> None:
[TypedFunctionSig(
name='func',
args=[
TypedArgSig(name='x', type='Tuple[int, Tuple[str, int], str]', default=None),
TypedArgSig(name='x', type='Tuple[int,Tuple[str,int],str]', default=None),
TypedArgSig(name='y', type='int', default=None),
],
ret_type='str'
Expand Down Expand Up @@ -509,6 +516,8 @@ def __init__(self, arg0: str) -> None:
generate_c_function_stub(mod, '__init__', TestClass.__init__, output, imports,
self_var='self', class_name='TestClass')
assert_equal(output, [
'@overload',
'def __init__(*args, **kwargs) -> Any: ...',
'@overload',
'def __init__(self, arg0: str) -> None: ...',
'@overload',
Expand Down

0 comments on commit 38872dc

Please sign in to comment.