From 7c408b9d7750292760ed255f744211d1ef535668 Mon Sep 17 00:00:00 2001
From: Kyle Gottfried
Date: Fri, 9 Jun 2023 01:39:47 -0400
Subject: [PATCH] ylib2to3 -- Fork lib2to3 with some blib2to3 backports; add
 support for structured pattern matching (match) and parenthesized context
 managers (#1067)

Add ylib2to3, a copy of blib2to3.

- YAPF modifications to support the match statement
- Disable double indent when splitting parameters after a keyword
- Fix comment indent inside match
- Add VS Code settings to .gitignore
- Add testMatchStatementRemoveSpacesInCaseBracket
- Add credit to black
- Use post-develop and post-install commands in setup.py
- Use package metadata for yapf.__version__
- Install YAPF as its own step in GitHub CI
- Remove PKG_INFO.ini; use importlib_metadata instead
- Move yapf.third_party to third_party, but install it as yapf_third_party

This works, but you can't do an editable install with pip. You need to
manually add a line to site-packages/easy-install.pth with the fully
qualified path to /third_party (see the sketch after the .gitignore hunk
below).

---------

Co-authored-by: Charles
---
 .github/workflows/ci.yml | 22 +-
 .gitignore | 3 +-
 .isort.cfg | 1 +
 setup.py | 25 +-
 .../__init__.py | 0
 .../yapf_third_party/_ylib2to3/Grammar.txt | 252 +++
 .../yapf_third_party/_ylib2to3/LICENSE | 254 ++++++
 .../_ylib2to3/PatternGrammar.txt | 28 +
 .../yapf_third_party/_ylib2to3/README.rst | 9 +
 .../yapf_third_party/_ylib2to3/__init__.py | 1 +
 .../yapf_third_party/_ylib2to3/fixer_util.py | 491 ++++++++++
 .../yapf_third_party/_ylib2to3/patcomp.py | 209 +++++
 .../_ylib2to3/pgen2/__init__.py | 3 +
 .../yapf_third_party/_ylib2to3/pgen2/conv.py | 254 ++++++
 .../_ylib2to3/pgen2/driver.py | 300 ++++++
 .../_ylib2to3/pgen2/grammar.py | 188 ++++
 .../_ylib2to3/pgen2/literals.py | 64 ++
 .../yapf_third_party/_ylib2to3/pgen2/parse.py | 378 ++++++++
 .../yapf_third_party/_ylib2to3/pgen2/pgen.py | 409 +++++++++
 .../yapf_third_party/_ylib2to3/pgen2/token.py | 87 ++
 .../_ylib2to3/pgen2/tokenize.py | 611 +++++++++++++
 .../yapf_third_party/_ylib2to3/pygram.py | 40 +
 .../yapf_third_party/_ylib2to3/pytree.py | 861 ++++++++++++++++++
 .../{ => yapf_third_party}/yapf_diff/LICENSE | 0
 .../yapf_third_party/yapf_diff/__init__.py | 0
 .../yapf_diff/yapf_diff.py | 0
 yapf/__init__.py | 4 +-
 yapf/pytree/blank_line_calculator.py | 2 +-
 yapf/pytree/comment_splicer.py | 6 +-
 yapf/pytree/continuation_splicer.py | 2 +-
 yapf/pytree/pytree_unwrapper.py | 26 +-
 yapf/pytree/pytree_utils.py | 11 +-
 yapf/pytree/pytree_visitor.py | 3 +-
 yapf/pytree/split_penalty.py | 5 +-
 yapf/pytree/subtype_assigner.py | 6 +-
 yapf/yapflib/errors.py | 2 +-
 yapf/yapflib/file_resources.py | 1 -
 yapf/yapflib/format_decision_state.py | 6 +-
 yapf/yapflib/format_token.py | 10 +-
 yapf/yapflib/identify_container.py | 2 +-
 yapf/yapflib/logical_line.py | 2 +-
 yapf/yapflib/reformatter.py | 5 +-
 yapf/yapflib/yapf_api.py | 1 -
 yapftests/format_token_test.py | 5 +-
 yapftests/logical_line_test.py | 5 +-
 yapftests/pytree_utils_test.py | 7 +-
 yapftests/reformatter_basic_test.py | 37 +
 yapftests/split_penalty_test.py | 3 +-
 yapftests/style_test.py | 4 +-
 yapftests/yapf_test.py | 3 +-
 50 files changed, 4590 insertions(+), 58 deletions(-)
 rename third_party/{yapf_diff => yapf_third_party}/__init__.py (100%)
 create mode 100644 third_party/yapf_third_party/_ylib2to3/Grammar.txt
 create mode 100644 third_party/yapf_third_party/_ylib2to3/LICENSE
 create mode 100644 third_party/yapf_third_party/_ylib2to3/PatternGrammar.txt
 create mode 100644 third_party/yapf_third_party/_ylib2to3/README.rst
 create mode 100644
third_party/yapf_third_party/_ylib2to3/__init__.py create mode 100644 third_party/yapf_third_party/_ylib2to3/fixer_util.py create mode 100644 third_party/yapf_third_party/_ylib2to3/patcomp.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/__init__.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/conv.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/driver.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/literals.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/parse.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/pgen.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/token.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pgen2/tokenize.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pygram.py create mode 100644 third_party/yapf_third_party/_ylib2to3/pytree.py rename third_party/{ => yapf_third_party}/yapf_diff/LICENSE (100%) create mode 100644 third_party/yapf_third_party/yapf_diff/__init__.py rename third_party/{ => yapf_third_party}/yapf_diff/yapf_diff.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d5fdba30..c4102ff2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: YAPF +name: Test with pytest on: pull_request: @@ -20,11 +20,17 @@ jobs: uses: actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0 # v4.6.0 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - name: Test with pytest - run: | - pip install pytest - pip install pytest-cov + - name: Upgrade pip + run: >- + python -m pip install + --upgrade + --disable-pip-version-check + pip + - name: Perform package installs + run: >- + pip install + . pytest + pytest-cov + - name: Test with pytest + run: pytest diff --git a/.gitignore b/.gitignore index e2ff4c7fb..6a6928eae 100644 --- a/.gitignore +++ b/.gitignore @@ -13,8 +13,9 @@ *~ # Merge files created by git. *.orig -# Byte compiled python modules. +# Compiled python. *.pyc +*.pickle # vim swap files .*.sw? .sw? 
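Editor's note on the editable-install caveat from the commit message above: the manual step (adding the repository's third_party directory to site-packages/easy-install.pth) can be scripted. The snippet below is a minimal, hypothetical sketch and is not part of this patch; it assumes it is run from the yapf checkout root in the same environment where `pip install -e .` was performed, and that site.getsitepackages() points at that environment's site-packages.

# Hypothetical helper, not part of this patch: append the repo's third_party
# directory to easy-install.pth so `yapf_third_party` resolves after an
# editable install. Assumes the current directory is the yapf checkout root.
import pathlib
import site

repo_root = pathlib.Path.cwd()  # assumed: run from the repository root
site_packages = pathlib.Path(site.getsitepackages()[0])
pth_file = site_packages / 'easy-install.pth'

with pth_file.open('a') as fd:
    # .pth files are read at interpreter startup; each line holding a
    # directory path is appended to sys.path.
    fd.write(str(repo_root / 'third_party') + '\n')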
diff --git a/.isort.cfg b/.isort.cfg index 69720b92c..d9ce8b88c 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,5 +1,6 @@ [settings] force_single_line=true +known_third_party=yapf_third_party known_yapftests=yapftests sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER,YAPFTESTS diff --git a/setup.py b/setup.py index 025ecda35..a294ccce3 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,6 @@ from setuptools import find_packages from setuptools import setup -import yapf - class RunTests(Command): user_options = [] @@ -44,7 +42,7 @@ def run(self): with codecs.open('README.rst', 'r', 'utf-8') as fd: setup( name='yapf', - version=yapf.__version__, + version='0.33.0', description='A formatter for Python code.', url='https://github.com/google/yapf', long_description=fd.read(), @@ -55,7 +53,9 @@ def run(self): options={'bdist_wheel': { 'python_tag': 'py3' }}, - packages=find_packages('.'), + packages=find_packages(where='.', include=['yapf*', 'yapftests*']) + + find_packages(where='third_party'), + package_dir={'yapf_third_party': 'third_party/yapf_third_party'}, project_urls={ 'Source': 'https://github.com/google/yapf', }, @@ -78,12 +78,25 @@ def run(self): entry_points={ 'console_scripts': [ 'yapf = yapf:run_main', - 'yapf-diff = third_party.yapf_diff.yapf_diff:main', + 'yapf-diff = yapf_third_party.yapf_diff.yapf_diff:main', ], }, cmdclass={ 'test': RunTests, }, + package_data={ + 'yapf_third_party': [ + 'yapf_diff/LICENSE', + '_ylib2to3/Grammar.txt', + '_ylib2to3/PatternGrammar.txt', + '_ylib2to3/LICENSE', + ] + }, + include_package_data=True, python_requires='>=3.7', - install_requires=['tomli>=2.0.1'], + install_requires=[ + 'importlib-metadata>=6.6.0', + 'platformdirs>=3.5.1', + 'tomli>=2.0.1', + ], ) diff --git a/third_party/yapf_diff/__init__.py b/third_party/yapf_third_party/__init__.py similarity index 100% rename from third_party/yapf_diff/__init__.py rename to third_party/yapf_third_party/__init__.py diff --git a/third_party/yapf_third_party/_ylib2to3/Grammar.txt b/third_party/yapf_third_party/_ylib2to3/Grammar.txt new file mode 100644 index 000000000..bd8a452a3 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/Grammar.txt @@ -0,0 +1,252 @@ +# Grammar for 2to3. This grammar supports Python 2.x and 3.x. + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# file_input is a module or sequence of commands read from an input file; +# single_input is a single interactive statement; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+file_input: (NEWLINE | stmt)* ENDMARKER +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) +async_funcdef: ASYNC funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' + +# The following definition for typedarglist is equivalent to this set of rules: +# +# arguments = argument (',' argument)* +# argument = tfpdef ['=' test] +# kwargs = '**' tname [','] +# args = '*' [tname_star] +# kwonly_kwargs = (',' argument)* [',' [kwargs]] +# args_kwonly_kwargs = args kwonly_kwargs | kwargs +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] +# typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs +# typedarglist = arguments ',' '/' [',' [typedargslist_no_posonly]])|(typedargslist_no_posonly)" +# +# It needs to be fully expanded to allow our LL(1) parser to work on it. + +typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [ + ',' [((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* + [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])] + ] | ((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* + [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + +tname: NAME [':' test] +tname_star: NAME [':' (test|star_expr)] +tfpdef: tname | '(' tfplist ')' +tfplist: tfpdef (',' tfpdef)* [','] + +# The following definition for varargslist is equivalent to this set of rules: +# +# arguments = argument (',' argument )* +# argument = vfpdef ['=' test] +# kwargs = '**' vname [','] +# args = '*' [vname] +# kwonly_kwargs = (',' argument )* [',' [kwargs]] +# args_kwonly_kwargs = args kwonly_kwargs | kwargs +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] +# vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs +# varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] | (vararglist_no_posonly) +# +# It needs to be fully expanded to allow our LL(1) parser to work on it. 
+ +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ + ((vfpdef ['=' test] ',')* ('*' [vname] (',' vname ['=' test])* + [',' ['**' vname [',']]] | '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + ]] | ((vfpdef ['=' test] ',')* + ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]]| '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + +vname: NAME +vfpdef: vname | '(' vfplist ')' +vfplist: vfpdef (',' vfpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt +async_stmt: ASYNC (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist_star_expr ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite + +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' ['*'] [test [(',' | 'as') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +namedexpr_test: asexpr_test [':=' asexpr_test] + +# This is actually not a real rule, though since the parser is very +# limited in terms of the strategy about match/case rules, we are inserting +# a virtual case ( as ) as a valid expression. 
Unless a better +# approach is thought, the only side effect of this seem to be just allowing +# more stuff to be parser (which would fail on the ast). +asexpr_test: test ['as' test] + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: [AWAIT] atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+ | '.' '.' '.') +listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) +testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: (subscript|star_expr) (',' (subscript|star_expr))* [','] +subscript: test [':=' test] | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictsetmaker: ( ((test ':' asexpr_test | '**' expr) + (comp_for | (',' (test ':' asexpr_test | '**' expr))* [','])) | + ((test [':=' test] | star_expr) + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test ':=' test [comp_for] | + test 'as' test | + test '=' asexpr_test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +# As noted above, testlist_safe extends the syntax allowed in list +# comprehensions and generators. We can't use it indiscriminately in all +# derivations using a comp_for-like pattern because the testlist_safe derivation +# contains comma which clashes with trailing comma in arglist. +# +# This was an issue because the parser would not follow the correct derivation +# when parsing syntactically valid Python code. Since testlist_safe was created +# specifically to handle list comprehensions and generator expressions enclosed +# with parentheses, it's safe to only use it in those. That avoids the issue; we +# can parse code like set(x for x in [],). +# +# The syntax supported by this set of rules is not a valid Python 3 syntax, +# hence the prefix "old". 
+# +# See https://bugs.python.org/issue27494 +old_comp_iter: old_comp_for | old_comp_if +old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter] +old_comp_if: 'if' old_test [old_comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + + +# 3.10 match statement definition + +# PS: normally the grammar is much much more restricted, but +# at this moment for not trying to bother much with encoding the +# exact same DSL in a LL(1) parser, we will just accept an expression +# and let the ast.parse() step of the safe mode to reject invalid +# grammar. + +# The reason why it is more restricted is that, patterns are some +# sort of a DSL (more advanced than our LHS on assignments, but +# still in a very limited python subset). They are not really +# expressions, but who cares. If we can parse them, that is enough +# to reformat them. + +match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT + +# This is more permissive than the actual version. For example it +# accepts `match *something:`, even though single-item starred expressions +# are forbidden. +subject_expr: (namedexpr_test|star_expr) (',' (namedexpr_test|star_expr))* [','] + +# cases +case_block: "case" patterns [guard] ':' suite +guard: 'if' namedexpr_test +patterns: pattern (',' pattern)* [','] +pattern: (expr|star_expr) ['as' expr] diff --git a/third_party/yapf_third_party/_ylib2to3/LICENSE b/third_party/yapf_third_party/_ylib2to3/LICENSE new file mode 100644 index 000000000..ef8df0698 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/LICENSE @@ -0,0 +1,254 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see https://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. 
All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation; All +Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. 
This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. 
Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. 
+ +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/third_party/yapf_third_party/_ylib2to3/PatternGrammar.txt b/third_party/yapf_third_party/_ylib2to3/PatternGrammar.txt new file mode 100644 index 000000000..36bf81482 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/PatternGrammar.txt @@ -0,0 +1,28 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# A grammar to describe tree matching patterns. +# Not shown here: +# - 'TOKEN' stands for any token (leaf node) +# - 'any' stands for any node (leaf or interior) +# With 'any' we can still specify the sub-structure. + +# The start symbol is 'Matcher'. + +Matcher: Alternatives ENDMARKER + +Alternatives: Alternative ('|' Alternative)* + +Alternative: (Unit | NegatedUnit)+ + +Unit: [NAME '='] ( STRING [Repeater] + | NAME [Details] [Repeater] + | '(' Alternatives ')' [Repeater] + | '[' Alternatives ']' + ) + +NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')') + +Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}' + +Details: '<' Alternatives '>' diff --git a/third_party/yapf_third_party/_ylib2to3/README.rst b/third_party/yapf_third_party/_ylib2to3/README.rst new file mode 100644 index 000000000..21d69b89a --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/README.rst @@ -0,0 +1,9 @@ +A fork of python's lib2to3 with select features backported from black's blib2to3. + +Reasons for forking: + +- black's fork of lib2to3 already considers newer features like Structured Pattern matching +- lib2to3 itself is deprecated and no longer getting support + +Maintenance moving forward: +- Most changes moving forward should only have to be done to the grammar files in this project. diff --git a/third_party/yapf_third_party/_ylib2to3/__init__.py b/third_party/yapf_third_party/_ylib2to3/__init__.py new file mode 100644 index 000000000..1de94366c --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/__init__.py @@ -0,0 +1 @@ +"""fork of python's lib2to3 with some backports from black's blib2to3""" diff --git a/third_party/yapf_third_party/_ylib2to3/fixer_util.py b/third_party/yapf_third_party/_ylib2to3/fixer_util.py new file mode 100644 index 000000000..c8ff3ac9b --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/fixer_util.py @@ -0,0 +1,491 @@ +"""Utility functions, node construction macros, etc.""" +# Author: Collin Winter + +from . 
import patcomp +# Local imports +from .pgen2 import token +from .pygram import python_symbols as syms +from .pytree import Leaf +from .pytree import Node + +########################################################### +# Common node-construction "macros" +########################################################### + + +def KeywordArg(keyword, value): + return Node(syms.argument, [keyword, Leaf(token.EQUAL, '='), value]) + + +def LParen(): + return Leaf(token.LPAR, '(') + + +def RParen(): + return Leaf(token.RPAR, ')') + + +def Assign(target, source): + """Build an assignment statement""" + if not isinstance(target, list): + target = [target] + if not isinstance(source, list): + source.prefix = ' ' + source = [source] + + return Node(syms.atom, target + [Leaf(token.EQUAL, '=', prefix=' ')] + source) + + +def Name(name, prefix=None): + """Return a NAME leaf""" + return Leaf(token.NAME, name, prefix=prefix) + + +def Attr(obj, attr): + """A node tuple for obj.attr""" + return [obj, Node(syms.trailer, [Dot(), attr])] + + +def Comma(): + """A comma leaf""" + return Leaf(token.COMMA, ',') + + +def Dot(): + """A period (.) leaf""" + return Leaf(token.DOT, '.') + + +def ArgList(args, lparen=LParen(), rparen=RParen()): + """A parenthesised argument list, used by Call()""" + node = Node(syms.trailer, [lparen.clone(), rparen.clone()]) + if args: + node.insert_child(1, Node(syms.arglist, args)) + return node + + +def Call(func_name, args=None, prefix=None): + """A function call""" + node = Node(syms.power, [func_name, ArgList(args)]) + if prefix is not None: + node.prefix = prefix + return node + + +def Newline(): + """A newline literal""" + return Leaf(token.NEWLINE, '\n') + + +def BlankLine(): + """A blank line""" + return Leaf(token.NEWLINE, '') + + +def Number(n, prefix=None): + return Leaf(token.NUMBER, n, prefix=prefix) + + +def Subscript(index_node): + """A numeric or string subscript""" + return Node(syms.trailer, + [Leaf(token.LBRACE, '['), index_node, + Leaf(token.RBRACE, ']')]) + + +def String(string, prefix=None): + """A string leaf""" + return Leaf(token.STRING, string, prefix=prefix) + + +def ListComp(xp, fp, it, test=None): + """A list comprehension of the form [xp for fp in it if test]. + + If test is None, the "if test" part is omitted. + """ + xp.prefix = '' + fp.prefix = ' ' + it.prefix = ' ' + for_leaf = Leaf(token.NAME, 'for') + for_leaf.prefix = ' ' + in_leaf = Leaf(token.NAME, 'in') + in_leaf.prefix = ' ' + inner_args = [for_leaf, fp, in_leaf, it] + if test: + test.prefix = ' ' + if_leaf = Leaf(token.NAME, 'if') + if_leaf.prefix = ' ' + inner_args.append(Node(syms.comp_if, [if_leaf, test])) + inner = Node(syms.listmaker, [xp, Node(syms.comp_for, inner_args)]) + return Node(syms.atom, + [Leaf(token.LBRACE, '['), inner, + Leaf(token.RBRACE, ']')]) + + +def FromImport(package_name, name_leafs): + """ Return an import statement in the form: + from package import name_leafs""" + # XXX: May not handle dotted imports properly (eg, package_name='foo.bar') + # #assert package_name == '.' or '.' not in package_name, "FromImport has "\ + # "not been tested with dotted package names -- use at your own "\ + # "peril!" 
+ + for leaf in name_leafs: + # Pull the leaves out of their old tree + leaf.remove() + + children = [ + Leaf(token.NAME, 'from'), + Leaf(token.NAME, package_name, prefix=' '), + Leaf(token.NAME, 'import', prefix=' '), + Node(syms.import_as_names, name_leafs) + ] + imp = Node(syms.import_from, children) + return imp + + +def ImportAndCall(node, results, names): + """Returns an import statement and calls a method + of the module: + + import module + module.name()""" + obj = results['obj'].clone() + if obj.type == syms.arglist: + newarglist = obj.clone() + else: + newarglist = Node(syms.arglist, [obj.clone()]) + after = results['after'] + if after: + after = [n.clone() for n in after] + new = Node( + syms.power, + Attr(Name(names[0]), Name(names[1])) + [ + Node(syms.trailer, + [results['lpar'].clone(), newarglist, results['rpar'].clone()]) + ] + after) + new.prefix = node.prefix + return new + + +########################################################### +# Determine whether a node represents a given literal +########################################################### + + +def is_tuple(node): + """Does the node represent a tuple literal?""" + if isinstance(node, Node) and node.children == [LParen(), RParen()]: + return True + return (isinstance(node, Node) and len(node.children) == 3 and + isinstance(node.children[0], Leaf) and + isinstance(node.children[1], Node) and + isinstance(node.children[2], Leaf) and + node.children[0].value == '(' and node.children[2].value == ')') + + +def is_list(node): + """Does the node represent a list literal?""" + return (isinstance(node, Node) and len(node.children) > 1 and + isinstance(node.children[0], Leaf) and + isinstance(node.children[-1], Leaf) and + node.children[0].value == '[' and node.children[-1].value == ']') + + +########################################################### +# Misc +########################################################### + + +def parenthesize(node): + return Node(syms.atom, [LParen(), node, RParen()]) + + +consuming_calls = { + 'sorted', 'list', 'set', 'any', 'all', 'tuple', 'sum', 'min', 'max', + 'enumerate' +} + + +def attr_chain(obj, attr): + """Follow an attribute chain. + + If you have a chain of objects where a.foo -> b, b.foo-> c, etc, + use this to iterate over all objects in the chain. Iteration is + terminated by getattr(x, attr) is None. + + Args: + obj: the starting object + attr: the name of the chaining attribute + + Yields: + Each successive object in the chain. + """ + next = getattr(obj, attr) + while next: + yield next + next = getattr(next, attr) + + +p0 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + """ +p1 = """ +power< + ( 'iter' | 'list' | 'tuple' | 'sorted' | 'set' | 'sum' | + 'any' | 'all' | 'enumerate' | (any* trailer< '.' 'join' >) ) + trailer< '(' node=any ')' > + any* +> +""" +p2 = """ +power< + ( 'sorted' | 'enumerate' ) + trailer< '(' arglist ')' > + any* +> +""" +pats_built = False + + +def in_special_context(node): + """ Returns true if node is in an environment where all that is required + of it is being iterable (ie, it doesn't matter if it returns a list + or an iterator). + See test_map_nochange in test_fixers.py for some examples and tests. 
+ """ + global p0, p1, p2, pats_built + if not pats_built: + p0 = patcomp.compile_pattern(p0) + p1 = patcomp.compile_pattern(p1) + p2 = patcomp.compile_pattern(p2) + pats_built = True + patterns = [p0, p1, p2] + for pattern, parent in zip(patterns, attr_chain(node, 'parent')): + results = {} + if pattern.match(parent, results) and results['node'] is node: + return True + return False + + +def is_probably_builtin(node): + """ + Check that something isn't an attribute or function name etc. + """ + prev = node.prev_sibling + if prev is not None and prev.type == token.DOT: + # Attribute lookup. + return False + parent = node.parent + if parent.type in (syms.funcdef, syms.classdef): + return False + if parent.type == syms.expr_stmt and parent.children[0] is node: + # Assignment. + return False + if parent.type == syms.parameters or (parent.type == syms.typedargslist and ( + (prev is not None and prev.type == token.COMMA) or + parent.children[0] is node)): + # The name of an argument. + return False + return True + + +def find_indentation(node): + """Find the indentation of *node*.""" + while node is not None: + if node.type == syms.suite and len(node.children) > 2: + indent = node.children[1] + if indent.type == token.INDENT: + return indent.value + node = node.parent + return '' + + +########################################################### +# The following functions are to find bindings in a suite +########################################################### + + +def make_suite(node): + if node.type == syms.suite: + return node + node = node.clone() + parent, node.parent = node.parent, None + suite = Node(syms.suite, [node]) + suite.parent = parent + return suite + + +def find_root(node): + """Find the top level namespace.""" + # Scamper up to the top level namespace + while node.type != syms.file_input: + node = node.parent + if not node: + raise ValueError('root found before file_input node was found.') + return node + + +def does_tree_import(package, name, node): + """ Returns true if name is imported from package at the + top level of the tree which node belongs to. + To cover the case of an import like 'import foo', use + None for the package and 'foo' for the name. """ + binding = find_binding(name, find_root(node), package) + return bool(binding) + + +def is_import(node): + """Returns true if the node is an import statement.""" + return node.type in (syms.import_name, syms.import_from) + + +def touch_import(package, name, node): + """ Works like `does_tree_import` but adds an import statement + if it was not imported. """ + + def is_import_stmt(node): + return (node.type == syms.simple_stmt and node.children and + is_import(node.children[0])) + + root = find_root(node) + + if does_tree_import(package, name, root): + return + + # figure out where to insert the new import. First try to find + # the first import and then skip to the last one. + insert_pos = offset = 0 + for idx, node in enumerate(root.children): + if not is_import_stmt(node): + continue + for offset, node2 in enumerate(root.children[idx:]): + if not is_import_stmt(node2): + break + insert_pos = idx + offset + break + + # if there are no imports where we can insert, find the docstring. 
+ # if that also fails, we stick to the beginning of the file + if insert_pos == 0: + for idx, node in enumerate(root.children): + if (node.type == syms.simple_stmt and node.children and + node.children[0].type == token.STRING): + insert_pos = idx + 1 + break + + if package is None: + import_ = Node( + syms.import_name, + [Leaf(token.NAME, 'import'), + Leaf(token.NAME, name, prefix=' ')]) + else: + import_ = FromImport(package, [Leaf(token.NAME, name, prefix=' ')]) + + children = [import_, Newline()] + root.insert_child(insert_pos, Node(syms.simple_stmt, children)) + + +_def_syms = {syms.classdef, syms.funcdef} + + +def find_binding(name, node, package=None): + """ Returns the node which binds variable name, otherwise None. + If optional argument package is supplied, only imports will + be returned. + See test cases for examples.""" + for child in node.children: + ret = None + if child.type == syms.for_stmt: + if _find(name, child.children[1]): + return child + n = find_binding(name, make_suite(child.children[-1]), package) + if n: + ret = n + elif child.type in (syms.if_stmt, syms.while_stmt): + n = find_binding(name, make_suite(child.children[-1]), package) + if n: + ret = n + elif child.type == syms.try_stmt: + n = find_binding(name, make_suite(child.children[2]), package) + if n: + ret = n + else: + for i, kid in enumerate(child.children[3:]): + if kid.type == token.COLON and kid.value == ':': + # i+3 is the colon, i+4 is the suite + n = find_binding(name, make_suite(child.children[i + 4]), package) + if n: + ret = n + elif child.type in _def_syms and child.children[1].value == name: + ret = child + elif _is_import_binding(child, name, package): + ret = child + elif child.type == syms.simple_stmt: + ret = find_binding(name, child, package) + elif child.type == syms.expr_stmt: + if _find(name, child.children[0]): + ret = child + + if ret: + if not package: + return ret + if is_import(ret): + return ret + return None + + +_block_syms = {syms.funcdef, syms.classdef, syms.trailer} + + +def _find(name, node): + nodes = [node] + while nodes: + node = nodes.pop() + if node.type > 256 and node.type not in _block_syms: + nodes.extend(node.children) + elif node.type == token.NAME and node.value == name: + return node + return None + + +def _is_import_binding(node, name, package=None): + """ Will return node if node will import name, or node + will import * from package. None is returned otherwise. + See test cases for examples. """ + + if node.type == syms.import_name and not package: + imp = node.children[1] + if imp.type == syms.dotted_as_names: + for child in imp.children: + if child.type == syms.dotted_as_name: + if child.children[2].value == name: + return node + elif child.type == token.NAME and child.value == name: + return node + elif imp.type == syms.dotted_as_name: + last = imp.children[-1] + if last.type == token.NAME and last.value == name: + return node + elif imp.type == token.NAME and imp.value == name: + return node + elif node.type == syms.import_from: + # str(...) is used to make life easier here, because + # from a.b import parses to ['import', ['a', '.', 'b'], ...] 
+ if package and str(node.children[1]).strip() != package: + return None + n = node.children[3] + if package and _find('as', n): + # See test_from_import_as for explanation + return None + elif n.type == syms.import_as_names and _find(name, n): + return node + elif n.type == syms.import_as_name: + child = n.children[2] + if child.type == token.NAME and child.value == name: + return node + elif n.type == token.NAME and n.value == name: + return node + elif package and n.type == token.STAR: + return node + return None diff --git a/third_party/yapf_third_party/_ylib2to3/patcomp.py b/third_party/yapf_third_party/_ylib2to3/patcomp.py new file mode 100644 index 000000000..20b7893d3 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/patcomp.py @@ -0,0 +1,209 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Pattern compiler. + +The grammar is taken from PatternGrammar.txt. + +The compiler compiles a pattern to a pytree.*Pattern instance. +""" + +__author__ = 'Guido van Rossum ' + +# Python imports +import io + +# Really local imports +from . import pygram +from . import pytree +# Fairly local imports +from .pgen2 import driver +from .pgen2 import grammar +from .pgen2 import literals +from .pgen2 import parse +from .pgen2 import token +from .pgen2 import tokenize + + +class PatternSyntaxError(Exception): + pass + + +def tokenize_wrapper(input): + """Tokenizes a string suppressing significant whitespace.""" + skip = {token.NEWLINE, token.INDENT, token.DEDENT} + tokens = tokenize.generate_tokens(io.StringIO(input).readline) + for quintuple in tokens: + type, value, start, end, line_text = quintuple + if type not in skip: + yield quintuple + + +class PatternCompiler(object): + + def __init__(self, grammar_file=None): + """Initializer. + + Takes an optional alternative filename for the pattern grammar. + """ + if grammar_file is None: + self.grammar = pygram.pattern_grammar + self.syms = pygram.pattern_symbols + else: + self.grammar = driver.load_grammar(grammar_file) + self.syms = pygram.Symbols(self.grammar) + self.pygrammar = pygram.python_grammar + self.pysyms = pygram.python_symbols + self.driver = driver.Driver(self.grammar, convert=pattern_convert) + + def compile_pattern(self, input, debug=False, with_tree=False): + """Compiles a pattern string to a nested pytree.*Pattern object.""" + tokens = tokenize_wrapper(input) + try: + root = self.driver.parse_tokens(tokens, debug=debug) + except parse.ParseError as e: + raise PatternSyntaxError(str(e)) from None + if with_tree: + return self.compile_node(root), root + else: + return self.compile_node(root) + + def compile_node(self, node): + """Compiles a node, recursively. + + This is one big switch on the node type. 
+ """ + # XXX Optimize certain Wildcard-containing-Wildcard patterns + # that can be merged + if node.type == self.syms.Matcher: + node = node.children[0] # Avoid unneeded recursion + + if node.type == self.syms.Alternatives: + # Skip the odd children since they are just '|' tokens + alts = [self.compile_node(ch) for ch in node.children[::2]] + if len(alts) == 1: + return alts[0] + p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) + return p.optimize() + + if node.type == self.syms.Alternative: + units = [self.compile_node(ch) for ch in node.children] + if len(units) == 1: + return units[0] + p = pytree.WildcardPattern([units], min=1, max=1) + return p.optimize() + + if node.type == self.syms.NegatedUnit: + pattern = self.compile_basic(node.children[1:]) + p = pytree.NegatedPattern(pattern) + return p.optimize() + + assert node.type == self.syms.Unit + + name = None + nodes = node.children + if len(nodes) >= 3 and nodes[1].type == token.EQUAL: + name = nodes[0].value + nodes = nodes[2:] + repeat = None + if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: + repeat = nodes[-1] + nodes = nodes[:-1] + + # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] + pattern = self.compile_basic(nodes, repeat) + + if repeat is not None: + assert repeat.type == self.syms.Repeater + children = repeat.children + child = children[0] + if child.type == token.STAR: + min = 0 + max = pytree.HUGE + elif child.type == token.PLUS: + min = 1 + max = pytree.HUGE + elif child.type == token.LBRACE: + assert children[-1].type == token.RBRACE + assert len(children) in (3, 5) + min = max = self.get_int(children[1]) + if len(children) == 5: + max = self.get_int(children[3]) + else: + assert False + if min != 1 or max != 1: + pattern = pattern.optimize() + pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) + + if name is not None: + pattern.name = name + return pattern.optimize() + + def compile_basic(self, nodes, repeat=None): + # Compile STRING | NAME [Details] | (...) | [...] 
+ assert len(nodes) >= 1 + node = nodes[0] + if node.type == token.STRING: + value = str(literals.evalString(node.value)) + return pytree.LeafPattern(_type_of_literal(value), value) + elif node.type == token.NAME: + value = node.value + if value.isupper(): + if value not in TOKEN_MAP: + raise PatternSyntaxError('Invalid token: %r' % value) + if nodes[1:]: + raise PatternSyntaxError("Can't have details for token") + return pytree.LeafPattern(TOKEN_MAP[value]) + else: + if value == 'any': + type = None + elif not value.startswith('_'): + type = getattr(self.pysyms, value, None) + if type is None: + raise PatternSyntaxError('Invalid symbol: %r' % value) + if nodes[1:]: # Details present + content = [self.compile_node(nodes[1].children[1])] + else: + content = None + return pytree.NodePattern(type, content) + elif node.value == '(': + return self.compile_node(nodes[1]) + elif node.value == '[': + assert repeat is None + subpattern = self.compile_node(nodes[1]) + return pytree.WildcardPattern([[subpattern]], min=0, max=1) + assert False, node + + def get_int(self, node): + assert node.type == token.NUMBER + return int(node.value) + + +# Map named tokens to the type value for a LeafPattern +TOKEN_MAP = { + 'NAME': token.NAME, + 'STRING': token.STRING, + 'NUMBER': token.NUMBER, + 'TOKEN': None +} + + +def _type_of_literal(value): + if value[0].isalpha(): + return token.NAME + elif value in grammar.opmap: + return grammar.opmap[value] + else: + return None + + +def pattern_convert(grammar, raw_node_info): + """Converts raw node information to a Node or Leaf instance.""" + type, value, context, children = raw_node_info + if children or type in grammar.number2symbol: + return pytree.Node(type, children, context=context) + else: + return pytree.Leaf(type, value, context=context) + + +def compile_pattern(pattern): + return PatternCompiler().compile_pattern(pattern) diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/__init__.py b/third_party/yapf_third_party/_ylib2to3/pgen2/__init__.py new file mode 100644 index 000000000..7c8380a16 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/__init__.py @@ -0,0 +1,3 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""The pgen2 package.""" diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/conv.py b/third_party/yapf_third_party/_ylib2to3/pgen2/conv.py new file mode 100644 index 000000000..a446771b8 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/conv.py @@ -0,0 +1,254 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Convert graminit.[ch] spit out by pgen to Python code. + +Pgen is the Python parser generator. It is useful to quickly create a +parser from a grammar file in Python's grammar notation. But I don't +want my parsers to be written in C (yet), so I'm translating the +parsing tables to Python data structures and writing a Python parse +engine. + +Note that the token numbers are constants determined by the standard +Python tokenizer. The standard token module defines these numbers and +their names (the names are not used much). The token numbers are +hardcoded into the Python tokenizer and into pgen. A Python +implementation of the Python tokenizer is also available, in the +standard tokenize module. + +On the other hand, symbol numbers (representing the grammar's +non-terminals) are assigned by pgen based on the actual grammar +input. 
+ +Note: this module is pretty much obsolete; the pgen module generates +equivalent grammar tables directly from the Grammar.txt input file +without having to invoke the Python pgen C program. + +""" + +# Python imports +import re + +# Local imports +from pgen2 import grammar +from pgen2 import token + + +class Converter(grammar.Grammar): + """Grammar subclass that reads classic pgen output files. + + The run() method reads the tables as produced by the pgen parser + generator, typically contained in two C files, graminit.h and + graminit.c. The other methods are for internal use only. + + See the base class for more documentation. + + """ + + def run(self, graminit_h, graminit_c): + """Load the grammar tables from the text files written by pgen.""" + self.parse_graminit_h(graminit_h) + self.parse_graminit_c(graminit_c) + self.finish_off() + + def parse_graminit_h(self, filename): + """Parse the .h file written by pgen. (Internal) + + This file is a sequence of #define statements defining the + nonterminals of the grammar as numbers. We build two tables + mapping the numbers to names and back. + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + self.symbol2number = {} + self.number2symbol = {} + lineno = 0 + for line in f: + lineno += 1 + mo = re.match(r'^#define\s+(\w+)\s+(\d+)$', line) + if not mo and line.strip(): + print("%s(%s): can't parse %s" % (filename, lineno, line.strip())) + else: + symbol, number = mo.groups() + number = int(number) + assert symbol not in self.symbol2number + assert number not in self.number2symbol + self.symbol2number[symbol] = number + self.number2symbol[number] = symbol + return True + + def parse_graminit_c(self, filename): + """Parse the .c file written by pgen. (Internal) + + The file looks as follows. The first two lines are always this: + + #include "pgenheaders.h" + #include "grammar.h" + + After that come four blocks: + + 1) one or more state definitions + 2) a table defining dfas + 3) a table defining labels + 4) a struct defining the grammar + + A state definition has the following form: + - one or more arc arrays, each of the form: + static arc arcs__[] = { + {, }, + ... + }; + - followed by a state array, of the form: + static state states_[] = { + {, arcs__}, + ... + }; + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + # The code below essentially uses f's iterator-ness! 
+ lineno = 0 + + # Expect the two #include lines + lineno, line = lineno + 1, next(f) + assert line == '#include "pgenheaders.h"\n', (lineno, line) + lineno, line = lineno + 1, next(f) + assert line == '#include "grammar.h"\n', (lineno, line) + + # Parse the state definitions + lineno, line = lineno + 1, next(f) + allarcs = {} + states = [] + while line.startswith('static arc '): + while line.startswith('static arc '): + mo = re.match(r'static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$', line) + assert mo, (lineno, line) + n, m, k = list(map(int, mo.groups())) + arcs = [] + for _ in range(k): + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+{(\d+), (\d+)},$', line) + assert mo, (lineno, line) + i, j = list(map(int, mo.groups())) + arcs.append((i, j)) + lineno, line = lineno + 1, next(f) + assert line == '};\n', (lineno, line) + allarcs[(n, m)] = arcs + lineno, line = lineno + 1, next(f) + mo = re.match(r'static state states_(\d+)\[(\d+)\] = {$', line) + assert mo, (lineno, line) + s, t = list(map(int, mo.groups())) + assert s == len(states), (lineno, line) + state = [] + for _ in range(t): + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+{(\d+), arcs_(\d+)_(\d+)},$', line) + assert mo, (lineno, line) + k, n, m = list(map(int, mo.groups())) + arcs = allarcs[n, m] + assert k == len(arcs), (lineno, line) + state.append(arcs) + states.append(state) + lineno, line = lineno + 1, next(f) + assert line == '};\n', (lineno, line) + lineno, line = lineno + 1, next(f) + self.states = states + + # Parse the dfas + dfas = {} + mo = re.match(r'static dfa dfas\[(\d+)\] = {$', line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + for i in range(ndfas): + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line) + assert mo, (lineno, line) + symbol = mo.group(2) + number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) + assert self.symbol2number[symbol] == number, (lineno, line) + assert self.number2symbol[number] == symbol, (lineno, line) + assert x == 0, (lineno, line) + state = states[z] + assert y == len(state), (lineno, line) + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line) + assert mo, (lineno, line) + first = {} + rawbitset = eval(mo.group(1)) + for i, c in enumerate(rawbitset): + byte = ord(c) + for j in range(8): + if byte & (1 << j): + first[i * 8 + j] = 1 + dfas[number] = (state, first) + lineno, line = lineno + 1, next(f) + assert line == '};\n', (lineno, line) + self.dfas = dfas + + # Parse the labels + labels = [] + lineno, line = lineno + 1, next(f) + mo = re.match(r'static label labels\[(\d+)\] = {$', line) + assert mo, (lineno, line) + nlabels = int(mo.group(1)) + for i in range(nlabels): + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line) + assert mo, (lineno, line) + x, y = mo.groups() + x = int(x) + if y == '0': + y = None + else: + y = eval(y) + labels.append((x, y)) + lineno, line = lineno + 1, next(f) + assert line == '};\n', (lineno, line) + self.labels = labels + + # Parse the grammar struct + lineno, line = lineno + 1, next(f) + assert line == 'grammar _PyParser_Grammar = {\n', (lineno, line) + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+(\d+),$', line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + assert ndfas == len(self.dfas) + lineno, line = lineno + 1, next(f) + assert line == '\tdfas,\n', (lineno, line) + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+{(\d+), labels},$', line) + assert mo, (lineno, 
line) + nlabels = int(mo.group(1)) + assert nlabels == len(self.labels), (lineno, line) + lineno, line = lineno + 1, next(f) + mo = re.match(r'\s+(\d+)$', line) + assert mo, (lineno, line) + start = int(mo.group(1)) + assert start in self.number2symbol, (lineno, line) + self.start = start + lineno, line = lineno + 1, next(f) + assert line == '};\n', (lineno, line) + try: + lineno, line = lineno + 1, next(f) + except StopIteration: + pass + else: + assert 0, (lineno, line) + + def finish_off(self): + """Create additional useful structures. (Internal).""" + self.keywords = {} # map from keyword strings to arc labels + self.tokens = {} # map from numeric token values to arc labels + for ilabel, (type, value) in enumerate(self.labels): + if type == token.NAME and value is not None: + self.keywords[value] = ilabel + elif value is None: + self.tokens[type] = ilabel diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/driver.py b/third_party/yapf_third_party/_ylib2to3/pgen2/driver.py new file mode 100644 index 000000000..9345fe5aa --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/driver.py @@ -0,0 +1,300 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Parser driver. + +This provides a high-level interface to parse a file into a syntax tree. + +""" + +__author__ = 'Guido van Rossum ' + +__all__ = ['Driver', 'load_grammar'] + +import io +import logging +import os +import pkgutil +import sys +# Python imports +from configparser import ConfigParser +from contextlib import contextmanager +from dataclasses import dataclass +from dataclasses import field +from pathlib import Path +from pkgutil import get_data +from typing import Any +from typing import Iterator +from typing import List +from typing import Optional + +from importlib_metadata import metadata +from platformdirs import user_cache_dir + +# Pgen imports +from . import grammar +from . import parse +from . import pgen +from . import token +from . import tokenize + + +@dataclass +class ReleaseRange: + start: int + end: Optional[int] = None + tokens: List[Any] = field(default_factory=list) + + def lock(self) -> None: + total_eaten = len(self.tokens) + self.end = self.start + total_eaten + + +class TokenProxy: + + def __init__(self, generator: Any) -> None: + self._tokens = generator + self._counter = 0 + self._release_ranges: List[ReleaseRange] = [] + + @contextmanager + def release(self) -> Iterator['TokenProxy']: + release_range = ReleaseRange(self._counter) + self._release_ranges.append(release_range) + try: + yield self + finally: + # Lock the last release range to the final position that + # has been eaten. + release_range.lock() + + def eat(self, point: int) -> Any: + eaten_tokens = self._release_ranges[-1].tokens + if point < len(eaten_tokens): + return eaten_tokens[point] + else: + while point >= len(eaten_tokens): + token = next(self._tokens) + eaten_tokens.append(token) + return token + + def __iter__(self) -> 'TokenProxy': + return self + + def __next__(self) -> Any: + # If the current position is already compromised (looked up) + # return the eaten token, if not just go further on the given + # token producer. 
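    # (While a release() block is active, every token pulled via eat() is
    # recorded in the current ReleaseRange; once the range is locked, the
    # loop below replays those recorded tokens for positions inside the
    # range instead of consuming the underlying generator a second time.)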
+ for release_range in self._release_ranges: + assert release_range.end is not None + + start, end = release_range.start, release_range.end + if start <= self._counter < end: + token = release_range.tokens[self._counter - start] + break + else: + token = next(self._tokens) + self._counter += 1 + return token + + def can_advance(self, to: int) -> bool: + # Try to eat, fail if it can't. The eat operation is cached + # so there wont be any additional cost of eating here + try: + self.eat(to) + except StopIteration: + return False + else: + return True + + +class Driver(object): + + def __init__(self, grammar, convert=None, logger=None): + self.grammar = grammar + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.convert = convert + + def parse_tokens(self, tokens, debug=False): + """Parse a series of tokens and return the syntax tree.""" + # XXX Move the prefix computation into a wrapper around tokenize. + p = parse.Parser(self.grammar, self.convert) + proxy = TokenProxy(tokens) + p.setup(proxy=proxy) + lineno = 1 + column = 0 + type = value = start = end = line_text = None + prefix = '' + for quintuple in proxy: + type, value, start, end, line_text = quintuple + if start != (lineno, column): + assert (lineno, column) <= start, ((lineno, column), start) + s_lineno, s_column = start + if lineno < s_lineno: + prefix += '\n' * (s_lineno - lineno) + lineno = s_lineno + column = 0 + if column < s_column: + prefix += line_text[column:s_column] + column = s_column + if type in (tokenize.COMMENT, tokenize.NL): + prefix += value + lineno, column = end + if value.endswith('\n'): + lineno += 1 + column = 0 + continue + if type == token.OP: + type = grammar.opmap[value] + if debug: + self.logger.debug('%s %r (prefix=%r)', token.tok_name[type], value, + prefix) + if p.addtoken(type, value, (prefix, start)): + if debug: + self.logger.debug('Stop.') + break + prefix = '' + lineno, column = end + if value.endswith('\n'): + lineno += 1 + column = 0 + else: + # We never broke out -- EOF is too soon (how can this happen???) + raise parse.ParseError('incomplete input', type, value, (prefix, start)) + return p.rootnode + + def parse_stream_raw(self, stream, debug=False): + """Parse a stream and return the syntax tree.""" + tokens = tokenize.generate_tokens(stream.readline) + return self.parse_tokens(tokens, debug) + + def parse_stream(self, stream, debug=False): + """Parse a stream and return the syntax tree.""" + return self.parse_stream_raw(stream, debug) + + def parse_file(self, filename, encoding=None, debug=False): + """Parse a file and return the syntax tree.""" + with io.open(filename, 'r', encoding=encoding) as stream: + return self.parse_stream(stream, debug) + + def parse_string(self, text, debug=False): + """Parse a string and return the syntax tree.""" + tokens = tokenize.generate_tokens(io.StringIO(text).readline) + return self.parse_tokens(tokens, debug) + + +def _generate_pickle_name(gt): + # type:(str) -> str + """Get the filepath to write a pickle file to + given the path of a grammar textfile. + + The returned filepath should be in a user-specific cache directory. 
+ + Args: + gt (str): path to grammar text file + + Returns: + str: path to pickle file + """ + + grammar_textfile_name = os.path.basename(gt) + head, tail = os.path.splitext(grammar_textfile_name) + if tail == '.txt': + tail = '' + cache_dir = user_cache_dir( + appname=metadata('yapf')['Name'].upper(), + appauthor=metadata('yapf')['Author'].split(' ')[0], + version=metadata('yapf')['Version'], + ) + return cache_dir + os.sep + head + tail + '-py' + '.'.join( + map(str, sys.version_info)) + '.pickle' + + +def load_grammar(gt='Grammar.txt', + gp=None, + save=True, + force=False, + logger=None): + # type:(str, str | None, bool, bool, logging.Logger | None) -> grammar.Grammar + """Load the grammar (maybe from a pickle).""" + if logger is None: + logger = logging.getLogger() + gp = _generate_pickle_name(gt) if gp is None else gp + grammar_text = gt + try: + newer = _newer(gp, gt) + except OSError as err: + logger.debug('OSError, could not check if newer: %s', err.args) + newer = True + if not os.path.exists(gt): + # Assume package data + gt_basename = os.path.basename(gt) + pd = pkgutil.get_data('yapf_third_party._ylib2to3', gt_basename) + if pd is None: + raise RuntimeError('Failed to load grammer %s from package' % gt_basename) + grammar_text = io.StringIO(pd.decode(encoding='utf-8')) + if force or not newer: + g = pgen.generate_grammar(grammar_text) + if save: + try: + Path(gp).parent.mkdir(parents=True, exist_ok=True) + g.dump(gp) + except OSError: + # Ignore error, caching is not vital. + pass + else: + g = grammar.Grammar() + g.load(gp) + return g + + +def _newer(a, b): + """Inquire whether file a was written since file b.""" + if not os.path.exists(a): + return False + if not os.path.exists(b): + return True + return os.path.getmtime(a) >= os.path.getmtime(b) + + +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. + + """ # noqa: E501 + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + +def main(*args): + """Main program, when run as a script: produce grammar pickle files. + + Calls load_grammar for each argument, a path to a grammar text file. + """ + if not args: + args = sys.argv[1:] + logging.basicConfig( + level=logging.INFO, stream=sys.stdout, format='%(message)s') + for gt in args: + load_grammar(gt, save=True, force=True) + return True + + +if __name__ == '__main__': + sys.exit(int(not main())) diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py b/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py new file mode 100644 index 000000000..0840c3c71 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py @@ -0,0 +1,188 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""This module defines the data structures used to represent a grammar. 
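A minimal, illustrative way to load these tables and parse source with the
vendored package (assuming it is importable as yapf_third_party, as set up
by this patch) is:

    from yapf_third_party._ylib2to3 import pygram, pytree
    from yapf_third_party._ylib2to3.pgen2 import driver

    d = driver.Driver(pygram.python_grammar_no_print_statement,
                      convert=pytree.convert)
    tree = d.parse_string('x = 1\n')
    assert str(tree) == 'x = 1\n'   # the concrete tree round-trips the source

pygram builds its grammars via pgen2.driver.load_grammar(), which regenerates
the tables from Grammar.txt with pgen when no up-to-date pickle cache exists.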
+ +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. + +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +# Python imports +import pickle + +# Local imports +from . import token + + +class Grammar(object): + """Pgen parsing tables conversion class. + + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. (This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. + + """ + + def __init__(self): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, 'EMPTY')] + self.keywords = {} + self.soft_keywords = {} + self.tokens = {} + self.symbol2label = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file.""" + with open(filename, 'wb') as f: + pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, 'rb') as f: + d = pickle.load(f) + self.__dict__.update(d) + + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + + def copy(self): + """ + Copy the grammar. 
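    The copied tables are independent of the originals' dicts and lists, so
    a copy can be tweaked without touching the source grammar; pygram relies
    on this, e.g. (illustrative):

        g = python_grammar.copy()
        del g.keywords['print']   # python_grammar.keywords still has 'print'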
+ """ + new = self.__class__() + for dict_attr in ('symbol2number', 'number2symbol', 'dfas', 'keywords', + 'soft_keywords', 'tokens', 'symbol2label'): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print('s2n') + pprint(self.symbol2number) + print('n2s') + pprint(self.number2symbol) + print('states') + pprint(self.states) + print('dfas') + pprint(self.dfas) + print('labels') + pprint(self.labels) + print('start', self.start) + + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +@= ATEQUAL +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +:= COLONEQUAL +""" + +opmap = {} +for line in opmap_raw.splitlines(): + if line: + op, name = line.split() + opmap[op] = getattr(token, name) diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/literals.py b/third_party/yapf_third_party/_ylib2to3/pgen2/literals.py new file mode 100644 index 000000000..62d1d2681 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/literals.py @@ -0,0 +1,64 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Safely evaluate Python string literals without using eval().""" + +import re + +simple_escapes = { + 'a': '\a', + 'b': '\b', + 'f': '\f', + 'n': '\n', + 'r': '\r', + 't': '\t', + 'v': '\v', + "'": "'", + '"': '"', + '\\': '\\' +} + + +def escape(m): + all, tail = m.group(0, 1) + assert all.startswith('\\') + esc = simple_escapes.get(tail) + if esc is not None: + return esc + if tail.startswith('x'): + hexes = tail[1:] + if len(hexes) < 2: + raise ValueError("invalid hex string escape ('\\%s')" % tail) + try: + i = int(hexes, 16) + except ValueError: + raise ValueError("invalid hex string escape ('\\%s')" % tail) from None + else: + try: + i = int(tail, 8) + except ValueError: + raise ValueError("invalid octal string escape ('\\%s')" % tail) from None + return chr(i) + + +def evalString(s): + assert s.startswith("'") or s.startswith('"'), repr(s[:1]) + q = s[0] + if s[:3] == q * 3: + q = q * 3 + assert s.endswith(q), repr(s[-len(q):]) + assert len(s) >= 2 * len(q) + s = s[len(q):-len(q)] + return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) + + +def test(): + for i in range(256): + c = chr(i) + s = repr(c) + e = evalString(s) + if e != c: + print(i, c, s, e) + + +if __name__ == '__main__': + test() diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/parse.py b/third_party/yapf_third_party/_ylib2to3/pgen2/parse.py new file mode 100644 index 000000000..924b0e74b --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/parse.py @@ -0,0 +1,378 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Parser engine for the grammar tables generated by pgen. 
+ +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" +from contextlib import contextmanager +from typing import Any +from typing import Callable +from typing import Dict +from typing import Iterator +from typing import List +from typing import Optional +from typing import Set +from typing import Text +from typing import Tuple +from typing import cast + +from ..pytree import Context +from ..pytree import RawNode +from ..pytree import convert +# Local imports +from . import grammar +from . import token +from . import tokenize + +DFA = List[List[Tuple[int, int]]] +DFAS = Tuple[DFA, Dict[int, int]] + +# A placeholder node, used when parser is backtracking. +DUMMY_NODE = (-1, None, None, None) + + +def stack_copy( + stack: List[Tuple[DFAS, int, RawNode]]) -> List[Tuple[DFAS, int, RawNode]]: + """Nodeless stack copy.""" + return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack] + + +class Recorder: + + def __init__(self, parser: 'Parser', ilabels: List[int], + context: Context) -> None: + self.parser = parser + self._ilabels = ilabels + self.context = context # not really matter + + self._dead_ilabels: Set[int] = set() + self._start_point = self.parser.stack + self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels} + + @property + def ilabels(self) -> Set[int]: + return self._dead_ilabels.symmetric_difference(self._ilabels) + + @contextmanager + def switch_to(self, ilabel: int) -> Iterator[None]: + with self.backtrack(): + self.parser.stack = self._points[ilabel] + try: + yield + except ParseError: + self._dead_ilabels.add(ilabel) + finally: + self.parser.stack = self._start_point + + @contextmanager + def backtrack(self) -> Iterator[None]: + """ + Use the node-level invariant ones for basic parsing operations (push/pop/shift). + These still will operate on the stack; but they won't create any new nodes, or + modify the contents of any other existing nodes. + This saves us a ton of time when we are backtracking, since we + want to restore to the initial state as quick as possible, which + can only be done by having as little mutatations as possible. 
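    Concretely, while parser.is_backtracking is set, shift(), push() and
    pop() only update the (dfa, state) bookkeeping on the stack and
    substitute the DUMMY_NODE placeholder for real tree nodes.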
+ """ # noqa: E501 + is_backtracking = self.parser.is_backtracking + try: + self.parser.is_backtracking = True + yield + finally: + self.parser.is_backtracking = is_backtracking + + def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None: + func: Callable[..., Any] + if raw: + func = self.parser._addtoken + else: + func = self.parser.addtoken + + for ilabel in self.ilabels: + with self.switch_to(ilabel): + args = [tok_type, tok_val, self.context] + if raw: + args.insert(0, ilabel) + func(*args) + + def determine_route(self, + value: Text = None, + force: bool = False) -> Optional[int]: + alive_ilabels = self.ilabels + if len(alive_ilabels) == 0: + *_, most_successful_ilabel = self._dead_ilabels + raise ParseError('bad input', most_successful_ilabel, value, self.context) + + ilabel, *rest = alive_ilabels + if force or not rest: + return ilabel + else: + return None + + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__( + self, '%s: type=%r, value=%r, context=%r' % (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + + def __reduce__(self): + return type(self), (self.msg, self.type, self.value, self.context) + + +class Parser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + : + if p.addtoken(...): # parse a token; may raise ParseError + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when addtoken() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, addtoken() raises + the ParseError exception. There is no error recovery; the parser + cannot be used after a syntax error was reported (but it can be + reinitialized by calling setup()). + + """ + + def __init__(self, grammar, convert=None): + """Constructor. + + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. + + The parser is not ready yet for parsing; you must call the + setup() method to get it started. + + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, value, context, nodes) + tuple, where type is the node type (a token or symbol number), + value is None for symbols and a string for tokens, context is + None or an opaque value used for error reporting (typically a + (lineno, offset) pair), and nodes is a list of children for + symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. 
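        A converter therefore has the same shape as pattern_convert() in
        patcomp.py; an illustrative sketch (assuming pytree is imported):

            def convert(grammar, raw_node):
                type_, value, context, children = raw_node
                if children or type_ in grammar.number2symbol:
                    return pytree.Node(type_, children, context=context)
                return pytree.Leaf(type_, value, context=context)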
+ + """ + self.grammar = grammar + self.convert = convert or (lambda grammar, node: node) + self.is_backtracking = False + + def setup(self, proxy, start=None): + """Prepare for parsing. + + This *must* be called before starting to parse. + + The optional argument is an alternative start symbol; it + defaults to the grammar's start symbol. + + You can use a Parser instance to parse any number of programs; + each time you call setup() the parser is reset to an initial + state determined by the (implicit or explicit) start symbol. + + """ + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. + newnode = (start, None, None, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + self.proxy = proxy + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + # Map from token to label + ilabels = self.classify(type, value, context) + assert len(ilabels) >= 1 + + # If we have only one state to advance, we'll directly + # take it as is. + if len(ilabels) == 1: + [ilabel] = ilabels + return self._addtoken(ilabel, type, value, context) + + # If there are multiple states which we can advance (only + # happen under soft-keywords), then we will try all of them + # in parallel and as soon as one state can reach further than + # the rest, we'll choose that one. This is a pretty hacky + # and hopefully temporary algorithm. + # + # For a more detailed explanation, check out this post: + # https://tree.science/what-the-backtracking.html + + with self.proxy.release() as proxy: + counter, force = 0, False + recorder = Recorder(self, ilabels, context) + recorder.add_token(type, value, raw=True) + + next_token_value = value + while recorder.determine_route(next_token_value) is None: + if not proxy.can_advance(counter): + force = True + break + + next_token_type, next_token_value, *_ = proxy.eat(counter) + if next_token_type in (tokenize.COMMENT, tokenize.NL): + counter += 1 + continue + + if next_token_type == tokenize.OP: + next_token_type = grammar.opmap[next_token_value] + + recorder.add_token(next_token_type, next_token_value) + counter += 1 + + ilabel = cast(int, + recorder.determine_route(next_token_value, force=force)) + assert ilabel is not None + + return self._addtoken(ilabel, type, value, context) + + def _addtoken(self, ilabel: int, type: int, value: Text, + context: Context) -> bool: + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t = self.grammar.labels[i][0] + if t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, itsdfa, newstate, context) + break # To continue the outer while loop + + elif ilabel == i: + # Look it up in the list of labels + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! 
+ return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError('too much input', type, value, context) + else: + # No success finding a transition + raise ParseError('bad input', type, value, context) + + def classify(self, type, value, context): + """Turn a token into a label. (Internal) + + Depending on whether the value is a soft-keyword or not, + this function may return multiple labels to choose from.""" + if type == token.NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + if value in self.grammar.keywords: + return [self.grammar.keywords[value]] + elif value in self.grammar.soft_keywords: + assert type in self.grammar.tokens + return [ + self.grammar.soft_keywords[value], + self.grammar.tokens[type], + ] + + ilabel = self.grammar.tokens.get(type) + if ilabel is None: + raise ParseError('bad token', type, value, context) + return [ilabel] + + def shift(self, type: int, value: Text, newstate: int, + context: Context) -> None: + """Shift a token. (Internal)""" + if self.is_backtracking: + dfa, state, _ = self.stack[-1] + self.stack[-1] = (dfa, newstate, DUMMY_NODE) + else: + dfa, state, node = self.stack[-1] + rawnode: RawNode = (type, value, context, None) + newnode = convert(self.grammar, rawnode) + assert node[-1] is not None + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def push(self, type: int, newdfa: DFAS, newstate: int, + context: Context) -> None: + """Push a nonterminal. (Internal)""" + if self.is_backtracking: + dfa, state, _ = self.stack[-1] + self.stack[-1] = (dfa, newstate, DUMMY_NODE) + self.stack.append((newdfa, 0, DUMMY_NODE)) + else: + dfa, state, node = self.stack[-1] + newnode: RawNode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def pop(self) -> None: + """Pop a nonterminal. (Internal)""" + if self.is_backtracking: + self.stack.pop() + else: + popdfa, popstate, popnode = self.stack.pop() + newnode = convert(self.grammar, popnode) + if self.stack: + dfa, state, node = self.stack[-1] + assert node[-1] is not None + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/pgen.py b/third_party/yapf_third_party/_ylib2to3/pgen2/pgen.py new file mode 100644 index 000000000..6b96478ad --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/pgen.py @@ -0,0 +1,409 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Pgen imports +from io import StringIO + +from . import grammar +from . import token +from . 
import tokenize + + +class PgenGrammar(grammar.Grammar): + pass + + +class ParserGenerator(object): + + def __init__(self, filename=None, stream=None): + close_stream = None + if filename is None and stream is None: + raise RuntimeError( + 'Either a filename or a stream is expected, both were none') + if stream is None: + stream = open(filename, encoding='utf-8') + close_stream = stream.close + self.filename = filename + self.stream = stream + self.generator = tokenize.generate_tokens(stream.readline) + self.gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self.parse() + if close_stream is not None: + close_stream() + self.first = {} # map from symbol name to set of tokens + self.addfirstsets() + + def make_grammar(self): + c = PgenGrammar() + names = list(self.dfas.keys()) + names.sort() + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in sorted(state.arcs.items()): + arcs.append((self.make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in sorted(rawfirst): + ilabel = self.make_label(c, label) + # assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? + ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(token, label, None) + assert isinstance(itoken, int), label + assert itoken in token.tok_name, label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + if label[0] == '"': + keywords = c.soft_keywords + else: + keywords = c.keywords + + # A keyword + if value in keywords: + return keywords[value] + else: + c.labels.append((token.NAME, value)) + keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = grammar.opmap[value] # Fails if unknown token + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self.calcfirst(name) + # print name, self.first[name].keys() + + def calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError('recursion for rule %r' % 
name) + else: + self.calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError('rule %s is ambiguous; %s is in the' + ' first sets of %s as well as %s' % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self.gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self.expect(token.NAME) + self.expect(token.OP, ':') + a, z = self.parse_rhs() + self.expect(token.NEWLINE) + # self.dump_nfa(name, a, z) + dfa = self.make_dfa(a, z) + # self.dump_dfa(name, dfa) + self.simplify_dfa(dfa) + dfas[name] = dfa + # print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. + assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + + def closure(state): + base = {} + addclosure(state, base) + return base + + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in sorted(arcs.items()): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def dump_nfa(self, name, start, finish): + print('Dump of NFA for', name) + todo = [start] + for i, state in enumerate(todo): + print(' State', i, state is finish and '(final)' or '') + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(' -> %d' % j) + else: + print(' %s -> %d' % (label, j)) + + def dump_dfa(self, name, dfa): + print('Dump of DFA for', name) + for i, state in enumerate(dfa): + print(' State', i, state.isfinal and '(final)' or '') + for label, next in sorted(state.arcs.items()): + print(' %s -> %d' % (label, dfa.index(next))) + + def simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. 
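    # For example, if two states agree on finality and both map 'NAME' to
    # state 4 and 'STRING' to state 6, the later one is deleted and every
    # arc that pointed at it is rewired to the survivor via unifystate().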
+ + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i + 1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + # print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self.parse_alt() + if self.value != '|': + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == '|': + self.gettoken() + a, z = self.parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def parse_alt(self): + # ALT: ITEM+ + a, b = self.parse_item() + while (self.value in ('(', '[') or self.type in (token.NAME, token.STRING)): + c, d = self.parse_item() + b.addarc(c) + b = d + return a, b + + def parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == '[': + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ']') + a.addarc(z) + return a, z + else: + a, z = self.parse_atom() + value = self.value + if value not in ('+', '*'): + return a, z + self.gettoken() + z.addarc(a) + if value == '+': + return a, z + else: + return a, a + + def parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == '(': + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ')') + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self.gettoken() + return a, z + else: + self.raise_error('expected (...) or NAME or STRING, got %s/%s', self.type, + self.value) + + def expect(self, type, value=None): + if self.type != type or (value is not None and self.value != value): + self.raise_error('expected %s/%s, got %s/%s', type, value, self.type, + self.value) + value = self.value + self.gettoken() + return value + + def gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + # print token.tok_name[self.type], repr(self.value) + + def raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except Exception: + msg = ' '.join([msg] + list(map(str, args))) + raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line)) + + +class NFAState(object): + + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + + +class DFAState(object): + + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... 
+ if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + + +def generate_grammar(filename_or_stream='Grammar.txt'): + # type:(str | StringIO) -> PgenGrammar + if isinstance(filename_or_stream, str): + p = ParserGenerator(filename_or_stream) + elif isinstance(filename_or_stream, StringIO): + p = ParserGenerator(stream=filename_or_stream) + else: + raise NotImplementedError('Type %s not implemented' % + type(filename_or_stream)) + return p.make_grammar() diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/token.py b/third_party/yapf_third_party/_ylib2to3/pgen2/token.py new file mode 100644 index 000000000..5d099707e --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/token.py @@ -0,0 +1,87 @@ +#! /usr/bin/env python3 +"""Token constants (from "token.h").""" + +# Taken from Python (r53757) and modified to include some tokens +# originally monkeypatched in by pgen2.tokenize + +# --start constants-- +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +BACKQUOTE = 25 +LBRACE = 26 +RBRACE = 27 +EQEQUAL = 28 +NOTEQUAL = 29 +LESSEQUAL = 30 +GREATEREQUAL = 31 +TILDE = 32 +CIRCUMFLEX = 33 +LEFTSHIFT = 34 +RIGHTSHIFT = 35 +DOUBLESTAR = 36 +PLUSEQUAL = 37 +MINEQUAL = 38 +STAREQUAL = 39 +SLASHEQUAL = 40 +PERCENTEQUAL = 41 +AMPEREQUAL = 42 +VBAREQUAL = 43 +CIRCUMFLEXEQUAL = 44 +LEFTSHIFTEQUAL = 45 +RIGHTSHIFTEQUAL = 46 +DOUBLESTAREQUAL = 47 +DOUBLESLASH = 48 +DOUBLESLASHEQUAL = 49 +AT = 50 +ATEQUAL = 51 +OP = 52 +COMMENT = 53 +NL = 54 +RARROW = 55 +AWAIT = 56 +ASYNC = 57 +ERRORTOKEN = 58 +COLONEQUAL = 59 +N_TOKENS = 60 +NT_OFFSET = 256 +# --end constants-- + +tok_name = {} +for _name, _value in list(globals().items()): + if isinstance(_value, int): + tok_name[_value] = _name + + +def ISTERMINAL(x): + return x < NT_OFFSET + + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + + +def ISEOF(x): + return x == ENDMARKER diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/tokenize.py b/third_party/yapf_third_party/_ylib2to3/pgen2/tokenize.py new file mode 100644 index 000000000..dda83296f --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pgen2/tokenize.py @@ -0,0 +1,611 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. +# All rights reserved. +"""Tokenization help for Python programs. + +generate_tokens(readline) is a generator that breaks a stream of +text into Python tokens. It accepts a readline-like method which is called +repeatedly to get the next line of input (or "" for EOF). 
It generates +5-tuples with these members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators + +Older entry points + tokenize_loop(readline, tokeneater) + tokenize(readline, tokeneater=printtoken) +are the same, except instead of generating tokens, tokeneater is a callback +function to which the 5 fields described above are passed as 5 arguments, +each time a new token is found.""" + +__author__ = 'Ka-Ping Yee ' +__credits__ = \ + 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' + +import re +import string +from codecs import BOM_UTF8 +from codecs import lookup + +from . import token +from .token import ASYNC +from .token import AWAIT +from .token import COMMENT +from .token import DEDENT +from .token import ENDMARKER +from .token import ERRORTOKEN +from .token import INDENT +from .token import NAME +from .token import NEWLINE +from .token import NL +from .token import NUMBER +from .token import OP +from .token import STRING +from .token import tok_name + +__all__ = [x for x in dir(token) if x[0] != '_' + ] + ['tokenize', 'generate_tokens', 'untokenize'] +del token + +try: + bytes +except NameError: + # Support bytes type in Python <= 2.5, so 2to3 turns itself into + # valid Python 3 code. + bytes = str + + +def group(*choices): + return '(' + '|'.join(choices) + ')' + + +def any(*choices): + return group(*choices) + '*' + + +def maybe(*choices): + return group(*choices) + '?' + + +def _combinations(*l): # noqa: E741 + return set( + x + y for x in l for y in l + ('',) if x.casefold() != y.casefold()) + + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'\w+' + +Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' +Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' +Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' +Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') +Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+(?:_\d+)*' +Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', + r'\.\d+(?:_\d+)*') + maybe(Exponent) +Expfloat = r'\d+(?:_\d+)*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +_litprefix = r'(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?' +Triple = group(_litprefix + "'''", _litprefix + '"""') +# Single-line ' or " string. +String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). 
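# As a quick illustration of the regex helpers defined above (the values
# shown are what the helpers return, not new definitions):
#
#     group('a', 'b')  ->  '(a|b)'
#     maybe('a', 'b')  ->  '(a|b)?'
#     any(r'\d')       ->  '(\d)*'
#
# which is how Ignore above combines runs of whitespace, backslash-newline
# continuations and an optional trailing comment into one pattern.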
+Operator = group(r'\*\*=?', r'>>=?', r'<<=?', r'<>', r'!=', r'//=?', r'->', + r'[+\-*/%&@|^=<>]=?', r'~') + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r':=', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group( + _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r'\\\r?\n'), + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = map( + re.compile, (Token, PseudoToken, Single3, Double3)) + +_strprefixes = ( + _combinations('r', 'R', 'f', 'F') | _combinations('r', 'R', 'b', 'B') + | {'u', 'U', 'ur', 'uR', 'Ur', 'UR'}) + +endprogs = { + "'": re.compile(Single), + '"': re.compile(Double), + "'''": single3prog, + '"""': double3prog, + **{ + f"{prefix}'''": single3prog for prefix in _strprefixes + }, + **{ + f'{prefix}"""': double3prog for prefix in _strprefixes + }, + **{ + prefix: None for prefix in _strprefixes + } +} + +triple_quoted = ({"'''", '"""'} | {f"{prefix}'''" for prefix in _strprefixes} + | {f'{prefix}"""' for prefix in _strprefixes}) +single_quoted = ({"'", '"'} | {f"{prefix}'" for prefix in _strprefixes} + | {f'{prefix}"' for prefix in _strprefixes}) + +tabsize = 8 + + +class TokenError(Exception): + pass + + +class StopTokenizing(Exception): + pass + + +def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, + line): # for testing + (srow, scol) = xxx_todo_changeme + (erow, ecol) = xxx_todo_changeme1 + print('%d,%d-%d,%d:\t%s\t%s' % + (srow, scol, erow, ecol, tok_name[type], repr(token))) + + +def tokenize(readline, tokeneater=printtoken): + """ + The tokenize() function accepts two parameters: one representing the + input stream, and one providing an output mechanism for tokenize(). + + The first parameter, readline, must be a callable object which provides + the same interface as the readline() method of built-in file objects. + Each call to the function should return one line of input as a string. + + The second parameter, tokeneater, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the + tuples generated by generate_tokens(). 
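    Illustrative usage (a minimal sketch):

        import io
        tokenize(io.StringIO('x = 1\n').readline)

    which prints one line per token via the default printtoken tokeneater.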
+ """ + try: + tokenize_loop(readline, tokeneater) + except StopTokenizing: + pass + + +# backwards compatible interface +def tokenize_loop(readline, tokeneater): + for token_info in generate_tokens(readline): + tokeneater(*token_info) + + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + + def add_whitespace(self, start): + row, col = start + assert row <= self.prev_row + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(' ' * col_offset) + + def untokenize(self, iterable): + for t in iterable: + if len(t) == 2: + self.compat(t, iterable) + break + tok_type, token, start, end, line = t + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return ''.join(self.tokens) + + def compat(self, token, iterable): + startline = False + indents = [] + toks_append = self.tokens.append + toknum, tokval = token + if toknum in (NAME, NUMBER): + tokval += ' ' + if toknum in (NEWLINE, NL): + startline = True + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum in (NAME, NUMBER, ASYNC, AWAIT): + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace('_', '-') + if enc == 'utf-8' or enc.startswith('utf-8-'): + return 'utf-8' + if enc in ('latin-1', 'iso-8859-1', 'iso-latin-1') or \ + enc.startswith(('latin-1-', 'iso-8859-1-', 'iso-latin-1-')): + return 'iso-8859-1' + return orig_enc + + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read + in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, but + disagree, a SyntaxError will be raised. If the encoding cookie is an invalid + charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. 
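    For example (illustrative):

        import io
        detect_encoding(io.BytesIO(b'# -*- coding: latin-1 -*-\n').readline)

    returns ('iso-8859-1', [b'# -*- coding: latin-1 -*-\n']).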
+ """ + bom_found = False + encoding = None + default = 'utf-8' + + def read_or_stop(): + try: + return readline() + except StopIteration: + return bytes() + + def find_cookie(line): + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError('unknown encoding: ' + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited input: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + return ut.untokenize(iterable) + + +def generate_tokens(readline): + """ + The generate_tokens() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + physical line. 
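    Illustrative usage (a minimal sketch):

        import io
        for tok_type, tok_str, start, end, line in generate_tokens(
                io.StringIO('x = 1\n').readline):
            print(tok_name[tok_type], repr(tok_str))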
+ """ + strstart = '' + endprog = '' + lnum = parenlev = continued = 0 + contstr, needcont = '', 0 + contline = None + indents = [0] + + # 'stashed' and 'async_*' are used for async/await parsing + stashed = None + async_def = False + async_def_indent = 0 + async_def_nl = False + + while 1: # loop over lines in stream + try: + line = readline() + except StopIteration: + line = '' + lnum = lnum + 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError('EOF in multi-line string', strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield (STRING, contstr + line[:end], strstart, (lnum, end), + contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield (ERRORTOKEN, contstr + line, strstart, (lnum, len(line)), + contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: + break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': + column = column + 1 + elif line[pos] == '\t': + column = (column // tabsize + 1) * tabsize + elif line[pos] == '\f': + column = 0 + else: + break + pos = pos + 1 + if pos == max: + break + + if stashed: + yield stashed + stashed = None + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + nl_pos = pos + len(comment_token) + yield (COMMENT, comment_token, (lnum, pos), + (lnum, pos + len(comment_token)), line) + yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line) + else: + yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], (lnum, pos), + (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + 'unindent does not match any outer indentation level', + ('', lnum, pos, line)) + indents = indents[:-1] + + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + else: # continued statement + if not line: + raise TokenError('EOF in multi-line statement', (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = pseudoprog.match(line, pos) + if pseudomatch: # scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if initial in string.digits or \ + (initial == '.' 
and token != '.'): # ordinary number + yield (NUMBER, token, spos, epos, line) + elif initial in '\r\n': + newline = NEWLINE + if parenlev > 0: + newline = NL + elif async_def: + async_def_nl = True + if stashed: + yield stashed + stashed = None + yield (newline, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith('\n') + if stashed: + yield stashed + stashed = None + yield (COMMENT, token, spos, epos, line) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) # noqa: F841 + endprog = ( + endprogs[initial] or endprogs[token[1]] or endprogs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, epos, line) + elif initial.isidentifier(): # ordinary name + if token in ('async', 'await'): + if async_def: + yield (ASYNC if token == 'async' else AWAIT, token, spos, epos, + line) + continue + + tok = (NAME, token, spos, epos, line) + if token == 'async' and not stashed: + stashed = tok + continue + + if token in ('def', 'for'): + if (stashed and stashed[0] == NAME and stashed[1] == 'async'): + + if token == 'def': + async_def = True + async_def_indent = indents[-1] + + yield (ASYNC, stashed[1], stashed[2], stashed[3], stashed[4]) + stashed = None + + if stashed: + yield stashed + stashed = None + + yield tok + elif initial == '\\': # continued stmt + # This yield is new; needed for better idempotency: + if stashed: + yield stashed + stashed = None + yield (NL, token, spos, (lnum, pos), line) + continued = 1 + else: + if initial in '([{': + parenlev = parenlev + 1 + elif initial in ')]}': + parenlev = parenlev - 1 + if stashed: + yield stashed + stashed = None + yield (OP, token, spos, epos, line) + else: + yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line) + pos = pos + 1 + + if stashed: + yield stashed + stashed = None + + for indent in indents[1:]: # pop remaining indent levels + yield (DEDENT, '', (lnum, 0), (lnum, 0), '') + yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') + + +if __name__ == '__main__': # testing + import sys + if len(sys.argv) > 1: + tokenize(open(sys.argv[1]).readline) + else: + tokenize(sys.stdin.readline) diff --git a/third_party/yapf_third_party/_ylib2to3/pygram.py b/third_party/yapf_third_party/_ylib2to3/pygram.py new file mode 100644 index 000000000..4267c3610 --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pygram.py @@ -0,0 +1,40 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +"""Export the Python grammar and symbols.""" + +# Python imports +import os + +# Local imports +from .pgen2 import driver + +# The grammar file +_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), 'Grammar.txt') +_PATTERN_GRAMMAR_FILE = os.path.join( + os.path.dirname(__file__), 'PatternGrammar.txt') + + +class Symbols(object): + + def __init__(self, grammar): + """Initializer. 
+ + Creates an attribute for each grammar symbol (nonterminal), + whose value is the symbol's type (an int >= 256). + """ + for name, symbol in grammar.symbol2number.items(): + setattr(self, name, symbol) + + +python_grammar = driver.load_grammar(_GRAMMAR_FILE) + +python_symbols = Symbols(python_grammar) + +python_grammar_no_print_statement = python_grammar.copy() +del python_grammar_no_print_statement.keywords['print'] + +python_grammar_no_print_and_exec_statement = python_grammar_no_print_statement.copy() # yapf: disable # noqa: E501 +del python_grammar_no_print_and_exec_statement.keywords['exec'] + +pattern_grammar = driver.load_grammar(_PATTERN_GRAMMAR_FILE) +pattern_symbols = Symbols(pattern_grammar) diff --git a/third_party/yapf_third_party/_ylib2to3/pytree.py b/third_party/yapf_third_party/_ylib2to3/pytree.py new file mode 100644 index 000000000..0fcdf4b2c --- /dev/null +++ b/third_party/yapf_third_party/_ylib2to3/pytree.py @@ -0,0 +1,861 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +""" +Python parse tree definitions. + +This is a very concrete parse tree; we need to keep every token and +even the comments and whitespace between tokens. + +There's also a pattern matching implementation here. +""" + +__author__ = 'Guido van Rossum ' + +import sys +from io import StringIO +from typing import List +from typing import Optional +from typing import Text +from typing import Tuple +from typing import Union + +HUGE = 0x7FFFFFFF # maximum repeat count, default max + +_type_reprs = {} + + +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: + _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) + + +NL = Union['Node', 'Leaf'] +Context = Tuple[Text, Tuple[int, int]] +RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]] + + +class Base(object): + """ + Abstract base class for Node and Leaf. + + This provides some default functionality and boilerplate using the + template pattern. + + A node may be a subnode of at most one parent. + """ + + # Default values for instance variables + type = None # int: token number (< 256) or symbol number (>= 256) + parent = None # Parent node pointer, or None + children = () # Tuple of subnodes + was_changed = False + was_checked = False + + def __new__(cls, *args, **kwds): + """Constructor that prevents Base from being instantiated.""" + assert cls is not Base, 'Cannot instantiate Base' + return object.__new__(cls) + + def __eq__(self, other): + """ + Compare two nodes for equality. + + This calls the method _eq(). + """ + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + __hash__ = None # For Py3 compatibility. + + def _eq(self, other): + """ + Compare two nodes for equality. + + This is called by __eq__ and __ne__. It is only called if the two nodes + have the same type. This must be implemented by the concrete subclass. + Nodes should be considered equal if they have the same structure, + ignoring the prefix string and other context information. + """ + raise NotImplementedError + + def clone(self): + """ + Return a cloned (deep) copy of self. + + This must be implemented by the concrete subclass. 
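+
+    (The concrete implementations below illustrate the contract:
+    Node.clone() rebuilds the node from clones of its children, while
+    Leaf.clone() copies the value together with its prefix and
+    position, so the copy can be mutated without touching the
+    original tree.)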
+ """ + raise NotImplementedError + + def post_order(self): + """ + Return a post-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def pre_order(self): + """ + Return a pre-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def replace(self, new): + """Replace this node with a new one in the parent.""" + assert self.parent is not None, str(self) + assert new is not None + if not isinstance(new, list): + new = [new] + l_children = [] + found = False + for ch in self.parent.children: + if ch is self: + assert not found, (self.parent.children, self, new) + if new is not None: + l_children.extend(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, self, new) + self.parent.changed() + self.parent.children = l_children + for x in new: + x.parent = self.parent + self.parent = None + + def get_lineno(self): + """Return the line number which generated the invocant node.""" + node = self + while not isinstance(node, Leaf): + if not node.children: + return + node = node.children[0] + return node.lineno + + def changed(self): + if self.parent: + self.parent.changed() + self.was_changed = True + + def remove(self): + """ + Remove the node from the tree. Returns the position of the node in its + parent's children before it was removed. + """ + if self.parent: + for i, node in enumerate(self.parent.children): + if node is self: + self.parent.changed() + del self.parent.children[i] + self.parent = None + return i + + @property + def next_sibling(self): + """ + The node immediately following the invocant in their parent's children + list. If the invocant does not have a next sibling, it is None + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i + 1] + except IndexError: + return None + + @property + def prev_sibling(self): + """ + The node immediately preceding the invocant in their parent's children + list. If the invocant does not have a previous sibling, it is None. + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i - 1] + + def leaves(self): + for child in self.children: + yield from child.leaves() + + def depth(self): + if self.parent is None: + return 0 + return 1 + self.parent.depth() + + def get_suffix(self): + """ + Return the string immediately following the invocant node. This is + effectively equivalent to node.next_sibling.prefix + """ + next_sib = self.next_sibling + if next_sib is None: + return '' + return next_sib.prefix + + if sys.version_info < (3, 0): + + def __str__(self): + return str(self).encode('ascii') + + +class Node(Base): + """Concrete implementation for interior nodes.""" + + def __init__(self, + type, + children, + context=None, + prefix=None, + fixers_applied=None): + """ + Initializer. + + Takes a type constant (a symbol number >= 256), a sequence of + child nodes, and an optional context keyword argument. + + As a side effect, the parent pointers of the children are updated. 
+ """ + assert type >= 256, type + self.type = type + self.children = list(children) + for ch in self.children: + assert ch.parent is None, repr(ch) + ch.parent = self + if prefix is not None: + self.prefix = prefix + if fixers_applied: + self.fixers_applied = fixers_applied[:] + else: + self.fixers_applied = None + + def __repr__(self): + """Return a canonical string representation.""" + return '%s(%s, %r)' % (self.__class__.__name__, type_repr( + self.type), self.children) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return ''.join(map(str, self.children)) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.children) == (other.type, other.children) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Node( + self.type, [ch.clone() for ch in self.children], + fixers_applied=self.fixers_applied) + + def post_order(self): + """Return a post-order iterator for the tree.""" + for child in self.children: + yield from child.post_order() + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + for child in self.children: + yield from child.pre_order() + + @property + def prefix(self): + """ + The whitespace and comments preceding this node in the input. + """ + if not self.children: + return '' + return self.children[0].prefix + + @prefix.setter + def prefix(self, prefix): + if self.children: + self.children[0].prefix = prefix + + def set_child(self, i, child): + """ + Equivalent to 'node.children[i] = child'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children[i].parent = None + self.children[i] = child + self.changed() + + def insert_child(self, i, child): + """ + Equivalent to 'node.children.insert(i, child)'. This method also sets + the child's parent attribute appropriately. + """ + child.parent = self + self.children.insert(i, child) + self.changed() + + def append_child(self, child): + """ + Equivalent to 'node.children.append(child)'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children.append(child) + self.changed() + + +class Leaf(Base): + """Concrete implementation for leaf nodes.""" + + # Default values for instance variables + _prefix = '' # Whitespace and comments preceding this token in the input + lineno = 0 # Line where this token starts in the input + column = 0 # Column where this token tarts in the input + + def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]): + """ + Initializer. + + Takes a type constant (a token number < 256), a string value, and an + optional context keyword argument. + """ + assert 0 <= type < 256, type + if context is not None: + self._prefix, (self.lineno, self.column) = context + self.type = type + self.value = value + if prefix is not None: + self._prefix = prefix + self.fixers_applied = fixers_applied[:] + + def __repr__(self): + """Return a canonical string representation.""" + return '%s(%r, %r)' % (self.__class__.__name__, self.type, self.value) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. 
+ """ + return self.prefix + str(self.value) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.value) == (other.type, other.value) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Leaf( + self.type, + self.value, (self.prefix, (self.lineno, self.column)), + fixers_applied=self.fixers_applied) + + def leaves(self): + yield self + + def post_order(self): + """Return a post-order iterator for the tree.""" + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + + @property + def prefix(self): + """ + The whitespace and comments preceding this token in the input. + """ + return self._prefix + + @prefix.setter + def prefix(self, prefix): + self.changed() + self._prefix = prefix + + +def convert(gr, raw_node): + """ + Convert raw node information to a Node or Leaf instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + type, value, context, children = raw_node + if children or type in gr.number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) + + +class BasePattern(object): + """ + A pattern is a tree matching pattern. + + It looks for a specific node type (token or symbol), and + optionally for a specific content. + + This is an abstract base class. There are three concrete + subclasses: + + - LeafPattern matches a single leaf node; + - NodePattern matches a single node (usually non-leaf); + - WildcardPattern matches a sequence of nodes of variable length. + """ + + # Defaults for instance variables + type = None # Node type (token if < 256, symbol if >= 256) + content = None # Optional content matching pattern + name = None # Optional name used to store match in results dict + + def __new__(cls, *args, **kwds): + """Constructor that prevents BasePattern from being instantiated.""" + assert cls is not BasePattern, 'Cannot instantiate BasePattern' + return object.__new__(cls) + + def __repr__(self): + args = [type_repr(self.type), self.content, self.name] + while args and args[-1] is None: + del args[-1] + return '%s(%s)' % (self.__class__.__name__, ', '.join(map(repr, args))) + + def optimize(self): + """ + A subclass can define this as a hook for optimizations. + + Returns either self or another node with the same effect. + """ + return self + + def match(self, node, results=None): + """ + Does this pattern exactly match a node? + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + Default implementation for non-wildcard patterns. + """ + if self.type is not None and node.type != self.type: + return False + if self.content is not None: + r = None + if results is not None: + r = {} + if not self._submatch(node, r): + return False + if r: + results.update(r) + if results is not None and self.name: + results[self.name] = node + return True + + def match_seq(self, nodes, results=None): + """ + Does this pattern exactly match a sequence of nodes? + + Default implementation for non-wildcard patterns. 
+ """ + if len(nodes) != 1: + return False + return self.match(nodes[0], results) + + def generate_matches(self, nodes): + """ + Generator yielding all matches for this pattern. + + Default implementation for non-wildcard patterns. + """ + r = {} + if nodes and self.match(nodes[0], r): + yield 1, r + + +class LeafPattern(BasePattern): + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given must be a token type (< 256). If not given, + this matches any *leaf* node; the content may still be required. + + The content, if given, must be a string. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert 0 <= type < 256, type + if content is not None: + assert isinstance(content, str), repr(content) + self.type = type + self.content = content + self.name = name + + def match(self, node, results=None): + """Override match() to insist on a leaf node.""" + if not isinstance(node, Leaf): + return False + return BasePattern.match(self, node, results) + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + return self.content == node.value + + +class NodePattern(BasePattern): + + wildcards = False + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given, must be a symbol type (>= 256). If the + type is None this matches *any* single node (leaf or not), + except if content is not None, in which it only matches + non-leaf nodes that also match the content pattern. + + The content, if not None, must be a sequence of Patterns that + must match the node's children exactly. If the content is + given, the type must not be None. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert type >= 256, type + if content is not None: + assert not isinstance(content, str), repr(content) + content = list(content) + for i, item in enumerate(content): + assert isinstance(item, BasePattern), (i, item) + if isinstance(item, WildcardPattern): + self.wildcards = True + self.type = type + self.content = content + self.name = name + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + if self.wildcards: + for c, r in generate_matches(self.content, node.children): + if c == len(node.children): + if results is not None: + results.update(r) + return True + return False + if len(self.content) != len(node.children): + return False + for subpattern, child in zip(self.content, node.children): + if not subpattern.match(child, results): + return False + return True + + +class WildcardPattern(BasePattern): + """ + A wildcard pattern can match zero or more nodes. 
+ + This has all the flexibility needed to implement patterns like: + + .* .+ .? .{m,n} + (a b c | d e | f) + (...)* (...)+ (...)? (...){m,n} + + except it always uses non-greedy matching. + """ + + def __init__(self, content=None, min=0, max=HUGE, name=None): + """ + Initializer. + + Args: + content: optional sequence of subsequences of patterns; + if absent, matches one node; + if present, each subsequence is an alternative [*] + min: optional minimum number of times to match, default 0 + max: optional maximum number of times to match, default HUGE + name: optional name assigned to this match + + [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is + equivalent to (a b c | d e | f g h); if content is None, + this is equivalent to '.' in regular expression terms. + The min and max parameters work as follows: + min=0, max=maxint: .* + min=1, max=maxint: .+ + min=0, max=1: .? + min=1, max=1: . + If content is not None, replace the dot with the parenthesized + list of alternatives, e.g. (a b c | d e | f g h)* + """ + assert 0 <= min <= max <= HUGE, (min, max) + if content is not None: + content = tuple(map(tuple, content)) # Protect against alterations + # Check sanity of alternatives + assert len(content), repr(content) # Can't have zero alternatives + for alt in content: + assert len(alt), repr(alt) # Can have empty alternatives + self.content = content + self.min = min + self.max = max + self.name = name + + def optimize(self): + """Optimize certain stacked wildcard patterns.""" + subpattern = None + if (self.content is not None and len(self.content) == 1 and + len(self.content[0]) == 1): + subpattern = self.content[0][0] + if self.min == 1 and self.max == 1: + if self.content is None: + return NodePattern(name=self.name) + if subpattern is not None and self.name == subpattern.name: + return subpattern.optimize() + if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and + subpattern.min <= 1 and self.name == subpattern.name): + return WildcardPattern(subpattern.content, self.min * subpattern.min, + self.max * subpattern.max, subpattern.name) + return self + + def match(self, node, results=None): + """Does this pattern exactly match a node?""" + return self.match_seq([node], results) + + def match_seq(self, nodes, results=None): + """Does this pattern exactly match a sequence of nodes?""" + for c, r in self.generate_matches(nodes): + if c == len(nodes): + if results is not None: + results.update(r) + if self.name: + results[self.name] = list(nodes) + return True + return False + + def generate_matches(self, nodes): + """ + Generator yielding matches for a sequence of nodes. + + Args: + nodes: sequence of nodes + + Yields: + (count, results) tuples where: + count: the match comprises nodes[:count]; + results: dict containing named submatches. + """ + if self.content is None: + # Shortcut for special case (see __init__.__doc__) + for count in range(self.min, 1 + min(len(nodes), self.max)): + r = {} + if self.name: + r[self.name] = nodes[:count] + yield count, r + elif self.name == 'bare_name': + yield self._bare_name_matches(nodes) + else: + # The reason for this is that hitting the recursion limit usually + # results in some ugly messages about how RuntimeErrors are being + # ignored. We only have to do this on CPython, though, because other + # implementations don't have this nasty bug in the first place. 
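+      # (Concretely: stderr is pointed at a throwaway StringIO while
+      # the recursive matcher runs, the RuntimeError raised on hitting
+      # the recursion limit switches us to the iterative matcher, and
+      # stderr is restored in the finally block.)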
+ if hasattr(sys, 'getrefcount'): + save_stderr = sys.stderr + sys.stderr = StringIO() + try: + for count, r in self._recursive_matches(nodes, 0): + if self.name: + r[self.name] = nodes[:count] + yield count, r + except RuntimeError: + # We fall back to the iterative pattern matching scheme if the recursive + # scheme hits the recursion limit. + for count, r in self._iterative_matches(nodes): + if self.name: + r[self.name] = nodes[:count] + yield count, r + finally: + if hasattr(sys, 'getrefcount'): + sys.stderr = save_stderr + + def _iterative_matches(self, nodes): + """Helper to iteratively yield the matches.""" + nodelen = len(nodes) + if 0 >= self.min: + yield 0, {} + + results = [] + # generate matches that use just one alt from self.content + for alt in self.content: + for c, r in generate_matches(alt, nodes): + yield c, r + results.append((c, r)) + + # for each match, iterate down the nodes + while results: + new_results = [] + for c0, r0 in results: + # stop if the entire set of nodes has been matched + if c0 < nodelen and c0 <= self.max: + for alt in self.content: + for c1, r1 in generate_matches(alt, nodes[c0:]): + if c1 > 0: + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + new_results.append((c0 + c1, r)) + results = new_results + + def _bare_name_matches(self, nodes): + """Special optimized matcher for bare_name.""" + count = 0 + r = {} + done = False + max = len(nodes) + while not done and count < max: + done = True + for leaf in self.content: + if leaf[0].match(nodes[count], r): + count += 1 + done = False + break + r[self.name] = nodes[:count] + return count, r + + def _recursive_matches(self, nodes, count): + """Helper to recursively yield the matches.""" + assert self.content is not None + if count >= self.min: + yield 0, {} + if count < self.max: + for alt in self.content: + for c0, r0 in generate_matches(alt, nodes): + for c1, r1 in self._recursive_matches(nodes[c0:], count + 1): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + + +class NegatedPattern(BasePattern): + + def __init__(self, content=None): + """ + Initializer. + + The argument is either a pattern or None. If it is None, this + only matches an empty sequence (effectively '$' in regex + lingo). If it is not None, this matches whenever the argument + pattern doesn't have any matches. + """ + if content is not None: + assert isinstance(content, BasePattern), repr(content) + self.content = content + + def match(self, node): + # We never match a node in its entirety + return False + + def match_seq(self, nodes): + # We only match an empty sequence of nodes in its entirety + return len(nodes) == 0 + + def generate_matches(self, nodes): + if self.content is None: + # Return a match if there is an empty sequence + if len(nodes) == 0: + yield 0, {} + else: + # Return a match if the argument pattern has no matches + for c, r in self.content.generate_matches(nodes): + return + yield 0, {} + + +def generate_matches(patterns, nodes): + """ + Generator yielding matches for a sequence of patterns and nodes. + + Args: + patterns: a sequence of patterns + nodes: a sequence of nodes + + Yields: + (count, results) tuples where: + count: the entire sequence of patterns matches nodes[:count]; + results: dict containing named submatches. 
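+
+  For example (illustrative only), matching the pattern sequence
+  [p1, p2] against nodes [a, b, c] lets p1 claim some prefix of the
+  nodes and then recursively asks p2 to match whatever remains,
+  yielding one (count, results) pair for every combination that
+  succeeds.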
+ """ + if not patterns: + yield 0, {} + else: + p, rest = patterns[0], patterns[1:] + for c0, r0 in p.generate_matches(nodes): + if not rest: + yield c0, r0 + else: + for c1, r1 in generate_matches(rest, nodes[c0:]): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r diff --git a/third_party/yapf_diff/LICENSE b/third_party/yapf_third_party/yapf_diff/LICENSE similarity index 100% rename from third_party/yapf_diff/LICENSE rename to third_party/yapf_third_party/yapf_diff/LICENSE diff --git a/third_party/yapf_third_party/yapf_diff/__init__.py b/third_party/yapf_third_party/yapf_diff/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/third_party/yapf_diff/yapf_diff.py b/third_party/yapf_third_party/yapf_diff/yapf_diff.py similarity index 100% rename from third_party/yapf_diff/yapf_diff.py rename to third_party/yapf_third_party/yapf_diff/yapf_diff.py diff --git a/yapf/__init__.py b/yapf/__init__.py index ce183e9d0..7ef89a93f 100644 --- a/yapf/__init__.py +++ b/yapf/__init__.py @@ -33,12 +33,14 @@ import os import sys +from importlib_metadata import metadata + from yapf.yapflib import errors from yapf.yapflib import file_resources from yapf.yapflib import style from yapf.yapflib import yapf_api -__version__ = '0.33.0' +__version__ = metadata('yapf')['Version'] def _raw_input(): diff --git a/yapf/pytree/blank_line_calculator.py b/yapf/pytree/blank_line_calculator.py index 7bccb0978..32faaa2f4 100644 --- a/yapf/pytree/blank_line_calculator.py +++ b/yapf/pytree/blank_line_calculator.py @@ -22,7 +22,7 @@ newlines: The number of newlines required before the node. """ -from lib2to3.pgen2 import token as grammar_token +from yapf_third_party._ylib2to3.pgen2 import token as grammar_token from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor diff --git a/yapf/pytree/comment_splicer.py b/yapf/pytree/comment_splicer.py index ae5ffe66f..a9180f37c 100644 --- a/yapf/pytree/comment_splicer.py +++ b/yapf/pytree/comment_splicer.py @@ -21,9 +21,9 @@ SpliceComments(): the main function exported by this module. """ -from lib2to3 import pygram -from lib2to3 import pytree -from lib2to3.pgen2 import token +from yapf_third_party._ylib2to3 import pygram +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token from yapf.pytree import pytree_utils diff --git a/yapf/pytree/continuation_splicer.py b/yapf/pytree/continuation_splicer.py index b86188cb5..a8aef6606 100644 --- a/yapf/pytree/continuation_splicer.py +++ b/yapf/pytree/continuation_splicer.py @@ -19,7 +19,7 @@ SpliceContinuations(): the main function exported by this module. """ -from lib2to3 import pytree +from yapf_third_party._ylib2to3 import pytree from yapf.yapflib import format_token diff --git a/yapf/pytree/pytree_unwrapper.py b/yapf/pytree/pytree_unwrapper.py index ba1e0c423..4b84cd54c 100644 --- a/yapf/pytree/pytree_unwrapper.py +++ b/yapf/pytree/pytree_unwrapper.py @@ -28,8 +28,8 @@ # The word "token" is overloaded within this module, so for clarity rename # the imported pgen2.token module. 
-from lib2to3 import pytree -from lib2to3.pgen2 import token as grammar_token +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token as grammar_token from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor @@ -125,6 +125,8 @@ def _StartNewLine(self): 'try_stmt', 'expect_clause', 'with_stmt', + 'match_stmt', + 'case_block', 'funcdef', 'classdef', }) @@ -144,11 +146,13 @@ def Visit_simple_stmt(self, node): single_stmt_suite = ( node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES) is_comment_stmt = pytree_utils.IsCommentStatement(node) - if single_stmt_suite and not is_comment_stmt: + is_inside_match = node.parent and pytree_utils.NodeName( + node.parent) == 'match_stmt' + if (single_stmt_suite and not is_comment_stmt) or is_inside_match: self._cur_depth += 1 self._StartNewLine() self.DefaultNodeVisit(node) - if single_stmt_suite and not is_comment_stmt: + if (single_stmt_suite and not is_comment_stmt) or is_inside_match: self._cur_depth -= 1 def _VisitCompoundStatement(self, node, substatement_names): @@ -253,6 +257,20 @@ def Visit_decorated(self, node): # pylint: disable=invalid-name def Visit_with_stmt(self, node): # pylint: disable=invalid-name self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) + _MATCH_STMT_ELEMS = frozenset({'match', 'case'}) + + def Visit_match_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._MATCH_STMT_ELEMS) + + # case_block refers to the grammar element name in Grammar.txt + _CASE_BLOCK_ELEMS = frozenset({'case'}) + + def Visit_case_block(self, node): + self._cur_depth += 1 + self._StartNewLine() + self._VisitCompoundStatement(node, self._CASE_BLOCK_ELEMS) + self._cur_depth -= 1 + def Visit_suite(self, node): # pylint: disable=invalid-name # A 'suite' starts a new indentation level in Python. self._cur_depth += 1 diff --git a/yapf/pytree/pytree_utils.py b/yapf/pytree/pytree_utils.py index 8b3c4065c..e7aa6f59c 100644 --- a/yapf/pytree/pytree_utils.py +++ b/yapf/pytree/pytree_utils.py @@ -26,11 +26,12 @@ import ast import os -from lib2to3 import pygram -from lib2to3 import pytree -from lib2to3.pgen2 import driver -from lib2to3.pgen2 import parse -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3 import pygram +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import driver +from yapf_third_party._ylib2to3.pgen2 import parse +from yapf_third_party._ylib2to3.pgen2 import token # TODO(eliben): We may want to get rid of this filtering at some point once we # have a better understanding of what information we need from the tree. 
Then, diff --git a/yapf/pytree/pytree_visitor.py b/yapf/pytree/pytree_visitor.py index 4f6c605ee..ec2cdb7e8 100644 --- a/yapf/pytree/pytree_visitor.py +++ b/yapf/pytree/pytree_visitor.py @@ -25,7 +25,8 @@ """ import sys -from lib2to3 import pytree + +from yapf_third_party._ylib2to3 import pytree from yapf.pytree import pytree_utils diff --git a/yapf/pytree/split_penalty.py b/yapf/pytree/split_penalty.py index ccb3880d4..03b36384a 100644 --- a/yapf/pytree/split_penalty.py +++ b/yapf/pytree/split_penalty.py @@ -14,8 +14,9 @@ """Computation of split penalties before/between tokens.""" import re -from lib2to3 import pytree -from lib2to3.pgen2 import token as grammar_token + +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token as grammar_token from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor diff --git a/yapf/pytree/subtype_assigner.py b/yapf/pytree/subtype_assigner.py index 97b0e501e..05d88b0fc 100644 --- a/yapf/pytree/subtype_assigner.py +++ b/yapf/pytree/subtype_assigner.py @@ -24,9 +24,9 @@ subtypes. """ -from lib2to3 import pytree -from lib2to3.pgen2 import token as grammar_token -from lib2to3.pygram import python_symbols as syms +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token as grammar_token +from yapf_third_party._ylib2to3.pygram import python_symbols as syms from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor diff --git a/yapf/yapflib/errors.py b/yapf/yapflib/errors.py index 99e88d9c0..3a0102368 100644 --- a/yapf/yapflib/errors.py +++ b/yapf/yapflib/errors.py @@ -13,7 +13,7 @@ # limitations under the License. """YAPF error objects.""" -from lib2to3.pgen2 import tokenize +from yapf_third_party._ylib2to3.pgen2 import tokenize def FormatErrorMsg(e): diff --git a/yapf/yapflib/file_resources.py b/yapf/yapflib/file_resources.py index 31be6f6c5..977a2568f 100644 --- a/yapf/yapflib/file_resources.py +++ b/yapf/yapflib/file_resources.py @@ -23,7 +23,6 @@ import re import sys from configparser import ConfigParser -from io import StringIO from tokenize import detect_encoding from yapf.yapflib import errors diff --git a/yapf/yapflib/format_decision_state.py b/yapf/yapflib/format_decision_state.py index 2468b1f9a..0c8643f20 100644 --- a/yapf/yapflib/format_decision_state.py +++ b/yapf/yapflib/format_decision_state.py @@ -1096,8 +1096,10 @@ def _ContainerFitsOnStartLine(self, opening): self.stack[-1].indent) <= self.column_limit -_COMPOUND_STMTS = frozenset( - {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'}) +_COMPOUND_STMTS = frozenset({ + 'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class', 'match', + 'case' +}) def _IsCompoundStatement(token): diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py index c572391e3..141c2650e 100644 --- a/yapf/yapflib/format_token.py +++ b/yapf/yapflib/format_token.py @@ -16,7 +16,9 @@ import keyword import re from functools import lru_cache -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3.pgen2 import token +from yapf_third_party._ylib2to3.pytree import type_repr from yapf.pytree import pytree_utils from yapf.yapflib import style @@ -278,7 +280,11 @@ def is_continuation(self): @property @lru_cache() def is_keyword(self): - return keyword.iskeyword(self.value) + return keyword.iskeyword( + self.value) or (self.value == 'match' and + type_repr(self.node.parent.type) == 'match_stmt') or ( + self.value == 'case' and + type_repr(self.node.parent.type) == 
'case_block') @property def is_name(self): diff --git a/yapf/yapflib/identify_container.py b/yapf/yapflib/identify_container.py index d027cc5d4..43ba4b9ed 100644 --- a/yapf/yapflib/identify_container.py +++ b/yapf/yapflib/identify_container.py @@ -19,7 +19,7 @@ IdentifyContainers(): the main function exported by this module. """ -from lib2to3.pgen2 import token as grammar_token +from yapf_third_party._ylib2to3.pgen2 import token as grammar_token from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor diff --git a/yapf/yapflib/logical_line.py b/yapf/yapflib/logical_line.py index 780db8a2a..4433276f7 100644 --- a/yapf/yapflib/logical_line.py +++ b/yapf/yapflib/logical_line.py @@ -19,7 +19,7 @@ perform the wrapping required to comply with the style guide. """ -from lib2to3.fixer_util import syms as python_symbols +from yapf_third_party._ylib2to3.fixer_util import syms as python_symbols from yapf.pytree import pytree_utils from yapf.pytree import split_penalty diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py index b7c883e5f..a74b44f6b 100644 --- a/yapf/yapflib/reformatter.py +++ b/yapf/yapflib/reformatter.py @@ -22,8 +22,9 @@ import collections import heapq import re -from lib2to3 import pytree -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token from yapf.pytree import pytree_utils from yapf.yapflib import format_decision_state diff --git a/yapf/yapflib/yapf_api.py b/yapf/yapflib/yapf_api.py index be5467a38..cac62be52 100644 --- a/yapf/yapflib/yapf_api.py +++ b/yapf/yapflib/yapf_api.py @@ -35,7 +35,6 @@ import codecs import difflib import re -import sys from yapf.pyparser import pyparser from yapf.pytree import blank_line_calculator diff --git a/yapftests/format_token_test.py b/yapftests/format_token_test.py index 5703e5a8b..59c167c13 100644 --- a/yapftests/format_token_test.py +++ b/yapftests/format_token_test.py @@ -14,8 +14,9 @@ """Tests for yapf.format_token.""" import unittest -from lib2to3 import pytree -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token from yapf.yapflib import format_token diff --git a/yapftests/logical_line_test.py b/yapftests/logical_line_test.py index 9188b32fc..bcb2b6133 100644 --- a/yapftests/logical_line_test.py +++ b/yapftests/logical_line_test.py @@ -15,8 +15,9 @@ import textwrap import unittest -from lib2to3 import pytree -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token from yapf.pytree import split_penalty from yapf.yapflib import format_token diff --git a/yapftests/pytree_utils_test.py b/yapftests/pytree_utils_test.py index b228fcb46..a0be50d03 100644 --- a/yapftests/pytree_utils_test.py +++ b/yapftests/pytree_utils_test.py @@ -14,9 +14,10 @@ """Tests for yapf.pytree_utils.""" import unittest -from lib2to3 import pygram -from lib2to3 import pytree -from lib2to3.pgen2 import token + +from yapf_third_party._ylib2to3 import pygram +from yapf_third_party._ylib2to3 import pytree +from yapf_third_party._ylib2to3.pgen2 import token from yapf.pytree import pytree_utils diff --git a/yapftests/reformatter_basic_test.py b/yapftests/reformatter_basic_test.py index 2edbc3175..034c45d6c 100644 --- a/yapftests/reformatter_basic_test.py +++ b/yapftests/reformatter_basic_test.py @@ -23,6 +23,7 @@ from yapftests import yapf_test_helper PY38 = sys.version_info[0] >= 3 and sys.version_info[1] 
>= 8 +PY310 = sys.version_info[0] >= 3 and sys.version_info[1] >= 10 class BasicReformatterTest(yapf_test_helper.YAPFTest): @@ -3177,6 +3178,42 @@ def testWalrus(self): llines = yapf_test_helper.ParseAndUnwrap(unformatted_code) self.assertCodeEqual(expected, reformatter.Reformat(llines)) + @unittest.skipUnless(PY310, 'Requires Python 3.10') + def testStructuredPatternMatching(self): + unformatted_code = textwrap.dedent("""\ + match command.split(): + case[action ]: + ... # interpret single-verb action + case[action, obj]: + ... # interpret action, obj + """) + expected = textwrap.dedent("""\ + match command.split(): + case [action]: + ... # interpret single-verb action + case [action, obj]: + ... # interpret action, obj + """) + llines = yapf_test_helper.ParseAndUnwrap(unformatted_code) + self.assertCodeEqual(expected, reformatter.Reformat(llines)) + + @unittest.skipUnless(PY310, 'Requires Python 3.10') + def testParenthesizedContextManagers(self): + unformatted_code = textwrap.dedent("""\ + with (cert_authority.cert_pem.tempfile() as ca_temp_path, patch.object(os, 'environ', os.environ | {'REQUESTS_CA_BUNDLE': ca_temp_path}),): + httpserver_url = httpserver.url_for('/resource.jar') + """) # noqa: E501 + expected = textwrap.dedent("""\ + with ( + cert_authority.cert_pem.tempfile() as ca_temp_path, + patch.object(os, 'environ', + os.environ | {'REQUESTS_CA_BUNDLE': ca_temp_path}), + ): + httpserver_url = httpserver.url_for('/resource.jar') + """) + llines = yapf_test_helper.ParseAndUnwrap(unformatted_code) + self.assertCodeEqual(expected, reformatter.Reformat(llines)) + if __name__ == '__main__': unittest.main() diff --git a/yapftests/split_penalty_test.py b/yapftests/split_penalty_test.py index c08cf5a28..5f9a2189f 100644 --- a/yapftests/split_penalty_test.py +++ b/yapftests/split_penalty_test.py @@ -16,7 +16,8 @@ import sys import textwrap import unittest -from lib2to3 import pytree + +from yapf_third_party._ylib2to3 import pytree from yapf.pytree import pytree_utils from yapf.pytree import pytree_visitor diff --git a/yapftests/style_test.py b/yapftests/style_test.py index 731c1d98b..12db72383 100644 --- a/yapftests/style_test.py +++ b/yapftests/style_test.py @@ -230,7 +230,7 @@ def testErrorUnknownStyleOption(self): def testPyprojectTomlNoYapfSection(self): try: - import tomli + import tomli # noqa: F401 except ImportError: return @@ -242,7 +242,7 @@ def testPyprojectTomlNoYapfSection(self): def testPyprojectTomlParseYapfSection(self): try: - import tomli + import tomli # noqa: F401 except ImportError: return diff --git a/yapftests/yapf_test.py b/yapftests/yapf_test.py index 7c241bdda..8a31623e1 100644 --- a/yapftests/yapf_test.py +++ b/yapftests/yapf_test.py @@ -24,7 +24,8 @@ import textwrap import unittest from io import StringIO -from lib2to3.pgen2 import tokenize + +from yapf_third_party._ylib2to3.pgen2 import tokenize from yapf.yapflib import errors from yapf.yapflib import style