Skip to content

Commit

Permalink
Rework correction of line continuation in strings
Browse files Browse the repository at this point in the history
- Actually provide line continuation in strings that complies with the
  specification; the fix provided by rspivak/slimit#24 was
  problematic (breaks position tracking) and incomplete, as it amounted
  to no more than a clumsy workaround (come on, duplicated regex code).
- Also provide the expected minified output that would strip out these
  line continuation fragments.
  • Loading branch information
metatoaster committed Apr 22, 2018
1 parent 5074001 commit 3e5ee6c
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 36 deletions.
6 changes: 6 additions & 0 deletions src/calmjs/parse/handlers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Indent,
Dedent,
)
from calmjs.parse.lexers.es5 import PATT_LINE_CONTINUATION

required_space = re.compile(r'^(?:\w\w|\+\+|\-\-)$')

Expand Down Expand Up @@ -179,6 +180,11 @@ def layout_handler_space_minimum(dispatcher, node, before, after, prev):
yield space_imply


def deferrable_handler_literal_continuation(dispatcher, node):
    """
    Return the value of the literal node with every line continuation
    sequence (a backslash followed by a line terminator) removed.

    The dispatcher argument is accepted but not used by this handler.
    """

    # relies on the pattern from the es5 lexer; assume the es5 method
    # will continue to work.
    stripped = PATT_LINE_CONTINUATION.sub('', node.value)
    return stripped


def default_rules():
return {'layout_handlers': {
OpenBlock: layout_handler_openbrace,
Expand Down
35 changes: 8 additions & 27 deletions src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@

PATT_LINE_TERMINATOR_SEQUENCE = re.compile(
ur'(\n|\r(?!\n)|\u2028|\u2029|\r\n)', flags=re.S)
PATT_LINE_CONTINUATION = re.compile(
ur'\\(\n|\r(?!\n)|\u2028|\u2029|\r\n)', flags=re.S)


class Lexer(object):
Expand Down Expand Up @@ -505,57 +507,36 @@ def t_regex_error(self, token):
)
"""

string = r"""
string = ur"""
(?:
# double quoted string
(?:" # opening double quote
(?: [^"\\\n\r] # no \, line terminators or "
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
(?: [^"\\\n\r\u2028\u2029] # no \, line terminators or "
| \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # or line continuation
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters or
| \\x[0-9a-fA-F]{2} # or hex_escape_sequence
| \\u[0-9a-fA-F]{4} # or unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
| \\0 # or <NUL> (15.10.2.11)
)*? # zero or many times
(?: \\\n # multiline ?
(?:
[^"\\\n\r] # no \, line terminators or "
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
| \\x[0-9a-fA-F]{2} # or hex_escape_sequence
| \\u[0-9a-fA-F]{4} # or unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
| \\0 # or <NUL> (15.10.2.11)
)*? # zero or many times
)*
") # closing double quote
|
# single quoted string
(?:' # opening single quote
(?: [^'\\\n\r] # no \, line terminators or '
(?: [^'\\\n\r\u2028\u2029] # no \, line terminators or "
| \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # or line continuation
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
| \\x[0-9a-fA-F]{2} # or hex_escape_sequence
| \\u[0-9a-fA-F]{4} # or unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
| \\0 # or <NUL> (15.10.2.11)
)*? # zero or many times
(?: \\\n # multiline ?
(?:
[^'\\\n\r] # no \, line terminators or '
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
| \\x[0-9a-fA-F]{2} # or hex_escape_sequence
| \\u[0-9a-fA-F]{4} # or unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
| \\0 # or <NUL> (15.10.2.11)
)*? # zero or many times
)*
') # closing single quote
)
""" # "

@ply.lex.TOKEN(string)
def t_STRING(self, token):
# remove escape + new line sequence used for strings
# written across multiple lines of code
token.value = token.value.replace('\\\n', '')
return token

# XXX: <ZWNJ> <ZWJ> ?
Expand Down
10 changes: 9 additions & 1 deletion src/calmjs/parse/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Indent,
Dedent,
Resolve,
Literal,
)
from calmjs.parse.handlers.core import (
rule_handler_noop,
Expand All @@ -34,6 +35,8 @@
layout_handler_space_optional_pretty,
layout_handler_space_minimum,

deferrable_handler_literal_continuation,

default_rules,
minimum_rules,
)
Expand Down Expand Up @@ -111,7 +114,12 @@ def minify(drop_semi=True):
})

def minify_rule():
return {'layout_handlers': layout_handlers}
return {
'layout_handlers': layout_handlers,
'deferrable_handlers': {
Literal: deferrable_handler_literal_continuation,
},
}

return minify_rule

Expand Down
13 changes: 13 additions & 0 deletions src/calmjs/parse/ruletypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,5 +478,18 @@ def __call__(self, dispatcher, node):
return node.value


class Literal(Deferrable):
    """
    Rule for literal nodes, such as strings, whose rendered value may
    be produced by a deferrable handler looked up on the dispatcher.
    """

    def __call__(self, dispatcher, node):
        deferred = dispatcher.deferrable(self)
        if deferred is NotImplemented:
            # no handler available for this rule; emit the value as is.
            return node.value
        # the handler is responsible for producing the final value
        return deferred(dispatcher, node)


children_newline = JoinAttr(Iter(), value=(Newline,))
children_comma = JoinAttr(Iter(), value=(Text(value=','), Space,))
34 changes: 28 additions & 6 deletions src/calmjs/parse/tests/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,12 @@
# multiline string (string written across multiple lines
# of code) https://github.com/rspivak/slimit/issues/24
'slimit_issue_24_multi_line_code_double',
(r"""var a = 'hello \
world'""",
['VAR var', 'ID a', 'EQ =', "STRING 'hello world'"]),
("var a = 'hello \\\n world'""",
['VAR var', 'ID a', 'EQ =', "STRING 'hello \\\n world'"]),
), (
'slimit_issue_24_multi_line_code_single',
(r'''var a = "hello \
world"''',
['VAR var', 'ID a', 'EQ =', 'STRING "hello world"']),
('var a = "hello \\\r world"',
['VAR var', 'ID a', 'EQ =', 'STRING "hello \\\r world"']),
), (
# # Comments
# ("""
Expand Down Expand Up @@ -354,6 +352,30 @@
'PERIOD .', 'ID split', 'LPAREN (', r"STRING '\1'", 'RPAREN )',
'PERIOD .', 'ID split', 'LPAREN (', r"STRING '\0'", 'RPAREN )',
'SEMI ;'])
), (
'section_7_8_4_string_literal_with_7_3_conformance',
("'<LF>\\\n<CR>\\\r<LS>\\\u2028<PS>\\\u2029<CR><LF>\\\r\n'",
["STRING '<LF>\\\n<CR>\\\r<LS>\\\u2028<PS>\\\u2029<CR><LF>\\\r\n'"])
),
]

es5_error_cases = [
(
'naked_line_separator_in_string',
"'test\u2028foo'",
'Illegal character "\'" at 1:1',
), (
'naked_line_feed_in_string',
"'test\u2029foo'",
'Illegal character "\'" at 1:1',
), (
'naked_crnl_in_string',
"'test\r\nfoo'",
'Illegal character "\'" at 1:1',
), (
'naked_cr_in_string',
"'test\\\n\rfoo'",
'Illegal character "\'" at 1:1',
)
]

Expand Down
8 changes: 7 additions & 1 deletion src/calmjs/parse/tests/test_es5_lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@
from calmjs.parse.exceptions import ECMASyntaxError

from calmjs.parse.testing.util import build_equality_testcase
from calmjs.parse.testing.util import build_exception_testcase
from calmjs.parse.tests.lexer import (
run_lexer,
run_lexer_pos,
es5_cases,
es5_pos_cases,
es5_error_cases,
)


Expand Down Expand Up @@ -68,7 +70,7 @@ def test_extra_ending_braces(self):


LexerKeywordTestCase = build_equality_testcase(
'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (
'LexerKeywordTestCase', partial(run_lexer, lexer_cls=Lexer), (
(label, data[0], data[1],) for label, data in [(
# Keywords
# ('break case ...', ['BREAK break', 'CASE case', ...])
Expand All @@ -84,6 +86,10 @@ def test_extra_ending_braces(self):
'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (
(label, data[0], data[1],) for label, data in es5_cases))

LexerErrorTestCase = build_exception_testcase(
'LexerErrorTestCase', partial(
run_lexer, lexer_cls=Lexer), es5_error_cases, ECMASyntaxError)

LexerPosTestCase = build_equality_testcase(
'LexerPosTestCase', partial(
run_lexer_pos, lexer_cls=Lexer), es5_pos_cases)
9 changes: 9 additions & 0 deletions src/calmjs/parse/tests/test_es5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,15 @@ def regenerate(value):
'exc';
""",
"Unexpected \"'exc'\" at 2:3 after 'throw' at 1:1",
), (
# note that line continuations do not swallow the newline when tracking line numbers
'throw_error_after_line_continuation_lineno',
r"""
s = 'something \
'
throw;
""",
"Unexpected ';' at 3:6 after 'throw' at 3:1",
), (
'setter_single_arg',
"""
Expand Down
15 changes: 15 additions & 0 deletions src/calmjs/parse/tests/test_es5_unparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1759,6 +1759,14 @@ def parse_to_sourcemap_tokens_min(text):
"""
for (index in [1, 2, 3]) /^salign$/;
""",
), (
'line_continuation_string',
r"""
{
var a = "\
";
}
""",
)]))
)

Expand Down Expand Up @@ -2147,6 +2155,13 @@ def parse_to_sourcemap_tokens_minify(text):
})();
""",
'(function $(){(function a(){var a=1;})();})();',
), (
'line_continuation_string',
r"""
var a = "\
";
""",
'var a=" ";',
)])
)

Expand Down
8 changes: 8 additions & 0 deletions src/calmjs/parse/tests/test_handlers_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
layout_handler_openbrace,
layout_handler_closebrace,
layout_handler_semicolon,
deferrable_handler_literal_continuation,
)
from calmjs.parse.unparsers.walker import Dispatcher

Expand Down Expand Up @@ -151,3 +152,10 @@ def run(before, after, prev):

# The first layout rule
self.assertEqual(run(';', 'function', None), newline)

def test_deferrable_handler_literal_continuation(self):
    # a backslash followed by CRLF inside the literal is expected to be
    # dropped from the value returned by the handler.
    literal_node = Node()
    literal_node.value = '"foo\\\r\nbar"'
    result = deferrable_handler_literal_continuation(
        Dispatcher({}, None, {}, {}), literal_node)
    self.assertEqual('"foobar"', result)
10 changes: 10 additions & 0 deletions src/calmjs/parse/tests/test_ruletypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import unittest

from calmjs.parse.asttypes import Identifier
from calmjs.parse.asttypes import String
from calmjs.parse.ruletypes import Declare
from calmjs.parse.ruletypes import Resolve
from calmjs.parse.ruletypes import Literal


class Node(object):
Expand Down Expand Up @@ -119,3 +121,11 @@ def test_resolve(self):
# if the handler is not implemented
self.handler = NotImplemented
self.assertEqual('value', rslv(self.dispatcher, identifier))

def test_literal_string(self):
    rule = Literal()
    node = String('"value"')
    # with the handler set up by this test case (defined outside this
    # view), the rule is expected to yield the handler's result
    handled = rule(self.dispatcher, node)
    self.assertEqual('"VALUE"', handled)
    # once the handler is not implemented, the raw value passes through
    self.handler = NotImplemented
    unhandled = rule(self.dispatcher, node)
    self.assertEqual('"value"', unhandled)
3 changes: 2 additions & 1 deletion src/calmjs/parse/unparsers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
ElisionJoinAttr,
)
from calmjs.parse.ruletypes import (
Literal,
Declare,
Resolve,
ResolveFuncName,
Expand Down Expand Up @@ -154,7 +155,7 @@
Text(value='null'),
),
'String': (
Attr(attr='value'),
Attr(Literal()),
),
'Continue': (
Text(value='continue'), Optional('identifier', (
Expand Down

0 comments on commit 3e5ee6c

Please sign in to comment.