From 244c0edd0f25d384bc3f0a4a0a2963504d79ed10 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 15 Sep 2017 17:34:58 -0700 Subject: [PATCH] * src/lexer.l: properly handle backslash escaping in strings and regex We used to look at a string literal like "\\" as not ending, and instead mistook \" as escaping the ending quotes. This (longstanding, embarrassing) bug would lead to weird syntax errors in lenses when encountered. Fixes https://github.com/hercules-team/augeas/issues/495 --- NEWS | 3 +++ lenses/tests/test_httpd.aug | 4 ++-- lenses/tests/test_json.aug | 4 ++-- src/lexer.l | 6 +++--- tests/modules/pass_lexer.aug | 13 +++++++++++++ 5 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 tests/modules/pass_lexer.aug diff --git a/NEWS b/NEWS index 56b89dd49..9c8c00769 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,9 @@ back to text fails. They now make it clearer what part of the tree was problematic, and what the tree should have looked like. * Fixed the pkg-config file, which should now be usable + * Fix handling of backslash-escaping in strings and regular expressions + in the lens language. We used to handle constructs like "\\" and + /\\\\/ incorrectly. (Issue #495) - API changes * add function aug_ns_attr to allow iterating through a nodeset quickly. See examples/dump.c for an example of how to use them diff --git a/lenses/tests/test_httpd.aug b/lenses/tests/test_httpd.aug index c860860fc..ce74b059e 100644 --- a/lenses/tests/test_httpd.aug +++ b/lenses/tests/test_httpd.aug @@ -156,10 +156,10 @@ test Httpd.lns get c5 = { "arg" = "agent" } } -let c7 = "LogFormat \"%v:%p %h %l %u %t \\"%r\\" %>s %O \\"%{Referer}i\\" \\"%{User-Agent}i\\"\" vhost_combined\n" +let c7 = "LogFormat \"%v:%p %h %l %u %t \\\"%r\\\" %>s %O \\\"%{Referer}i\\\" \\\"%{User-Agent}i\\\"\" vhost_combined\n" test Httpd.lns get c7 = { "directive" = "LogFormat" - { "arg" = "\"%v:%p %h %l %u %t \\"%r\\" %>s %O \\"%{Referer}i\\" \\"%{User-Agent}i\\"\"" } + { "arg" = "\"%v:%p %h %l %u %t \\\"%r\\\" %>s %O \\\"%{Referer}i\\\" \\\"%{User-Agent}i\\\"\"" } { "arg" = "vhost_combined" } } diff --git a/lenses/tests/test_json.aug b/lenses/tests/test_json.aug index d18b03b26..cb61cef10 100644 --- a/lenses/tests/test_json.aug +++ b/lenses/tests/test_json.aug @@ -489,8 +489,8 @@ test lns get s_commented = (* Test lns Allow escaped quotes, backslashes and tabs/newlines *) -test lns get "{ \"filesystem\": \"ext3\\" \\\\ \t \r\n SEC_TYPE=\\"ext2\" }\n" = +test lns get "{ \"filesystem\": \"ext3\\\" \\\\ \t \r\n SEC_TYPE=\\\"ext2\" }\n" = { "dict" { "entry" = "filesystem" - { "string" = "ext3\\" \\\\ \t \r\n SEC_TYPE=\\"ext2" } } + { "string" = "ext3\\\" \\\\ \t \r\n SEC_TYPE=\\\"ext2" } } { } } diff --git a/src/lexer.l b/src/lexer.l index 519f01344..78d4a89a0 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -87,20 +87,20 @@ ARROW -> { - \"([^\"]|\\\")*\" { + \"([^\\\"]|\\(.|\n))*\" { loc_update(yylloc, yytext, yyleng); yylval->string = unescape(yytext+1, yyleng-2, STR_ESCAPES); return DQUOTED; } - \/([^/]|\\\/)*\/i { + \/([^\\\/]|\\(.|\n))*\/i { loc_update(yylloc, yytext, yyleng); yylval->regexp.nocase = 1; yylval->regexp.pattern = regexp_literal(yytext+1, yyleng-3); return REGEXP; } - \/([^/]|\\\/)*\/ { + \/([^\\\/]|\\(.|\n))*\/ { loc_update(yylloc, yytext, yyleng); yylval->regexp.nocase = 0; yylval->regexp.pattern = regexp_literal(yytext+1, yyleng-2); diff --git a/tests/modules/pass_lexer.aug b/tests/modules/pass_lexer.aug new file mode 100644 index 000000000..703f4e8eb --- /dev/null +++ b/tests/modules/pass_lexer.aug @@ -0,0 +1,13 @@ +module Pass_Lexer = + + (* Some tests for corner cases for the lexer; they will all lead to + * syntax errors if we are not lexing correctly *) + + let s1 = "\\" + let s2 = "\ +" + + let r1 = /\\\\/ + + let slash = "/" (* Just here to cause trouble if the lexer does not + * properly terminate the above expressions *)