diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithEscapedChar.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithEscapedChar.stg index 04dd3c3c58..49ba75faff 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithEscapedChar.stg +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithEscapedChar.stg @@ -19,5 +19,5 @@ Errors() ::= "" grammar(grammarName) ::= << lexer grammar ; DASHBRACK : [\\-\]]+ {} ; -WS : [ \u]+ -> skip ; +WS : [ ]+ -> skip ; >> diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithMissingEscapeChar.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithMissingEscapeChar.stg index 61d1cef90c..188242277e 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithMissingEscapeChar.stg +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithMissingEscapeChar.stg @@ -17,5 +17,5 @@ Errors() ::= "" grammar(grammarName) ::= << lexer grammar ; I : [0-9]+ {} ; -WS : [ \u]+ -> skip ; +WS : [ ]+ -> skip ; >> diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithReversedRange.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithReversedRange.stg deleted file mode 100644 index f2028a6c47..0000000000 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/CharSetWithReversedRange.stg +++ /dev/null @@ -1,21 +0,0 @@ -TestType() ::= "Lexer" - -Grammar ::= [ - "L": {} -] - -Input() ::= "9" - -Output() ::= << -A -[@0,0:0='9',\<1>,1:0] -[@1,1:0='\',\<-1>,1:1]<\n> ->> - -Errors() ::= "" - -grammar(grammarName) ::= << -lexer grammar ; -A : [z-a9]+ {} ; -WS : [ \u]+ -> skip ; ->> diff --git 
a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/Index.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/Index.stg index d6477c881e..7e495ad876 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/Index.stg +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/LexerExec/Index.stg @@ -31,7 +31,6 @@ TestTemplates ::= [ "CharSetWithMissingEndRange": [], "CharSetWithMissingEscapeChar": [], "CharSetWithEscapedChar": [], - "CharSetWithReversedRange": [], "CharSetWithQuote1": [], "CharSetWithQuote2": [], "PositionAdjustingLexer": [], diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/BaseTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/BaseTest.java index 23eb6eca40..3a1a96ab6c 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/BaseTest.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/BaseTest.java @@ -485,6 +485,8 @@ public boolean createProject() { XPathExpression exp = XPathFactory.newInstance().newXPath() .compile("/Project/ItemGroup/ProjectReference[@Include='" + runtimeName + "']"); Element node = (Element)exp.evaluate(prjXml, XPathConstants.NODE); + if (isWindows() && runtimeProjPath.startsWith("/")) + runtimeProjPath = runtimeProjPath.substring(1); node.setAttribute("Include", runtimeProjPath.replace("/", "\\")); // update project file list exp = XPathFactory.newInstance().newXPath().compile("/Project/ItemGroup[Compile/@Include='AssemblyInfo.cs']"); @@ -533,7 +535,8 @@ public String execTest() { String exec = locateExec(); String[] args = isWindows() ? new String[] { exec, new File(tmpdir, "input").getAbsolutePath() } : - new String[] { "mono", exec, new File(tmpdir, "input").getAbsolutePath() }; + // inline disabling to avoid "Method is too complex" exception. 
+ new String[] { "mono", "-O=-inline", exec, new File(tmpdir, "input").getAbsolutePath() }; ProcessBuilder pb = new ProcessBuilder(args); pb.directory(new File(tmpdir)); Process process = pb.start(); diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/TestLexerExec.java index 96b057a2e2..10e9958723 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/TestLexerExec.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/csharp/TestLexerExec.java @@ -157,7 +157,7 @@ public void testCharSetWithEscapedChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(94); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("DASHBRACK : [\\-\\]]+ {Console.WriteLine(\"DASHBRACK\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="- ] "; String found = execLexer("L.g4", grammar, "L", input, false); @@ -197,7 +197,7 @@ public void testCharSetWithMissingEscapeChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(77); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("I : [0-9]+ {Console.WriteLine(\"I\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="34 "; String found = execLexer("L.g4", grammar, "L", input, false); @@ -245,25 +245,6 @@ public void testCharSetWithQuote2() throws Exception { "[@1,4:3='',<-1>,1:4]\n", found); assertNull(this.stderrDuringParse); - } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. 
*/ - @Test - public void testCharSetWithReversedRange() throws Exception { - mkdir(tmpdir); - - StringBuilder grammarBuilder = new StringBuilder(78); - grammarBuilder.append("lexer grammar L;\n"); - grammarBuilder.append("A : [z-a9]+ {Console.WriteLine(\"A\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); - String grammar = grammarBuilder.toString(); - String input ="9"; - String found = execLexer("L.g4", grammar, "L", input, false); - assertEquals( - "A\n" + - "[@0,0:0='9',<1>,1:0]\n" + - "[@1,1:0='',<-1>,1:1]\n", found); - assertNull(this.stderrDuringParse); - } /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ @Test diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestLexerExec.java index ed74b1edae..6d59ce4c6c 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestLexerExec.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestLexerExec.java @@ -170,7 +170,7 @@ public void testCharSetWithEscapedChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(95); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("DASHBRACK : [\\-\\]]+ {System.out.println(\"DASHBRACK\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="- ] "; @@ -214,7 +214,7 @@ public void testCharSetWithMissingEscapeChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(78); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("I : [0-9]+ {System.out.println(\"I\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="34 "; @@ -269,27 +269,6 @@ public void testCharSetWithQuote2() throws Exception { } - /* This 
file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ - @Test - public void testCharSetWithReversedRange() throws Exception { - mkdir(tmpdir); - - StringBuilder grammarBuilder = new StringBuilder(79); - grammarBuilder.append("lexer grammar L;\n"); - grammarBuilder.append("A : [z-a9]+ {System.out.println(\"A\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); - String grammar = grammarBuilder.toString(); - - String input ="9"; - String found = execLexer("L.g4", grammar, "L", input, false); - assertEquals( - "A\n" + - "[@0,0:0='9',<1>,1:0]\n" + - "[@1,1:0='',<-1>,1:1]\n", found); - assertNull(this.stderrDuringParse); - - } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ @Test public void testEOFByItself() throws Exception { diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript/node/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript/node/TestLexerExec.java index 7b353bea7e..ce1fd0ff3a 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript/node/TestLexerExec.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript/node/TestLexerExec.java @@ -159,7 +159,7 @@ public void testCharSetWithEscapedChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(88); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("DASHBRACK : [\\-\\]]+ {console.log(\"DASHBRACK\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="- ] "; String found = execLexer("L.g4", grammar, "L", input, false); @@ -199,7 +199,7 @@ public void testCharSetWithMissingEscapeChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(71); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("I : [0-9]+ {console.log(\"I\");} ;\n"); - 
grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="34 "; String found = execLexer("L.g4", grammar, "L", input, false); @@ -247,25 +247,6 @@ public void testCharSetWithQuote2() throws Exception { "[@1,4:3='',<-1>,1:4]\n", found); assertNull(this.stderrDuringParse); - } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ - @Test - public void testCharSetWithReversedRange() throws Exception { - mkdir(tmpdir); - - StringBuilder grammarBuilder = new StringBuilder(72); - grammarBuilder.append("lexer grammar L;\n"); - grammarBuilder.append("A : [z-a9]+ {console.log(\"A\");} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); - String grammar = grammarBuilder.toString(); - String input ="9"; - String found = execLexer("L.g4", grammar, "L", input, false); - assertEquals( - "A\n" + - "[@0,0:0='9',<1>,1:0]\n" + - "[@1,1:0='',<-1>,1:1]\n", found); - assertNull(this.stderrDuringParse); - } /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. 
*/ @Test diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestLexerExec.java index b9b1186e8c..a9c97bb5dc 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestLexerExec.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestLexerExec.java @@ -170,7 +170,7 @@ public void testCharSetWithEscapedChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(81); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("DASHBRACK : [\\-\\]]+ {print(\"DASHBRACK\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="- ] "; @@ -214,7 +214,7 @@ public void testCharSetWithMissingEscapeChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(64); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("I : [0-9]+ {print(\"I\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="34 "; @@ -269,27 +269,6 @@ public void testCharSetWithQuote2() throws Exception { } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. 
*/ - @Test - public void testCharSetWithReversedRange() throws Exception { - mkdir(tmpdir); - - StringBuilder grammarBuilder = new StringBuilder(65); - grammarBuilder.append("lexer grammar L;\n"); - grammarBuilder.append("A : [z-a9]+ {print(\"A\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); - String grammar = grammarBuilder.toString(); - - String input ="9"; - String found = execLexer("L.g4", grammar, "L", input, false); - assertEquals( - "A\n" + - "[@0,0:0='9',<1>,1:0]\n" + - "[@1,1:0='',<-1>,1:1]\n", found); - assertNull(this.stderrDuringParse); - - } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ @Test public void testEOFByItself() throws Exception { diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/python3/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/python3/TestLexerExec.java index 1d87851ef4..4167349975 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/python3/TestLexerExec.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/python3/TestLexerExec.java @@ -170,7 +170,7 @@ public void testCharSetWithEscapedChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(81); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("DASHBRACK : [\\-\\]]+ {print(\"DASHBRACK\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="- ] "; @@ -214,7 +214,7 @@ public void testCharSetWithMissingEscapeChar() throws Exception { StringBuilder grammarBuilder = new StringBuilder(64); grammarBuilder.append("lexer grammar L;\n"); grammarBuilder.append("I : [0-9]+ {print(\"I\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); + grammarBuilder.append("WS : [ ]+ -> skip ;"); String grammar = grammarBuilder.toString(); String input ="34 "; @@ -269,27 +269,6 @@ public void testCharSetWithQuote2() throws 
Exception { } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ - @Test - public void testCharSetWithReversedRange() throws Exception { - mkdir(tmpdir); - - StringBuilder grammarBuilder = new StringBuilder(65); - grammarBuilder.append("lexer grammar L;\n"); - grammarBuilder.append("A : [z-a9]+ {print(\"A\")} ;\n"); - grammarBuilder.append("WS : [ \\u]+ -> skip ;"); - String grammar = grammarBuilder.toString(); - - String input ="9"; - String found = execLexer("L.g4", grammar, "L", input, false); - assertEquals( - "A\n" + - "[@0,0:0='9',<1>,1:0]\n" + - "[@1,1:0='',<-1>,1:1]\n", found); - assertNull(this.stderrDuringParse); - - } - /* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ @Test public void testEOFByItself() throws Exception { diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java index 08648f5c41..d99a3283bf 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java @@ -111,14 +111,14 @@ public void testA() throws Exception { "A : ('0x' | '0X') ;"); String expecting = "s0->RuleStart_A_1\n" + - "RuleStart_A_1->BlockStart_7\n" + - "BlockStart_7->s3\n" + - "BlockStart_7->s5\n" + - "s3-'0'->s4\n" + - "s5-'0'->s6\n" + - "s4-'x'->BlockEnd_8\n" + - "s6-'X'->BlockEnd_8\n" + - "BlockEnd_8->RuleStop_A_2\n"; + "RuleStart_A_1->BlockStart_11\n" + + "BlockStart_11->s3\n" + + "BlockStart_11->s7\n" + + "s3-'0'->s5\n" + + "s7-'0'->s9\n" + + "s5-'x'->BlockEnd_12\n" + + "s9-'X'->BlockEnd_12\n" + + "BlockEnd_12->RuleStop_A_2\n"; checkTokensRule(g, null, expecting); } @Test public void testRange() throws Exception { @@ -140,14 +140,14 @@ public void testA() throws Exception { ); String expecting = "s0->RuleStart_A_1\n" + - "RuleStart_A_1->BlockStart_7\n" + - 
"BlockStart_7->s3\n" + - "BlockStart_7->s5\n" + + "RuleStart_A_1->BlockStart_9\n" + + "BlockStart_9->s3\n" + + "BlockStart_9->s6\n" + "s3-'a'..'c'->s4\n" + - "s5-'q'->s6\n" + - "s4-'h'->BlockEnd_8\n" + - "s6-'j'..'l'->BlockEnd_8\n" + - "BlockEnd_8->RuleStop_A_2\n"; + "s6-'q'->s8\n" + + "s4-'h'->BlockEnd_10\n" + + "s8-'j'..'l'->BlockEnd_10\n" + + "BlockEnd_10->RuleStop_A_2\n"; checkTokensRule(g, null, expecting); } @Test public void testStringLiteralInParser() throws Exception { @@ -927,11 +927,11 @@ public void testA() throws Exception { "s0->RuleStart_A_2\n" + "s0->RuleStart_X_4\n" + "RuleStart_A_2->s10\n" + - "RuleStart_X_4->s12\n" + - "s10-'a'->s11\n" + - "s12-'x'->s13\n" + - "s11->RuleStop_A_3\n" + - "s13->RuleStop_X_5\n"; + "RuleStart_X_4->s13\n" + + "s10-'a'->s12\n" + + "s13-'x'->s15\n" + + "s12->RuleStop_A_3\n" + + "s15->RuleStop_X_5\n"; checkTokensRule(g, "DEFAULT_MODE", expecting); } @Test public void testMode() throws Exception { @@ -945,12 +945,12 @@ public void testA() throws Exception { String expecting = "s1->RuleStart_B_6\n" + "s1->RuleStart_C_8\n" + - "RuleStart_B_6->s14\n" + - "RuleStart_C_8->s16\n" + - "s14-'b'->s15\n" + - "s16-'c'->s17\n" + - "s15->RuleStop_B_7\n" + - "s17->RuleStop_C_9\n"; + "RuleStart_B_6->s16\n" + + "RuleStart_C_8->s19\n" + + "s16-'b'->s18\n" + + "s19-'c'->s21\n" + + "s18->RuleStop_B_7\n" + + "s21->RuleStop_C_9\n"; checkTokensRule(g, "FOO", expecting); } void checkTokensRule(LexerGrammar g, String modeName, String expecting) { diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java index aaf28c3b07..48a84d6751 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java @@ -43,6 +43,7 @@ import java.util.List; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; /** * Lexer rules are little 
quirky when it comes to wildcards. Problem @@ -309,6 +310,142 @@ public class TestATNLexerInterpreter extends BaseTest { checkLexerMatches(lg, "a", expecting); } + @Test public void testLexerCaseInsensitive() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n" + + "\n" + + "options { caseInsensitive = true; }" + + "\n" + + "WS: [ \\t\\r\\n] -> skip;\n" + + "\n" + + "SIMPLE_TOKEN: 'and';\n" + + "TOKEN_WITH_SPACES: 'as' 'd' 'f';\n" + + "TOKEN_WITH_DIGITS: 'INT64';\n" + + "TOKEN_WITH_UNDERSCORE: 'TOKEN_WITH_UNDERSCORE';\n" + + "BOOL: 'true' | 'FALSE';\n" + + "SPECIAL: '==';\n" + + "RANGE: [a-z0-9]+;\n" // [a-zA-Z0-9] + ); + + String inputString = + "and AND aND\n" + + "asdf ASDF\n" + + "int64\n" + + "token_WITH_underscore\n" + + "TRUE FALSE\n" + + "==\n" + + "A0bcDE93\n"; + + String expecting = Utils.join(new String[] { + "SIMPLE_TOKEN", "SIMPLE_TOKEN", "SIMPLE_TOKEN", + "TOKEN_WITH_SPACES", "TOKEN_WITH_SPACES", + "TOKEN_WITH_DIGITS", + "TOKEN_WITH_UNDERSCORE", + "BOOL", "BOOL", + "SPECIAL", + "RANGE", "EOF" }, + ", WS, "); + + checkLexerMatches(lg, inputString, expecting); + } + + @Test public void testLexerCaseInsensitiveWithNot() throws Exception { + String grammar = + "lexer grammar L;\n" + + "options { caseInsensitive = true; }" + + "TOKEN_WITH_NOT: ~'f';\n"; // ~('f' | 'F) + execLexer("L.g4", grammar, "L", "F"); + + assertEquals("line 1:0 token recognition error at: 'F'\n", stderrDuringParse); + } + + @Test public void testLexerCaseInsensitiveFragments() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n" + + "options { caseInsensitive = true; }" + + "TOKEN_0: FRAGMENT 'd'+;\n" + + "TOKEN_1: FRAGMENT 'e'+;\n" + + "FRAGMENT: 'abc';\n"); + + String inputString = + "ABCDDD"; + + String expecting = "TOKEN_0, EOF"; + + checkLexerMatches(lg, inputString, expecting); + } + + @Test public void testLexerCaseInsensitiveInModes() throws Exception { + String lg = + "lexer grammar L;\n" + + + "options { caseInsensitive 
= true; } \n" + // caseInsensitive + "Token_0: 'abc0_' -> mode(CASE_SENSITIVE);\n" + + + "mode CASE_SENSITIVE, caseSensitive; \n" + // caseSensitive + "Token_1: 'DEF1_' -> mode(CASE_INSENSITIVE);\n" + + + "mode CASE_INSENSITIVE; \n" + // default, i.e. caseInsensitive + "Token_2: 'gHi2_' -> mode(CASE_INSENSITIVE_EXPLICIT);\n" + + + "mode CASE_INSENSITIVE_EXPLICIT, caseInsensitive;\n" + // caseInsensitive + "Token_3: 'JkL3';"; + + String inputString = "ABC0_DEF1_GHI2_JKL3"; + + String result = execLexer("L.g4", lg, "L", "ABC0_DEF1_GHI2_JKL3", false); + + assertEquals("[@0,0:4='ABC0_',<1>,1:0]\n" + + "[@1,5:9='DEF1_',<2>,1:5]\n" + + "[@2,10:14='GHI2_',<3>,1:10]\n" + + "[@3,15:18='JKL3',<4>,1:15]\n" + + "[@4,19:18='',<-1>,1:19]\n", result); + } + + @Test public void testLexerCaseInsensitiveInOneMode() throws Exception { + String lg = + "lexer grammar L;\n" + + + "options { caseInsensitive = true; } \n" + // caseInsensitive + "Token_1: 'a_';\n" + + + "mode DEFAULT_MODE, caseInsensitive; \n" + // caseInsensitive + "Token_2: 'b_';\n" + + + "mode DEFAULT_MODE, caseSensitive; \n" + // caseSensitive + "Token_3: 'd';\n"; + + String result = execLexer("L.g4", lg, "L", "A_B_D", false); + assertEquals("line 1:4 token recognition error at: 'D'\n", stderrDuringParse); + assertEquals("[@0,0:1='A_',<1>,1:0]\n" + + "[@1,2:3='B_',<2>,1:2]\n" + + "[@2,5:4='',<-1>,1:5]\n", result); + } + + @Test public void testLexerCaseInsensitiveWithDifferentCultures() throws Exception { + // From here: http://www.periodni.com/unicode_utf-8_encoding.html + // TODO: Add tokens on Arabic, Japan, Chinese and other languages. 
+ LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n" + + "options { caseInsensitive = true; }" + + "ENGLISH_TOKEN: [a-z_]+;\n" + + "GERMAN_TOKEN: [äéöüß_]+;\n" + + "FRENCH_TOKEN: [àâæ-ëîïôœùûüÿ_]+;\n" + + "CROATIAN_TOKEN: [ćčđšž_]+;\n" + + "ITALIAN_TOKEN: [àèéìòù_]+;\n" + + "SPANISH_TOKEN: [áéíñóúü¡¿_]+;\n" + + "GREEK_TOKEN: [α-ω_]+;\n" + + "RUSSIAN_TOKEN: [а-я_]+;\n" + ); + + String inputString = "abcXYZ_äéöüßÄÉÖÜ_àâæçÙÛÜŸ_ćčđĐŠŽ_àèéÌÒÙ_áéÚÜ¡¿_αβγΧΨΩ_абвЭЮЯ_"; + + String expecting = "ENGLISH_TOKEN, GERMAN_TOKEN, FRENCH_TOKEN, CROATIAN_TOKEN, ITALIAN_TOKEN, SPANISH_TOKEN, " + + "GREEK_TOKEN, RUSSIAN_TOKEN, EOF"; + + checkLexerMatches(lg, inputString, expecting); + } + protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) { ATN atn = createATN(lg, true); CharStream input = new ANTLRInputStream(inputString); diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java index 2c1a2b098c..55b3a76bd4 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java @@ -291,19 +291,21 @@ public class TestATNSerialization extends BaseTest { "4:RULE_STOP 1\n" + "5:BASIC 0\n" + "6:BASIC 0\n" + - "7:BASIC 1\n" + + "7:BASIC 0\n" + "8:BASIC 1\n" + + "9:BASIC 1\n" + + "10:BASIC 1\n" + "rule 0:1 1\n" + "rule 1:3 2\n" + "mode 0:0\n" + "0->1 EPSILON 0,0,0\n" + "0->3 EPSILON 0,0,0\n" + "1->5 EPSILON 0,0,0\n" + - "3->7 EPSILON 0,0,0\n" + - "5->6 ATOM 97,0,0\n" + - "6->2 EPSILON 0,0,0\n" + - "7->8 ATOM 98,0,0\n" + - "8->4 EPSILON 0,0,0\n" + + "3->8 EPSILON 0,0,0\n" + + "5->7 ATOM 97,0,0\n" + + "7->2 EPSILON 0,0,0\n" + + "8->10 ATOM 98,0,0\n" + + "10->4 EPSILON 0,0,0\n" + "0:0\n"; ATN atn = createATN(lg, true); String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); @@ -345,13 +347,14 @@ public class TestATNSerialization extends BaseTest { 
"3:BASIC 0\n" + "4:BASIC 0\n" + "5:BASIC 0\n" + + "6:BASIC 0\n" + "rule 0:1 1\n" + "mode 0:0\n" + "0->1 EPSILON 0,0,0\n" + "1->3 EPSILON 0,0,0\n" + - "3->4 ATOM 97,0,0\n" + - "4->5 ATOM 0,0,1\n" + - "5->2 EPSILON 0,0,0\n" + + "3->5 ATOM 97,0,0\n" + + "5->6 ATOM 0,0,1\n" + + "6->2 EPSILON 0,0,0\n" + "0:0\n"; ATN atn = createATN(lg, true); String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); @@ -369,19 +372,21 @@ public class TestATNSerialization extends BaseTest { "2:RULE_STOP 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "5:BLOCK_START 0 6\n" + - "6:BLOCK_END 0\n" + + "5:BASIC 0\n" + + "6:BASIC 0\n" + + "7:BLOCK_START 0 8\n" + + "8:BLOCK_END 0\n" + "rule 0:1 1\n" + "mode 0:0\n" + "0:EOF, '\\n'..'\\n'\n" + "0->1 EPSILON 0,0,0\n" + "1->3 EPSILON 0,0,0\n" + - "3->5 ATOM 97,0,0\n" + - "4->6 SET 0,0,0\n" + - "5->4 EPSILON 0,0,0\n" + - "6->2 EPSILON 0,0,0\n" + + "3->7 ATOM 97,0,0\n" + + "5->8 SET 0,0,0\n" + + "7->5 EPSILON 0,0,0\n" + + "8->2 EPSILON 0,0,0\n" + "0:0\n" + - "1:5\n"; + "1:7\n"; ATN atn = createATN(lg, true); String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); assertEquals(expecting, result); @@ -436,11 +441,14 @@ public class TestATNSerialization extends BaseTest { "7:BASIC 0\n" + "8:BASIC 0\n" + "9:BASIC 0\n" + - "10:BASIC 1\n" + + "10:BASIC 0\n" + "11:BASIC 1\n" + - "12:BASIC 2\n" + - "13:BASIC 2\n" + + "12:BASIC 1\n" + + "13:BASIC 1\n" + "14:BASIC 2\n" + + "15:BASIC 2\n" + + "16:BASIC 2\n" + + "17:BASIC 2\n" + "rule 0:1 1\n" + "rule 1:3 2\n" + "rule 2:5 3\n" + @@ -449,16 +457,16 @@ public class TestATNSerialization extends BaseTest { "0->3 EPSILON 0,0,0\n" + "0->5 EPSILON 0,0,0\n" + "1->7 EPSILON 0,0,0\n" + - "3->10 EPSILON 0,0,0\n" + - "5->12 EPSILON 0,0,0\n" + - "7->8 ATOM 97,0,0\n" + - "8->9 ACTION 0,0,0\n" + - "9->2 EPSILON 0,0,0\n" + - "10->11 ATOM 98,0,0\n" + - "11->4 EPSILON 0,0,0\n" + - "12->13 ATOM 99,0,0\n" + - "13->14 ACTION 2,1,0\n" + - "14->6 EPSILON 0,0,0\n" + + "3->11 EPSILON 0,0,0\n" + 
+ "5->14 EPSILON 0,0,0\n" + + "7->9 ATOM 97,0,0\n" + + "9->10 ACTION 0,0,0\n" + + "10->2 EPSILON 0,0,0\n" + + "11->13 ATOM 98,0,0\n" + + "13->4 EPSILON 0,0,0\n" + + "14->16 ATOM 99,0,0\n" + + "16->17 ACTION 2,1,0\n" + + "17->6 EPSILON 0,0,0\n" + "0:0\n"; ATN atn = createATN(lg, true); String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); @@ -564,9 +572,11 @@ public class TestATNSerialization extends BaseTest { "15:BASIC 1\n" + "16:BASIC 1\n" + "17:BASIC 1\n" + - "18:BASIC 2\n" + - "19:BASIC 2\n" + + "18:BASIC 1\n" + + "19:BASIC 1\n" + "20:BASIC 2\n" + + "21:BASIC 2\n" + + "22:BASIC 2\n" + "rule 0:2 1\n" + "rule 1:4 2\n" + "rule 2:6 3\n" + @@ -577,21 +587,21 @@ public class TestATNSerialization extends BaseTest { "1->6 EPSILON 0,0,0\n" + "2->9 EPSILON 0,0,0\n" + "4->13 EPSILON 0,0,0\n" + - "6->18 EPSILON 0,0,0\n" + + "6->20 EPSILON 0,0,0\n" + "8->10 RANGE 97,122,0\n" + "9->8 EPSILON 0,0,0\n" + "10->11 EPSILON 0,0,0\n" + "11->9 EPSILON 0,0,0\n" + "11->12 EPSILON 0,0,0\n" + "12->3 EPSILON 0,0,0\n" + - "13->14 ATOM 42,0,0\n" + - "14->15 ATOM 47,0,0\n" + - "15->16 EPSILON 0,0,0\n" + - "16->17 ACTION 1,0,0\n" + - "17->5 EPSILON 0,0,0\n" + - "18->19 WILDCARD 0,0,0\n" + - "19->20 ACTION 2,1,0\n" + - "20->7 EPSILON 0,0,0\n" + + "13->15 ATOM 42,0,0\n" + + "15->17 ATOM 47,0,0\n" + + "17->18 EPSILON 0,0,0\n" + + "18->19 ACTION 1,0,0\n" + + "19->5 EPSILON 0,0,0\n" + + "20->21 WILDCARD 0,0,0\n" + + "21->22 ACTION 2,1,0\n" + + "22->7 EPSILON 0,0,0\n" + "0:0\n" + "1:1\n" + "2:11\n"; @@ -649,12 +659,16 @@ public class TestATNSerialization extends BaseTest { "9:RULE_STOP 3\n" + "10:BASIC 0\n" + "11:BASIC 0\n" + - "12:BASIC 1\n" + + "12:BASIC 0\n" + "13:BASIC 1\n" + - "14:BASIC 2\n" + - "15:BASIC 2\n" + - "16:BASIC 3\n" + - "17:BASIC 3\n" + + "14:BASIC 1\n" + + "15:BASIC 1\n" + + "16:BASIC 2\n" + + "17:BASIC 2\n" + + "18:BASIC 2\n" + + "19:BASIC 3\n" + + "20:BASIC 3\n" + + "21:BASIC 3\n" + "rule 0:2 1\n" + "rule 1:4 2\n" + "rule 2:6 3\n" + @@ -666,17 
+680,17 @@ public class TestATNSerialization extends BaseTest { "1->6 EPSILON 0,0,0\n" + "1->8 EPSILON 0,0,0\n" + "2->10 EPSILON 0,0,0\n" + - "4->12 EPSILON 0,0,0\n" + - "6->14 EPSILON 0,0,0\n" + - "8->16 EPSILON 0,0,0\n" + - "10->11 ATOM 97,0,0\n" + - "11->3 EPSILON 0,0,0\n" + - "12->13 ATOM 98,0,0\n" + - "13->5 EPSILON 0,0,0\n" + - "14->15 ATOM 99,0,0\n" + - "15->7 EPSILON 0,0,0\n" + - "16->17 ATOM 100,0,0\n" + - "17->9 EPSILON 0,0,0\n" + + "4->13 EPSILON 0,0,0\n" + + "6->16 EPSILON 0,0,0\n" + + "8->19 EPSILON 0,0,0\n" + + "10->12 ATOM 97,0,0\n" + + "12->3 EPSILON 0,0,0\n" + + "13->15 ATOM 98,0,0\n" + + "15->5 EPSILON 0,0,0\n" + + "16->18 ATOM 99,0,0\n" + + "18->7 EPSILON 0,0,0\n" + + "19->21 ATOM 100,0,0\n" + + "21->9 EPSILON 0,0,0\n" + "0:0\n" + "1:1\n"; ATN atn = createATN(lg, true); @@ -705,10 +719,13 @@ public class TestATNSerialization extends BaseTest { "8:RULE_STOP 2\n" + "9:BASIC 0\n" + "10:BASIC 0\n" + - "11:BASIC 1\n" + + "11:BASIC 0\n" + "12:BASIC 1\n" + - "13:BASIC 2\n" + - "14:BASIC 2\n" + + "13:BASIC 1\n" + + "14:BASIC 1\n" + + "15:BASIC 2\n" + + "16:BASIC 2\n" + + "17:BASIC 2\n" + "rule 0:3 1\n" + "rule 1:5 2\n" + "rule 2:7 3\n" + @@ -719,14 +736,14 @@ public class TestATNSerialization extends BaseTest { "1->5 EPSILON 0,0,0\n" + "2->7 EPSILON 0,0,0\n" + "3->9 EPSILON 0,0,0\n" + - "5->11 EPSILON 0,0,0\n" + - "7->13 EPSILON 0,0,0\n" + - "9->10 ATOM 97,0,0\n" + - "10->4 EPSILON 0,0,0\n" + - "11->12 ATOM 98,0,0\n" + - "12->6 EPSILON 0,0,0\n" + - "13->14 ATOM 99,0,0\n" + - "14->8 EPSILON 0,0,0\n" + + "5->12 EPSILON 0,0,0\n" + + "7->15 EPSILON 0,0,0\n" + + "9->11 ATOM 97,0,0\n" + + "11->4 EPSILON 0,0,0\n" + + "12->14 ATOM 98,0,0\n" + + "14->6 EPSILON 0,0,0\n" + + "15->17 ATOM 99,0,0\n" + + "17->8 EPSILON 0,0,0\n" + "0:0\n" + "1:1\n" + "2:2\n"; diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java index 3066d4c6b6..bdde766550 100644 --- 
a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java @@ -31,6 +31,7 @@ package org.antlr.v4.test.tool; import org.antlr.v4.test.runtime.java.BaseTest; +import org.antlr.v4.tool.Grammar; import org.junit.Ignore; import org.junit.Test; @@ -177,4 +178,23 @@ public class TestParserExec extends BaseTest { assertEquals("6\n", found); assertNull(stderrDuringParse); } + + @Test public void testCaseInsensitiveParser() throws Exception { + String grammar = + "grammar CaseInsensitiveGrammar;\n" + + "options { caseInsensitive = true; }\n" + + "e\n" + + " : ID\n" + + " | 'not' e\n" + + " | e 'and' e\n" + + " | 'new' ID '(' e ')'\n" + + " ;\n" + + "ID: [a-z_][a-z_0-9]*;\n" + + "WS: [ \\t\\n\\r]+ -> skip;"; + + String input = "NEW Abc (Not a AND not B)"; + String found = execParser("CaseInsensitiveGrammar.g4", grammar, + "CaseInsensitiveGrammarParser", "CaseInsensitiveGrammarLexer", "e", input, false); + assertNull(stderrDuringParse); + } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java index 04abbcceff..30c5a9481d 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java @@ -348,6 +348,30 @@ public class TestParserInterpreter extends BaseTest { testInterp(lg, g, "e", "a+a*a", "(e (e a) + (e (e a) * (e a)))"); } + @Test public void testCaseInsensitiveTokensInParser() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n" + + "options { caseInsensitive = true; }\n" + + "NOT: 'not';\n" + + "AND: 'and';\n" + + "NEW: 'new';\n" + + "LB: '(';\n" + + "RB: ')';\n" + + "ID: [a-z_][a-z_0-9]*;\n" + + "WS: [ \\t\\n\\r]+ -> skip;"); + Grammar g = new Grammar( + "parser grammar T;\n" + + "options { caseInsensitive = true; }\n" + + "e\n" + + " : ID\n" + + " | 'not' e\n" + + " | e 'and' 
e\n" + + " | 'new' ID '(' e ')'\n" + + " ;", lg); + + testInterp(lg, g, "e", "NEW Abc (Not a AND not B)", "(e NEW Abc ( (e (e Not (e a)) AND (e not (e B))) ))"); + } + ParseTree testInterp(LexerGrammar lg, Grammar g, String startRule, String input, String expectedParseTree) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 1c68ba30a0..2538aab391 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -223,4 +223,52 @@ public class TestSymbolIssues extends BaseTest { testErrors(test, false); } + + @Test public void testCharsCollision() throws Exception { + String[] test = { + "lexer grammar L;\n" + + "TOKEN_RANGE: [aa-f];\n" + + "TOKEN_RANGE_2: [A-FD-J];\n" + + "TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" + + "TOKEN_RANGE_4: 'g'..'l' | [g-l];\n", // Handling in ATNOptimizer. + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" declared multiply times in set [aa-f]\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" declared multiply times in set [A-FD-J]\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" declared multiply times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g-l\" declared multiply times in set [g-l]\n" + }; + + testErrors(test, false); + } + + @Test public void testCaseInsensitiveCharsCollision() throws Exception { + String[] test = { + "lexer grammar L;\n" + + "options { caseInsensitive = true; }\n" + + "TOKEN_RANGE: [a-fA-F0-9];\n" + + "TOKEN_RANGE_2: 'g'..'l' | 'G'..'L';\n", + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"a-f\" declared multiply times in set [a-fA-F0-9]\n" + + "warning(" + 
ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"A-F\" declared multiply times in set [a-fA-F0-9]\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"g-l\" declared multiply times in set 'g'..'l' | 'G'..'L'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"G-L\" declared multiply times in set 'g'..'l' | 'G'..'L'\n" + }; + + testErrors(test, false); + } + + @Test public void testIllegalModeOption() throws Exception { + String[] test = { + "lexer grammar L;\n" + + "options { caseInsensitive = badValue; }\n" + + "DEFAULT_TOKEN: [A-F]+;\n" + + "mode M1, wrongModeOption;\n" + + "TOKEN: [a-f]+;\n", + + "warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n" + + "warning(" + ErrorType.ILLEGAL_OPTION.code + "): L.g4:4:9: unsupported option wrongModeOption\n" + }; + + testErrors(test, false); + } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java index c411434223..b8fe326249 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java @@ -467,11 +467,39 @@ public class TestToolSyntaxErrors extends BaseTest { "Error3: '';\n" + "NotError: ' ';"; String expected = - "error(" + ErrorType.EMPTY_STRINGS_NOT_ALLOWED.code + "): T.g4:2:8: string literals cannot be empty\n" + - "error(" + ErrorType.EMPTY_STRINGS_NOT_ALLOWED.code + "): T.g4:2:16: string literals cannot be empty\n" + - "error(" + ErrorType.EMPTY_STRINGS_NOT_ALLOWED.code + "): T.g4:3:8: string literals cannot be empty\n" + - "error(" + ErrorType.EMPTY_STRINGS_NOT_ALLOWED.code + "): T.g4:4:15: string literals cannot be empty\n" + - "error(" + ErrorType.EMPTY_STRINGS_NOT_ALLOWED.code + "): T.g4:5:8: string literals cannot be empty\n"; + "error(" + 
ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): T.g4:2:8: string literals and sets cannot be empty: ''\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): T.g4:2:16: string literals and sets cannot be empty: ''\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): T.g4:3:8: string literals and sets cannot be empty: ''\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): T.g4:4:15: string literals and sets cannot be empty: ''\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): T.g4:5:8: string literals and sets cannot be empty: ''\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + + @Test public void testInvalidCharSetAndRange() { + String grammar = + "lexer grammar Test;\n" + + "INVALID_RANGE: 'GH'..'LM';\n" + + "INVALID_RANGE_2: 'F'..'A' | 'Z';\n" + + "VALID_STRING_LITERALS: '\\u1234' | '\\t' | [\\-\\]];\n" + + "INVALID_CHAR_SET: [f-az][];\n" + + "INVALID_CHAR_SET_2: [\\u24\\uA2][\\u24];\n" + //https://github.com/antlr/antlr4/issues/1077 + "INVALID_CHAR_SET_3: [\\t\\{];"; + + String expected = + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:23: multi-character literals are not allowed in lexer sets: 'GH'\n" + + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:29: multi-character literals are not allowed in lexer sets: 'LM'\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): Test.g4:3:26: string literals and sets cannot be empty: 'F'..'A'\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): Test.g4:5:23: string literals and sets cannot be empty: [f-a]\n" + + "error(" + ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY.code + "): Test.g4:5:29: string literals and sets cannot be empty: []\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:23: invalid escape sequence\n" 
+ + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:33: invalid escape sequence\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:23: invalid escape sequence\n"; String[] pair = new String[] { grammar, diff --git a/tool/src/org/antlr/v4/automata/ATNOptimizer.java b/tool/src/org/antlr/v4/automata/ATNOptimizer.java index 4f72b640c3..79bdc925e6 100644 --- a/tool/src/org/antlr/v4/automata/ATNOptimizer.java +++ b/tool/src/org/antlr/v4/automata/ATNOptimizer.java @@ -42,6 +42,7 @@ import org.antlr.v4.runtime.atn.Transition; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.IntervalSet; +import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Rule; @@ -119,7 +120,20 @@ private static void optimizeSets(Grammar g, ATN atn) { if (matchTransition instanceof NotSetTransition) { throw new UnsupportedOperationException("Not yet implemented."); } else { - matchSet.addAll(matchTransition.label()); + IntervalSet set = matchTransition.label(); + int minElem = set.getMinElement(); + int maxElem = set.getMaxElement(); + for (int k = minElem; k <= maxElem; k++) { + if (matchSet.contains(k)) { + char setMin = (char)set.getMinElement(); + char setMax = (char)set.getMaxElement(); + // TODO: Token is missing (i.e. position in source will not be displayed). 
+ g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, + null, (char)minElem + "-" + (char)maxElem, "[" + setMin + "-" + setMax + "]"); + break; + } + } + matchSet.addAll(set); } } diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index a5e80e47d2..1595f02e61 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -32,6 +32,7 @@ import org.antlr.runtime.CommonToken; import org.antlr.runtime.Token; +import org.antlr.runtime.tree.CommonTree; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.misc.CharSupport; import org.antlr.v4.parse.ANTLRParser; @@ -64,6 +65,8 @@ import org.antlr.v4.tool.ast.ActionAST; import org.antlr.v4.tool.ast.GrammarAST; import org.antlr.v4.tool.ast.TerminalAST; +import org.antlr.v4.tool.ast.RangeAST; +import org.antlr.v4.tool.ast.RuleAST; import org.stringtemplate.v4.ST; import org.stringtemplate.v4.STGroup; @@ -268,6 +271,7 @@ public Handle range(GrammarAST a, GrammarAST b) { ATNState right = newState(b); int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText()); int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText()); + checkRange(a, b, t1, t2); left.addTransition(new RangeTransition(right, t1, t2)); a.atnState = left; b.atnState = left; @@ -276,6 +280,7 @@ public Handle range(GrammarAST a, GrammarAST b) { @Override public Handle set(GrammarAST associatedAST, List alts, boolean invert) { + boolean caseInsensitive = isCaseInsensitive(associatedAST); ATNState left = newState(associatedAST); ATNState right = newState(associatedAST); IntervalSet set = new IntervalSet(); @@ -283,7 +288,13 @@ public Handle set(GrammarAST associatedAST, List alts, boolean inver if ( t.getType()==ANTLRParser.RANGE ) { int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText()); int b = 
CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText()); - set.add(a, b); + if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) { + if (!caseInsensitive) { + checkAndAddToSet(associatedAST, set, a, b); + } else { + checkAndAddToSetCaseInsensitive(associatedAST, set, a, b); + } + } } else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) { set.addAll(getSetFromCharSetLiteral(t)); @@ -291,12 +302,15 @@ else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) { else if ( t.getType()==ANTLRParser.STRING_LITERAL ) { int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText()); if ( c != -1 ) { - set.add(c); + if (!caseInsensitive) { + checkAndAddToSet(associatedAST, set, c); + } else { + checkAndAddToSetCaseInsensitive(associatedAST, set, c); + } } else { g.tool.errMgr.grammarError(ErrorType.INVALID_LITERAL_IN_LEXER_SET, g.fileName, t.getToken(), t.getText()); - } } else if ( t.getType()==ANTLRParser.TOKEN_REF ) { @@ -322,25 +336,72 @@ else if ( t.getType()==ANTLRParser.TOKEN_REF ) { return new Handle(left, right); } + protected boolean checkRange(GrammarAST leftNode, GrammarAST rightNode, int leftValue, int rightValue) { + boolean result = true; + if (leftValue == -1) { + result = false; + g.tool.errMgr.grammarError(ErrorType.INVALID_LITERAL_IN_LEXER_SET, + g.fileName, leftNode.getToken(), leftNode.getText()); + } + if (rightValue == -1) { + result = false; + g.tool.errMgr.grammarError(ErrorType.INVALID_LITERAL_IN_LEXER_SET, + g.fileName, rightNode.getToken(), rightNode.getText()); + } + if (!result) return result; + + if (rightValue < leftValue) { + g.tool.errMgr.grammarError(ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY, + g.fileName, leftNode.parent.getToken(), leftNode.getText() + ".." + rightNode.getText()); + } + return result; + } + /** For a lexer, a string is a sequence of char to match. That is, * "fog" is treated as 'f' 'o' 'g' not as a single transition in * the DFA. 
Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states * for n characters. + * if "caseInsensitive" option enabled, "fog" will be treated as + * o-('f'|'F') -> o-('o'|'O') -> o-('g'|'G') */ @Override public Handle stringLiteral(TerminalAST stringLiteralAST) { String chars = stringLiteralAST.getText(); - chars = CharSupport.getStringFromGrammarStringLiteral(chars); - int n = chars.length(); ATNState left = newState(stringLiteralAST); - ATNState prev = left; - ATNState right = null; - for (int i=0; i=n ) break; // ignore spurious \ on end + if ( (i+1)>=n ) return null; // invalid escape sequence. if ( literal.charAt(i+1) == 'u' ) end = i+6; } - if ( end>n ) break; + if ( end>n ) return null; // invalid escape sequence. String esc = literal.substring(i, end); int c = getCharValueFromCharInGrammarLiteral(esc); - if ( c==-1 ) { buf.append(esc); } + if ( c==-1 ) { + return null; // invalid escape sequence. + } else buf.append((char)c); i = end; } diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index 42946fa8d2..411cd4ccb7 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -101,7 +101,7 @@ tokens { @header { /* [The "BSD licence"] - Copyright (c) 2005-20012 Terence Parr + Copyright (c) 2005-2012 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without @@ -319,9 +319,13 @@ actionScopeName ; modeSpec - : MODE id SEMI sync (lexerRule sync)* -> ^(MODE id lexerRule*) + : MODE id (COMMA modeOption)? SEMI sync (lexerRule sync)* -> ^(MODE id modeOption? 
lexerRule*) ; +modeOption + : id + ; + rules : sync (rule sync)* // Rewrite with an enclosing node as this is good for counting diff --git a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g index 9c8b1be43c..d72df8b868 100644 --- a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g +++ b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g @@ -87,6 +87,8 @@ import java.lang.reflect.Method; public String grammarName; public GrammarAST currentRuleAST; public String currentModeName = LexerGrammar.DEFAULT_MODE_NAME; +public boolean defaultCaseInsensitive = false; +public boolean currentCaseInsensitive = false; public String currentRuleName; public GrammarAST currentOuterAltRoot; public int currentOuterAltNumber = 1; // 1..n @@ -406,7 +408,11 @@ option if ( block ) blockOption($ID, $v.start); // most specific first else if ( rule ) ruleOption($ID, $v.start); else grammarOption($ID, $v.start); + if ($ID.text.equals("caseInsensitive")) { + defaultCaseInsensitive = Boolean.parseBoolean(((GrammarAST)$v.start).getText()); + currentCaseInsensitive = defaultCaseInsensitive; } + } ; optionValue returns [String v] @@ -510,7 +516,18 @@ mode @after { exitMode($start); } - : ^( MODE ID {currentModeName=$ID.text; modeDef($MODE, $ID);} lexerRule* ) + : ^( MODE modeName=ID (modeOption=ID)? 
+ {currentModeName=$modeName.text; + modeDef($MODE, $modeName); + if ($modeOption != null) { + if ($modeOption.text.equals("caseInsensitive")) + currentCaseInsensitive = true; + else if ($modeOption.text.equals("caseSensitive")) + currentCaseInsensitive = false; + } else { + currentCaseInsensitive = defaultCaseInsensitive; + }} + lexerRule* ) ; lexerRule diff --git a/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java b/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java index 4853c37958..4c1250339c 100644 --- a/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java +++ b/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java @@ -165,6 +165,14 @@ public void discoverRules(GrammarAST rules) { @Override protected void enterMode(GrammarAST tree) { nonFragmentRuleCount = 0; + Tree child1 = tree.getChild(1); + if (!(child1 instanceof RuleAST)) { + String modeOptionText = child1.getText(); + if (!modeOptionText.equals("caseSensitive") && !modeOptionText.equals("caseInsensitive")) { + g.tool.errMgr.grammarError(ErrorType.ILLEGAL_OPTION, g.fileName, ((GrammarAST)child1).getToken(), + modeOptionText); + } + } } @Override @@ -248,6 +256,13 @@ public void blockOption(GrammarAST ID, GrammarAST valueAST) { public void grammarOption(GrammarAST ID, GrammarAST valueAST) { boolean ok = checkOptions(g.ast, ID.token, valueAST); //if ( ok ) g.ast.setOption(ID.getText(), value); + if (ID.getText().equals("caseInsensitive")) { + String valueText = valueAST.getText(); + if (!valueText.equals("true") && !valueText.equals("false")) { + g.tool.errMgr.grammarError(ErrorType.ILLEGAL_OPTION_VALUE, g.fileName, valueAST.getToken(), + ID.getText(), valueText); + } + } } @Override @@ -495,7 +510,7 @@ protected void enterLabeledLexerElement(GrammarAST tree) { protected void enterTerminal(GrammarAST tree) { String text = tree.getText(); if (text.equals("''")) { - g.tool.errMgr.grammarError(ErrorType.EMPTY_STRINGS_NOT_ALLOWED, g.fileName, tree.token); + 
g.tool.errMgr.grammarError(ErrorType.STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY, g.fileName, tree.token, "''"); } } diff --git a/tool/src/org/antlr/v4/semantics/RuleCollector.java b/tool/src/org/antlr/v4/semantics/RuleCollector.java index fa7eb2d31b..262d8cc0ac 100644 --- a/tool/src/org/antlr/v4/semantics/RuleCollector.java +++ b/tool/src/org/antlr/v4/semantics/RuleCollector.java @@ -132,6 +132,7 @@ public void discoverLexerRule(RuleAST rule, GrammarAST ID, List modi int numAlts = block.getChildCount(); Rule r = new Rule(g, ID.getText(), rule, numAlts); r.mode = currentModeName; + r.caseInsensitive = currentCaseInsensitive; if ( !modifiers.isEmpty() ) r.modifiers = modifiers; rules.put(r.name, r); } diff --git a/tool/src/org/antlr/v4/semantics/SymbolChecks.java b/tool/src/org/antlr/v4/semantics/SymbolChecks.java index 999d9f0b29..09d3d0bea1 100644 --- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java +++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java @@ -275,15 +275,17 @@ protected void checkReservedNames(Collection rules) { public void checkForModeConflicts(Grammar g) { if (g.isLexer()) { LexerGrammar lexerGrammar = (LexerGrammar)g; - for (String modeName : lexerGrammar.modes.keySet()) { - if (!modeName.equals("DEFAULT_MODE") && reservedNames.contains(modeName)) { - Rule rule = lexerGrammar.modes.get(modeName).iterator().next(); - g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_COMMON_CONSTANTS, g.fileName, rule.ast.parent.getToken(), modeName); - } - - if (g.getTokenType(modeName) != Token.INVALID_TYPE) { - Rule rule = lexerGrammar.modes.get(modeName).iterator().next(); - g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_TOKEN, g.fileName, rule.ast.parent.getToken(), modeName); + if (lexerGrammar.modes != null) { + for (String modeName : lexerGrammar.modes.keySet()) { + if (!modeName.equals("DEFAULT_MODE") && reservedNames.contains(modeName)) { + Rule rule = lexerGrammar.modes.get(modeName).iterator().next(); + 
g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_COMMON_CONSTANTS, g.fileName, rule.ast.parent.getToken(), modeName); + } + + if (g.getTokenType(modeName) != Token.INVALID_TYPE) { + Rule rule = lexerGrammar.modes.get(modeName).iterator().next(); + g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_TOKEN, g.fileName, rule.ast.parent.getToken(), modeName); + } } } } diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index 0fedfc4a9e..8ff0f7bab6 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -1006,11 +1006,33 @@ public enum ErrorType { * *

empty strings not allowed

* - *
A: '''test''';
- *
B: '';
- *
C: 'test' '';
+ *
+	 * A: '''test''';
+	 * B: '';
+	 * C: 'test' '';
+	 * D: [];
+	 * E: [f-a];
+	 * 
+ */ + STRING_LITERALS_AND_SETS_CANNOT_BE_EMPTY(174, "string literals and sets cannot be empty: ", ErrorSeverity.ERROR), + /** + * Compiler Warning 175. + */ + CASE_INSENSITIVE_MAY_WORKS_INCORRECTLY(175, "case insensitive may works incorrectly with \"\" character set", ErrorSeverity.WARNING), + /** + * Compiler Warning 176. + * Rarely triggered, but relevant when the caseInsensitive option is enabled. + * + *

chars "c-m" declared multiply times in set [a-fc-m]

+ * + *
+	 * A:    [aa-z];   // warning
+	 * B:    [a-fc-m]; // warning
+	 * 
+ * + * TODO: Does not work with fragment rules. */ - EMPTY_STRINGS_NOT_ALLOWED(174, "string literals cannot be empty", ErrorSeverity.ERROR), + CHARACTERS_COLLISION_IN_SET(176, "chars \"\" declared multiply times in set ", ErrorSeverity.WARNING), /* * Backward incompatibility errors diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 5632ab451b..597f3e9c18 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -104,6 +104,7 @@ public class Grammar implements AttributeResolver { parserOptions.add("TokenLabelType"); parserOptions.add("tokenVocab"); parserOptions.add("language"); + parserOptions.add("caseInsensitive"); } public static final Set lexerOptions = parserOptions; @@ -442,7 +443,6 @@ public boolean defineRule(Rule r) { if ( rules.get(r.name)!=null ) { return false; } - rules.put(r.name, r); r.index = ruleNumber++; indexToRule.add(r); diff --git a/tool/src/org/antlr/v4/tool/Rule.java b/tool/src/org/antlr/v4/tool/Rule.java index 38f5c8849b..116d934cf8 100644 --- a/tool/src/org/antlr/v4/tool/Rule.java +++ b/tool/src/org/antlr/v4/tool/Rule.java @@ -90,6 +90,8 @@ public class Rule implements AttributeResolver { /** If we're in a lexer grammar, we might be in a mode */ public String mode; + public boolean caseInsensitive; + /** Map a name to an action for this rule like @init {...}. * The code generator will use this to fill holes in the rule template. 
* I track the AST node for the action in case I need the line number diff --git a/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java b/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java index b492c93ccd..128939ef0e 100644 --- a/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java +++ b/tool/src/org/antlr/v4/tool/ast/GrammarASTWithOptions.java @@ -32,6 +32,7 @@ import org.antlr.runtime.Token; import org.antlr.v4.misc.CharSupport; +import org.antlr.v4.tool.ErrorType; import java.util.Collections; import java.util.HashMap; @@ -65,6 +66,11 @@ public String getOptionString(String key) { String v = value.getText(); if ( v.startsWith("'") || v.startsWith("\"") ) { v = CharSupport.getStringFromGrammarStringLiteral(v); + if (v == null) { + g.tool.errMgr.grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, + g.fileName, value.getToken()); + v = ""; + } } return v; }