From 722ab87a313587ad5beef7555b1a090d6ef4883e Mon Sep 17 00:00:00 2001 From: Andrea Cattaneo Date: Sat, 18 Jan 2020 20:04:29 +0100 Subject: [PATCH] Add autodiscover for terminal-nonterminal --- grammarlang/grammarlang.g4 | 16 +- grammarlang/grammarlang.interp | 10 +- grammarlang/grammarlang.tokens | 10 +- grammarlang/grammarlangLexer.interp | 13 +- grammarlang/grammarlangLexer.js | 113 +- grammarlang/grammarlangLexer.tokens | 10 +- grammarlang/grammarlangParser.js | 92 +- src/errors.js | 6 +- src/index.js | 12 +- src/ll1.js | 134 +-- src/parser.js | 72 +- test/test-ll1.js | 1591 +++++++++++++++------------ test/test-parser.js | 202 +++- 13 files changed, 1243 insertions(+), 1038 deletions(-) diff --git a/grammarlang/grammarlang.g4 b/grammarlang/grammarlang.g4 index 30b5dcb..a96b51b 100644 --- a/grammarlang/grammarlang.g4 +++ b/grammarlang/grammarlang.g4 @@ -1,16 +1,14 @@ grammar grammarlang; rulelist: start_symbol? rule_+ EOF; -start_symbol: START_SYMBOL_KEYWORD NONTERMINAL SEMICOLON; +start_symbol: START_SYMBOL_KEYWORD SYMBOL SEMICOLON; rule_: l ASSIGN r SEMICOLON; -l: NONTERMINAL; -r: (NONTERMINAL | TERMINAL)*; +l: SYMBOL; +r: SYMBOL*; -START_SYMBOL_KEYWORD: '_start_symbol'; -ASSIGN: '->'; +START_SYMBOL_KEYWORD: '#start_symbol'; +ASSIGN: '->' | '=>'; SEMICOLON: ';'; -NONTERMINAL: ('A' ..'Z') ('A' ..'Z' | '0' ..'9')*; -TERMINAL: ('a' ..'z') ('a' ..'z' | '0' ..'9')*; - -COMMENT: ( '//' ~[\r\n]* (('\r'? '\n') | EOF) | '/*' .*? '*/') -> skip; +SYMBOL: ('A' ..'Z' | 'a' .. 'z' | '0' ..'9' | '_')+; +COMMENT: ('//' ~[\r\n]* (('\r'? '\n') | EOF) | '/*' .*? 
'*/') -> skip; WS: [ \r\n\t] -> skip; diff --git a/grammarlang/grammarlang.interp b/grammarlang/grammarlang.interp index 685daf0..c22e9c2 100644 --- a/grammarlang/grammarlang.interp +++ b/grammarlang/grammarlang.interp @@ -1,9 +1,8 @@ token literal names: null -'_start_symbol' -'->' -';' +'#start_symbol' null +';' null null null @@ -13,8 +12,7 @@ null START_SYMBOL_KEYWORD ASSIGN SEMICOLON -NONTERMINAL -TERMINAL +SYMBOL COMMENT WS @@ -27,4 +25,4 @@ r atn: -[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 9, 40, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 3, 2, 5, 2, 14, 10, 2, 3, 2, 6, 2, 17, 10, 2, 13, 2, 14, 2, 18, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 7, 6, 35, 10, 6, 12, 6, 14, 6, 38, 11, 6, 3, 6, 2, 2, 7, 2, 4, 6, 8, 10, 2, 3, 3, 2, 6, 7, 2, 37, 2, 13, 3, 2, 2, 2, 4, 22, 3, 2, 2, 2, 6, 26, 3, 2, 2, 2, 8, 31, 3, 2, 2, 2, 10, 36, 3, 2, 2, 2, 12, 14, 5, 4, 3, 2, 13, 12, 3, 2, 2, 2, 13, 14, 3, 2, 2, 2, 14, 16, 3, 2, 2, 2, 15, 17, 5, 6, 4, 2, 16, 15, 3, 2, 2, 2, 17, 18, 3, 2, 2, 2, 18, 16, 3, 2, 2, 2, 18, 19, 3, 2, 2, 2, 19, 20, 3, 2, 2, 2, 20, 21, 7, 2, 2, 3, 21, 3, 3, 2, 2, 2, 22, 23, 7, 3, 2, 2, 23, 24, 7, 6, 2, 2, 24, 25, 7, 5, 2, 2, 25, 5, 3, 2, 2, 2, 26, 27, 5, 8, 5, 2, 27, 28, 7, 4, 2, 2, 28, 29, 5, 10, 6, 2, 29, 30, 7, 5, 2, 2, 30, 7, 3, 2, 2, 2, 31, 32, 7, 6, 2, 2, 32, 9, 3, 2, 2, 2, 33, 35, 9, 2, 2, 2, 34, 33, 3, 2, 2, 2, 35, 38, 3, 2, 2, 2, 36, 34, 3, 2, 2, 2, 36, 37, 3, 2, 2, 2, 37, 11, 3, 2, 2, 2, 38, 36, 3, 2, 2, 2, 5, 13, 18, 36] \ No newline at end of file +[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 8, 40, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 3, 2, 5, 2, 14, 10, 2, 3, 2, 6, 2, 17, 10, 2, 13, 2, 14, 2, 18, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 7, 6, 35, 10, 6, 12, 6, 14, 6, 38, 11, 6, 3, 6, 2, 2, 7, 2, 4, 6, 8, 10, 2, 2, 2, 37, 2, 13, 3, 2, 2, 2, 4, 22, 3, 2, 2, 2, 6, 26, 3, 2, 2, 2, 8, 
31, 3, 2, 2, 2, 10, 36, 3, 2, 2, 2, 12, 14, 5, 4, 3, 2, 13, 12, 3, 2, 2, 2, 13, 14, 3, 2, 2, 2, 14, 16, 3, 2, 2, 2, 15, 17, 5, 6, 4, 2, 16, 15, 3, 2, 2, 2, 17, 18, 3, 2, 2, 2, 18, 16, 3, 2, 2, 2, 18, 19, 3, 2, 2, 2, 19, 20, 3, 2, 2, 2, 20, 21, 7, 2, 2, 3, 21, 3, 3, 2, 2, 2, 22, 23, 7, 3, 2, 2, 23, 24, 7, 6, 2, 2, 24, 25, 7, 5, 2, 2, 25, 5, 3, 2, 2, 2, 26, 27, 5, 8, 5, 2, 27, 28, 7, 4, 2, 2, 28, 29, 5, 10, 6, 2, 29, 30, 7, 5, 2, 2, 30, 7, 3, 2, 2, 2, 31, 32, 7, 6, 2, 2, 32, 9, 3, 2, 2, 2, 33, 35, 7, 6, 2, 2, 34, 33, 3, 2, 2, 2, 35, 38, 3, 2, 2, 2, 36, 34, 3, 2, 2, 2, 36, 37, 3, 2, 2, 2, 37, 11, 3, 2, 2, 2, 38, 36, 3, 2, 2, 2, 5, 13, 18, 36] \ No newline at end of file diff --git a/grammarlang/grammarlang.tokens b/grammarlang/grammarlang.tokens index d9f43f4..a4c61f1 100644 --- a/grammarlang/grammarlang.tokens +++ b/grammarlang/grammarlang.tokens @@ -1,10 +1,8 @@ START_SYMBOL_KEYWORD=1 ASSIGN=2 SEMICOLON=3 -NONTERMINAL=4 -TERMINAL=5 -COMMENT=6 -WS=7 -'_start_symbol'=1 -'->'=2 +SYMBOL=4 +COMMENT=5 +WS=6 +'#start_symbol'=1 ';'=3 diff --git a/grammarlang/grammarlangLexer.interp b/grammarlang/grammarlangLexer.interp index 3f45918..ea3959e 100644 --- a/grammarlang/grammarlangLexer.interp +++ b/grammarlang/grammarlangLexer.interp @@ -1,9 +1,8 @@ token literal names: null -'_start_symbol' -'->' -';' +'#start_symbol' null +';' null null null @@ -13,8 +12,7 @@ null START_SYMBOL_KEYWORD ASSIGN SEMICOLON -NONTERMINAL -TERMINAL +SYMBOL COMMENT WS @@ -22,8 +20,7 @@ rule names: START_SYMBOL_KEYWORD ASSIGN SEMICOLON -NONTERMINAL -TERMINAL +SYMBOL COMMENT WS @@ -35,4 +32,4 @@ mode names: DEFAULT_MODE atn: -[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 9, 85, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 7, 5, 39, 10, 5, 12, 5, 14, 5, 42, 11, 5, 3, 6, 3, 6, 7, 6, 46, 10, 6, 12, 6, 14, 6, 49, 11, 6, 
3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 55, 10, 7, 12, 7, 14, 7, 58, 11, 7, 3, 7, 5, 7, 61, 10, 7, 3, 7, 3, 7, 5, 7, 65, 10, 7, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 71, 10, 7, 12, 7, 14, 7, 74, 11, 7, 3, 7, 3, 7, 5, 7, 78, 10, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 72, 2, 9, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 3, 2, 6, 4, 2, 50, 59, 67, 92, 4, 2, 50, 59, 99, 124, 4, 2, 12, 12, 15, 15, 5, 2, 11, 12, 15, 15, 34, 34, 2, 91, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 3, 17, 3, 2, 2, 2, 5, 31, 3, 2, 2, 2, 7, 34, 3, 2, 2, 2, 9, 36, 3, 2, 2, 2, 11, 43, 3, 2, 2, 2, 13, 77, 3, 2, 2, 2, 15, 81, 3, 2, 2, 2, 17, 18, 7, 97, 2, 2, 18, 19, 7, 117, 2, 2, 19, 20, 7, 118, 2, 2, 20, 21, 7, 99, 2, 2, 21, 22, 7, 116, 2, 2, 22, 23, 7, 118, 2, 2, 23, 24, 7, 97, 2, 2, 24, 25, 7, 117, 2, 2, 25, 26, 7, 123, 2, 2, 26, 27, 7, 111, 2, 2, 27, 28, 7, 100, 2, 2, 28, 29, 7, 113, 2, 2, 29, 30, 7, 110, 2, 2, 30, 4, 3, 2, 2, 2, 31, 32, 7, 47, 2, 2, 32, 33, 7, 64, 2, 2, 33, 6, 3, 2, 2, 2, 34, 35, 7, 61, 2, 2, 35, 8, 3, 2, 2, 2, 36, 40, 4, 67, 92, 2, 37, 39, 9, 2, 2, 2, 38, 37, 3, 2, 2, 2, 39, 42, 3, 2, 2, 2, 40, 38, 3, 2, 2, 2, 40, 41, 3, 2, 2, 2, 41, 10, 3, 2, 2, 2, 42, 40, 3, 2, 2, 2, 43, 47, 4, 99, 124, 2, 44, 46, 9, 3, 2, 2, 45, 44, 3, 2, 2, 2, 46, 49, 3, 2, 2, 2, 47, 45, 3, 2, 2, 2, 47, 48, 3, 2, 2, 2, 48, 12, 3, 2, 2, 2, 49, 47, 3, 2, 2, 2, 50, 51, 7, 49, 2, 2, 51, 52, 7, 49, 2, 2, 52, 56, 3, 2, 2, 2, 53, 55, 10, 4, 2, 2, 54, 53, 3, 2, 2, 2, 55, 58, 3, 2, 2, 2, 56, 54, 3, 2, 2, 2, 56, 57, 3, 2, 2, 2, 57, 64, 3, 2, 2, 2, 58, 56, 3, 2, 2, 2, 59, 61, 7, 15, 2, 2, 60, 59, 3, 2, 2, 2, 60, 61, 3, 2, 2, 2, 61, 62, 3, 2, 2, 2, 62, 65, 7, 12, 2, 2, 63, 65, 7, 2, 2, 3, 64, 60, 3, 2, 2, 2, 64, 63, 3, 2, 2, 2, 65, 78, 3, 2, 2, 2, 66, 67, 7, 49, 2, 2, 67, 68, 7, 44, 2, 2, 68, 72, 3, 2, 2, 2, 69, 71, 11, 2, 2, 2, 70, 69, 3, 2, 2, 2, 71, 74, 3, 2, 2, 2, 72, 73, 3, 2, 2, 2, 72, 70, 3, 2, 2, 2, 73, 75, 3, 2, 2, 2, 74, 72, 3, 2, 2, 
2, 75, 76, 7, 44, 2, 2, 76, 78, 7, 49, 2, 2, 77, 50, 3, 2, 2, 2, 77, 66, 3, 2, 2, 2, 78, 79, 3, 2, 2, 2, 79, 80, 8, 7, 2, 2, 80, 14, 3, 2, 2, 2, 81, 82, 9, 5, 2, 2, 82, 83, 3, 2, 2, 2, 83, 84, 8, 8, 2, 2, 84, 16, 3, 2, 2, 2, 10, 2, 40, 47, 56, 60, 64, 72, 77, 3, 8, 2, 2] \ No newline at end of file +[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 8, 77, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 34, 10, 3, 3, 4, 3, 4, 3, 5, 6, 5, 39, 10, 5, 13, 5, 14, 5, 40, 3, 6, 3, 6, 3, 6, 3, 6, 7, 6, 47, 10, 6, 12, 6, 14, 6, 50, 11, 6, 3, 6, 5, 6, 53, 10, 6, 3, 6, 3, 6, 5, 6, 57, 10, 6, 3, 6, 3, 6, 3, 6, 3, 6, 7, 6, 63, 10, 6, 12, 6, 14, 6, 66, 11, 6, 3, 6, 3, 6, 5, 6, 70, 10, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 64, 2, 8, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 3, 2, 5, 6, 2, 50, 59, 67, 92, 97, 97, 99, 124, 4, 2, 12, 12, 15, 15, 5, 2, 11, 12, 15, 15, 34, 34, 2, 83, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 3, 15, 3, 2, 2, 2, 5, 33, 3, 2, 2, 2, 7, 35, 3, 2, 2, 2, 9, 38, 3, 2, 2, 2, 11, 69, 3, 2, 2, 2, 13, 73, 3, 2, 2, 2, 15, 16, 7, 37, 2, 2, 16, 17, 7, 117, 2, 2, 17, 18, 7, 118, 2, 2, 18, 19, 7, 99, 2, 2, 19, 20, 7, 116, 2, 2, 20, 21, 7, 118, 2, 2, 21, 22, 7, 97, 2, 2, 22, 23, 7, 117, 2, 2, 23, 24, 7, 123, 2, 2, 24, 25, 7, 111, 2, 2, 25, 26, 7, 100, 2, 2, 26, 27, 7, 113, 2, 2, 27, 28, 7, 110, 2, 2, 28, 4, 3, 2, 2, 2, 29, 30, 7, 47, 2, 2, 30, 34, 7, 64, 2, 2, 31, 32, 7, 63, 2, 2, 32, 34, 7, 64, 2, 2, 33, 29, 3, 2, 2, 2, 33, 31, 3, 2, 2, 2, 34, 6, 3, 2, 2, 2, 35, 36, 7, 61, 2, 2, 36, 8, 3, 2, 2, 2, 37, 39, 9, 2, 2, 2, 38, 37, 3, 2, 2, 2, 39, 40, 3, 2, 2, 2, 40, 38, 3, 2, 2, 2, 40, 41, 3, 2, 2, 2, 41, 10, 3, 2, 2, 2, 42, 43, 7, 49, 2, 2, 43, 44, 7, 49, 2, 2, 44, 48, 3, 2, 2, 2, 45, 47, 10, 3, 2, 2, 46, 45, 3, 2, 2, 2, 47, 50, 3, 2, 2, 2, 48, 46, 3, 
2, 2, 2, 48, 49, 3, 2, 2, 2, 49, 56, 3, 2, 2, 2, 50, 48, 3, 2, 2, 2, 51, 53, 7, 15, 2, 2, 52, 51, 3, 2, 2, 2, 52, 53, 3, 2, 2, 2, 53, 54, 3, 2, 2, 2, 54, 57, 7, 12, 2, 2, 55, 57, 7, 2, 2, 3, 56, 52, 3, 2, 2, 2, 56, 55, 3, 2, 2, 2, 57, 70, 3, 2, 2, 2, 58, 59, 7, 49, 2, 2, 59, 60, 7, 44, 2, 2, 60, 64, 3, 2, 2, 2, 61, 63, 11, 2, 2, 2, 62, 61, 3, 2, 2, 2, 63, 66, 3, 2, 2, 2, 64, 65, 3, 2, 2, 2, 64, 62, 3, 2, 2, 2, 65, 67, 3, 2, 2, 2, 66, 64, 3, 2, 2, 2, 67, 68, 7, 44, 2, 2, 68, 70, 7, 49, 2, 2, 69, 42, 3, 2, 2, 2, 69, 58, 3, 2, 2, 2, 70, 71, 3, 2, 2, 2, 71, 72, 8, 6, 2, 2, 72, 12, 3, 2, 2, 2, 73, 74, 9, 4, 2, 2, 74, 75, 3, 2, 2, 2, 75, 76, 8, 7, 2, 2, 76, 14, 3, 2, 2, 2, 10, 2, 33, 40, 48, 52, 56, 64, 69, 3, 8, 2, 2] \ No newline at end of file diff --git a/grammarlang/grammarlangLexer.js b/grammarlang/grammarlangLexer.js index d98becb..ff96f7d 100644 --- a/grammarlang/grammarlangLexer.js +++ b/grammarlang/grammarlangLexer.js @@ -5,58 +5,54 @@ var antlr4 = require('antlr4/index'); var serializedATN = ["\u0003\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964", - "\u0002\tU\b\u0001\u0004\u0002\t\u0002\u0004\u0003\t\u0003\u0004\u0004", + "\u0002\bM\b\u0001\u0004\u0002\t\u0002\u0004\u0003\t\u0003\u0004\u0004", "\t\u0004\u0004\u0005\t\u0005\u0004\u0006\t\u0006\u0004\u0007\t\u0007", - "\u0004\b\t\b\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002", "\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002", - "\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0003\u0003\u0003\u0003\u0003", - "\u0003\u0004\u0003\u0004\u0003\u0005\u0003\u0005\u0007\u0005\'\n\u0005", - "\f\u0005\u000e\u0005*\u000b\u0005\u0003\u0006\u0003\u0006\u0007\u0006", - ".\n\u0006\f\u0006\u000e\u00061\u000b\u0006\u0003\u0007\u0003\u0007\u0003", - "\u0007\u0003\u0007\u0007\u00077\n\u0007\f\u0007\u000e\u0007:\u000b\u0007", - "\u0003\u0007\u0005\u0007=\n\u0007\u0003\u0007\u0003\u0007\u0005\u0007", - "A\n\u0007\u0003\u0007\u0003\u0007\u0003\u0007\u0003\u0007\u0007\u0007", - 
"G\n\u0007\f\u0007\u000e\u0007J\u000b\u0007\u0003\u0007\u0003\u0007\u0005", - "\u0007N\n\u0007\u0003\u0007\u0003\u0007\u0003\b\u0003\b\u0003\b\u0003", - "\b\u0003H\u0002\t\u0003\u0003\u0005\u0004\u0007\u0005\t\u0006\u000b", - "\u0007\r\b\u000f\t\u0003\u0002\u0006\u0004\u00022;C\\\u0004\u00022;", - "c|\u0004\u0002\f\f\u000f\u000f\u0005\u0002\u000b\f\u000f\u000f\"\"\u0002", - "[\u0002\u0003\u0003\u0002\u0002\u0002\u0002\u0005\u0003\u0002\u0002", - "\u0002\u0002\u0007\u0003\u0002\u0002\u0002\u0002\t\u0003\u0002\u0002", - "\u0002\u0002\u000b\u0003\u0002\u0002\u0002\u0002\r\u0003\u0002\u0002", - "\u0002\u0002\u000f\u0003\u0002\u0002\u0002\u0003\u0011\u0003\u0002\u0002", - "\u0002\u0005\u001f\u0003\u0002\u0002\u0002\u0007\"\u0003\u0002\u0002", - "\u0002\t$\u0003\u0002\u0002\u0002\u000b+\u0003\u0002\u0002\u0002\rM", - "\u0003\u0002\u0002\u0002\u000fQ\u0003\u0002\u0002\u0002\u0011\u0012", - "\u0007a\u0002\u0002\u0012\u0013\u0007u\u0002\u0002\u0013\u0014\u0007", - "v\u0002\u0002\u0014\u0015\u0007c\u0002\u0002\u0015\u0016\u0007t\u0002", - "\u0002\u0016\u0017\u0007v\u0002\u0002\u0017\u0018\u0007a\u0002\u0002", - "\u0018\u0019\u0007u\u0002\u0002\u0019\u001a\u0007{\u0002\u0002\u001a", - "\u001b\u0007o\u0002\u0002\u001b\u001c\u0007d\u0002\u0002\u001c\u001d", - "\u0007q\u0002\u0002\u001d\u001e\u0007n\u0002\u0002\u001e\u0004\u0003", - "\u0002\u0002\u0002\u001f \u0007/\u0002\u0002 !\u0007@\u0002\u0002!\u0006", - "\u0003\u0002\u0002\u0002\"#\u0007=\u0002\u0002#\b\u0003\u0002\u0002", - "\u0002$(\u0004C\\\u0002%\'\t\u0002\u0002\u0002&%\u0003\u0002\u0002\u0002", - "\'*\u0003\u0002\u0002\u0002(&\u0003\u0002\u0002\u0002()\u0003\u0002", - "\u0002\u0002)\n\u0003\u0002\u0002\u0002*(\u0003\u0002\u0002\u0002+/", - "\u0004c|\u0002,.\t\u0003\u0002\u0002-,\u0003\u0002\u0002\u0002.1\u0003", - "\u0002\u0002\u0002/-\u0003\u0002\u0002\u0002/0\u0003\u0002\u0002\u0002", - "0\f\u0003\u0002\u0002\u00021/\u0003\u0002\u0002\u000223\u00071\u0002", - 
"\u000234\u00071\u0002\u000248\u0003\u0002\u0002\u000257\n\u0004\u0002", - "\u000265\u0003\u0002\u0002\u00027:\u0003\u0002\u0002\u000286\u0003\u0002", - "\u0002\u000289\u0003\u0002\u0002\u00029@\u0003\u0002\u0002\u0002:8\u0003", - "\u0002\u0002\u0002;=\u0007\u000f\u0002\u0002<;\u0003\u0002\u0002\u0002", - "<=\u0003\u0002\u0002\u0002=>\u0003\u0002\u0002\u0002>A\u0007\f\u0002", - "\u0002?A\u0007\u0002\u0002\u0003@<\u0003\u0002\u0002\u0002@?\u0003\u0002", - "\u0002\u0002AN\u0003\u0002\u0002\u0002BC\u00071\u0002\u0002CD\u0007", - ",\u0002\u0002DH\u0003\u0002\u0002\u0002EG\u000b\u0002\u0002\u0002FE", - "\u0003\u0002\u0002\u0002GJ\u0003\u0002\u0002\u0002HI\u0003\u0002\u0002", - "\u0002HF\u0003\u0002\u0002\u0002IK\u0003\u0002\u0002\u0002JH\u0003\u0002", - "\u0002\u0002KL\u0007,\u0002\u0002LN\u00071\u0002\u0002M2\u0003\u0002", - "\u0002\u0002MB\u0003\u0002\u0002\u0002NO\u0003\u0002\u0002\u0002OP\b", - "\u0007\u0002\u0002P\u000e\u0003\u0002\u0002\u0002QR\t\u0005\u0002\u0002", - "RS\u0003\u0002\u0002\u0002ST\b\b\u0002\u0002T\u0010\u0003\u0002\u0002", - "\u0002\n\u0002(/8<@HM\u0003\b\u0002\u0002"].join(""); + "\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002\u0003\u0002", + "\u0003\u0002\u0003\u0002\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003", + "\u0005\u0003\"\n\u0003\u0003\u0004\u0003\u0004\u0003\u0005\u0006\u0005", + "\'\n\u0005\r\u0005\u000e\u0005(\u0003\u0006\u0003\u0006\u0003\u0006", + "\u0003\u0006\u0007\u0006/\n\u0006\f\u0006\u000e\u00062\u000b\u0006\u0003", + "\u0006\u0005\u00065\n\u0006\u0003\u0006\u0003\u0006\u0005\u00069\n\u0006", + "\u0003\u0006\u0003\u0006\u0003\u0006\u0003\u0006\u0007\u0006?\n\u0006", + "\f\u0006\u000e\u0006B\u000b\u0006\u0003\u0006\u0003\u0006\u0005\u0006", + "F\n\u0006\u0003\u0006\u0003\u0006\u0003\u0007\u0003\u0007\u0003\u0007", + "\u0003\u0007\u0003@\u0002\b\u0003\u0003\u0005\u0004\u0007\u0005\t\u0006", + "\u000b\u0007\r\b\u0003\u0002\u0005\u0006\u00022;C\\aac|\u0004\u0002", + 
"\f\f\u000f\u000f\u0005\u0002\u000b\f\u000f\u000f\"\"\u0002S\u0002\u0003", + "\u0003\u0002\u0002\u0002\u0002\u0005\u0003\u0002\u0002\u0002\u0002\u0007", + "\u0003\u0002\u0002\u0002\u0002\t\u0003\u0002\u0002\u0002\u0002\u000b", + "\u0003\u0002\u0002\u0002\u0002\r\u0003\u0002\u0002\u0002\u0003\u000f", + "\u0003\u0002\u0002\u0002\u0005!\u0003\u0002\u0002\u0002\u0007#\u0003", + "\u0002\u0002\u0002\t&\u0003\u0002\u0002\u0002\u000bE\u0003\u0002\u0002", + "\u0002\rI\u0003\u0002\u0002\u0002\u000f\u0010\u0007%\u0002\u0002\u0010", + "\u0011\u0007u\u0002\u0002\u0011\u0012\u0007v\u0002\u0002\u0012\u0013", + "\u0007c\u0002\u0002\u0013\u0014\u0007t\u0002\u0002\u0014\u0015\u0007", + "v\u0002\u0002\u0015\u0016\u0007a\u0002\u0002\u0016\u0017\u0007u\u0002", + "\u0002\u0017\u0018\u0007{\u0002\u0002\u0018\u0019\u0007o\u0002\u0002", + "\u0019\u001a\u0007d\u0002\u0002\u001a\u001b\u0007q\u0002\u0002\u001b", + "\u001c\u0007n\u0002\u0002\u001c\u0004\u0003\u0002\u0002\u0002\u001d", + "\u001e\u0007/\u0002\u0002\u001e\"\u0007@\u0002\u0002\u001f \u0007?\u0002", + "\u0002 \"\u0007@\u0002\u0002!\u001d\u0003\u0002\u0002\u0002!\u001f\u0003", + "\u0002\u0002\u0002\"\u0006\u0003\u0002\u0002\u0002#$\u0007=\u0002\u0002", + "$\b\u0003\u0002\u0002\u0002%\'\t\u0002\u0002\u0002&%\u0003\u0002\u0002", + "\u0002\'(\u0003\u0002\u0002\u0002(&\u0003\u0002\u0002\u0002()\u0003", + "\u0002\u0002\u0002)\n\u0003\u0002\u0002\u0002*+\u00071\u0002\u0002+", + ",\u00071\u0002\u0002,0\u0003\u0002\u0002\u0002-/\n\u0003\u0002\u0002", + ".-\u0003\u0002\u0002\u0002/2\u0003\u0002\u0002\u00020.\u0003\u0002\u0002", + "\u000201\u0003\u0002\u0002\u000218\u0003\u0002\u0002\u000220\u0003\u0002", + "\u0002\u000235\u0007\u000f\u0002\u000243\u0003\u0002\u0002\u000245\u0003", + "\u0002\u0002\u000256\u0003\u0002\u0002\u000269\u0007\f\u0002\u00027", + "9\u0007\u0002\u0002\u000384\u0003\u0002\u0002\u000287\u0003\u0002\u0002", + "\u00029F\u0003\u0002\u0002\u0002:;\u00071\u0002\u0002;<\u0007,\u0002", + 
"\u0002<@\u0003\u0002\u0002\u0002=?\u000b\u0002\u0002\u0002>=\u0003\u0002", + "\u0002\u0002?B\u0003\u0002\u0002\u0002@A\u0003\u0002\u0002\u0002@>\u0003", + "\u0002\u0002\u0002AC\u0003\u0002\u0002\u0002B@\u0003\u0002\u0002\u0002", + "CD\u0007,\u0002\u0002DF\u00071\u0002\u0002E*\u0003\u0002\u0002\u0002", + "E:\u0003\u0002\u0002\u0002FG\u0003\u0002\u0002\u0002GH\b\u0006\u0002", + "\u0002H\f\u0003\u0002\u0002\u0002IJ\t\u0004\u0002\u0002JK\u0003\u0002", + "\u0002\u0002KL\b\u0007\u0002\u0002L\u000e\u0003\u0002\u0002\u0002\n", + "\u0002!(048@E\u0003\b\u0002\u0002"].join(""); var atn = new antlr4.atn.ATNDeserializer().deserialize(serializedATN); @@ -82,25 +78,24 @@ grammarlangLexer.EOF = antlr4.Token.EOF; grammarlangLexer.START_SYMBOL_KEYWORD = 1; grammarlangLexer.ASSIGN = 2; grammarlangLexer.SEMICOLON = 3; -grammarlangLexer.NONTERMINAL = 4; -grammarlangLexer.TERMINAL = 5; -grammarlangLexer.COMMENT = 6; -grammarlangLexer.WS = 7; +grammarlangLexer.SYMBOL = 4; +grammarlangLexer.COMMENT = 5; +grammarlangLexer.WS = 6; grammarlangLexer.prototype.channelNames = [ "DEFAULT_TOKEN_CHANNEL", "HIDDEN" ]; grammarlangLexer.prototype.modeNames = [ "DEFAULT_MODE" ]; -grammarlangLexer.prototype.literalNames = [ null, "'_start_symbol'", "'->'", +grammarlangLexer.prototype.literalNames = [ null, "'#start_symbol'", null, "';'" ]; grammarlangLexer.prototype.symbolicNames = [ null, "START_SYMBOL_KEYWORD", - "ASSIGN", "SEMICOLON", "NONTERMINAL", - "TERMINAL", "COMMENT", "WS" ]; + "ASSIGN", "SEMICOLON", "SYMBOL", + "COMMENT", "WS" ]; grammarlangLexer.prototype.ruleNames = [ "START_SYMBOL_KEYWORD", "ASSIGN", - "SEMICOLON", "NONTERMINAL", "TERMINAL", - "COMMENT", "WS" ]; + "SEMICOLON", "SYMBOL", "COMMENT", + "WS" ]; grammarlangLexer.prototype.grammarFileName = "grammarlang.g4"; diff --git a/grammarlang/grammarlangLexer.tokens b/grammarlang/grammarlangLexer.tokens index d9f43f4..a4c61f1 100644 --- a/grammarlang/grammarlangLexer.tokens +++ b/grammarlang/grammarlangLexer.tokens @@ -1,10 +1,8 @@ 
START_SYMBOL_KEYWORD=1 ASSIGN=2 SEMICOLON=3 -NONTERMINAL=4 -TERMINAL=5 -COMMENT=6 -WS=7 -'_start_symbol'=1 -'->'=2 +SYMBOL=4 +COMMENT=5 +WS=6 +'#start_symbol'=1 ';'=3 diff --git a/grammarlang/grammarlangParser.js b/grammarlang/grammarlangParser.js index 4c60e97..3fc2763 100644 --- a/grammarlang/grammarlangParser.js +++ b/grammarlang/grammarlangParser.js @@ -6,30 +6,30 @@ var grammarFileName = "grammarlang.g4"; var serializedATN = ["\u0003\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964", - "\u0003\t(\u0004\u0002\t\u0002\u0004\u0003\t\u0003\u0004\u0004\t\u0004", + "\u0003\b(\u0004\u0002\t\u0002\u0004\u0003\t\u0003\u0004\u0004\t\u0004", "\u0004\u0005\t\u0005\u0004\u0006\t\u0006\u0003\u0002\u0005\u0002\u000e", "\n\u0002\u0003\u0002\u0006\u0002\u0011\n\u0002\r\u0002\u000e\u0002\u0012", "\u0003\u0002\u0003\u0002\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003", "\u0003\u0004\u0003\u0004\u0003\u0004\u0003\u0004\u0003\u0004\u0003\u0005", "\u0003\u0005\u0003\u0006\u0007\u0006#\n\u0006\f\u0006\u000e\u0006&\u000b", - "\u0006\u0003\u0006\u0002\u0002\u0007\u0002\u0004\u0006\b\n\u0002\u0003", - "\u0003\u0002\u0006\u0007\u0002%\u0002\r\u0003\u0002\u0002\u0002\u0004", - "\u0016\u0003\u0002\u0002\u0002\u0006\u001a\u0003\u0002\u0002\u0002\b", - "\u001f\u0003\u0002\u0002\u0002\n$\u0003\u0002\u0002\u0002\f\u000e\u0005", - "\u0004\u0003\u0002\r\f\u0003\u0002\u0002\u0002\r\u000e\u0003\u0002\u0002", - "\u0002\u000e\u0010\u0003\u0002\u0002\u0002\u000f\u0011\u0005\u0006\u0004", - "\u0002\u0010\u000f\u0003\u0002\u0002\u0002\u0011\u0012\u0003\u0002\u0002", - "\u0002\u0012\u0010\u0003\u0002\u0002\u0002\u0012\u0013\u0003\u0002\u0002", - "\u0002\u0013\u0014\u0003\u0002\u0002\u0002\u0014\u0015\u0007\u0002\u0002", - "\u0003\u0015\u0003\u0003\u0002\u0002\u0002\u0016\u0017\u0007\u0003\u0002", - "\u0002\u0017\u0018\u0007\u0006\u0002\u0002\u0018\u0019\u0007\u0005\u0002", - "\u0002\u0019\u0005\u0003\u0002\u0002\u0002\u001a\u001b\u0005\b\u0005", - 
"\u0002\u001b\u001c\u0007\u0004\u0002\u0002\u001c\u001d\u0005\n\u0006", - "\u0002\u001d\u001e\u0007\u0005\u0002\u0002\u001e\u0007\u0003\u0002\u0002", - "\u0002\u001f \u0007\u0006\u0002\u0002 \t\u0003\u0002\u0002\u0002!#\t", - "\u0002\u0002\u0002\"!\u0003\u0002\u0002\u0002#&\u0003\u0002\u0002\u0002", - "$\"\u0003\u0002\u0002\u0002$%\u0003\u0002\u0002\u0002%\u000b\u0003\u0002", - "\u0002\u0002&$\u0003\u0002\u0002\u0002\u0005\r\u0012$"].join(""); + "\u0006\u0003\u0006\u0002\u0002\u0007\u0002\u0004\u0006\b\n\u0002\u0002", + "\u0002%\u0002\r\u0003\u0002\u0002\u0002\u0004\u0016\u0003\u0002\u0002", + "\u0002\u0006\u001a\u0003\u0002\u0002\u0002\b\u001f\u0003\u0002\u0002", + "\u0002\n$\u0003\u0002\u0002\u0002\f\u000e\u0005\u0004\u0003\u0002\r", + "\f\u0003\u0002\u0002\u0002\r\u000e\u0003\u0002\u0002\u0002\u000e\u0010", + "\u0003\u0002\u0002\u0002\u000f\u0011\u0005\u0006\u0004\u0002\u0010\u000f", + "\u0003\u0002\u0002\u0002\u0011\u0012\u0003\u0002\u0002\u0002\u0012\u0010", + "\u0003\u0002\u0002\u0002\u0012\u0013\u0003\u0002\u0002\u0002\u0013\u0014", + "\u0003\u0002\u0002\u0002\u0014\u0015\u0007\u0002\u0002\u0003\u0015\u0003", + "\u0003\u0002\u0002\u0002\u0016\u0017\u0007\u0003\u0002\u0002\u0017\u0018", + "\u0007\u0006\u0002\u0002\u0018\u0019\u0007\u0005\u0002\u0002\u0019\u0005", + "\u0003\u0002\u0002\u0002\u001a\u001b\u0005\b\u0005\u0002\u001b\u001c", + "\u0007\u0004\u0002\u0002\u001c\u001d\u0005\n\u0006\u0002\u001d\u001e", + "\u0007\u0005\u0002\u0002\u001e\u0007\u0003\u0002\u0002\u0002\u001f ", + "\u0007\u0006\u0002\u0002 \t\u0003\u0002\u0002\u0002!#\u0007\u0006\u0002", + "\u0002\"!\u0003\u0002\u0002\u0002#&\u0003\u0002\u0002\u0002$\"\u0003", + "\u0002\u0002\u0002$%\u0003\u0002\u0002\u0002%\u000b\u0003\u0002\u0002", + "\u0002&$\u0003\u0002\u0002\u0002\u0005\r\u0012$"].join(""); var atn = new antlr4.atn.ATNDeserializer().deserialize(serializedATN); @@ -38,10 +38,10 @@ var decisionsToDFA = atn.decisionToState.map( function(ds, index) { return new a var sharedContextCache = 
new antlr4.PredictionContextCache(); -var literalNames = [ null, "'_start_symbol'", "'->'", "';'" ]; +var literalNames = [ null, "'#start_symbol'", null, "';'" ]; var symbolicNames = [ null, "START_SYMBOL_KEYWORD", "ASSIGN", "SEMICOLON", - "NONTERMINAL", "TERMINAL", "COMMENT", "WS" ]; + "SYMBOL", "COMMENT", "WS" ]; var ruleNames = [ "rulelist", "start_symbol", "rule_", "l", "r" ]; @@ -67,10 +67,9 @@ grammarlangParser.EOF = antlr4.Token.EOF; grammarlangParser.START_SYMBOL_KEYWORD = 1; grammarlangParser.ASSIGN = 2; grammarlangParser.SEMICOLON = 3; -grammarlangParser.NONTERMINAL = 4; -grammarlangParser.TERMINAL = 5; -grammarlangParser.COMMENT = 6; -grammarlangParser.WS = 7; +grammarlangParser.SYMBOL = 4; +grammarlangParser.COMMENT = 5; +grammarlangParser.WS = 6; grammarlangParser.RULE_rulelist = 0; grammarlangParser.RULE_start_symbol = 1; @@ -155,7 +154,7 @@ grammarlangParser.prototype.rulelist = function() { this.state = 16; this._errHandler.sync(this); _la = this._input.LA(1); - } while(_la===grammarlangParser.NONTERMINAL); + } while(_la===grammarlangParser.SYMBOL); this.state = 18; this.match(grammarlangParser.EOF); } catch (re) { @@ -193,8 +192,8 @@ Start_symbolContext.prototype.START_SYMBOL_KEYWORD = function() { return this.getToken(grammarlangParser.START_SYMBOL_KEYWORD, 0); }; -Start_symbolContext.prototype.NONTERMINAL = function() { - return this.getToken(grammarlangParser.NONTERMINAL, 0); +Start_symbolContext.prototype.SYMBOL = function() { + return this.getToken(grammarlangParser.SYMBOL, 0); }; Start_symbolContext.prototype.SEMICOLON = function() { @@ -227,7 +226,7 @@ grammarlangParser.prototype.start_symbol = function() { this.state = 20; this.match(grammarlangParser.START_SYMBOL_KEYWORD); this.state = 21; - this.match(grammarlangParser.NONTERMINAL); + this.match(grammarlangParser.SYMBOL); this.state = 22; this.match(grammarlangParser.SEMICOLON); } catch (re) { @@ -339,8 +338,8 @@ function LContext(parser, parent, invokingState) { LContext.prototype = 
Object.create(antlr4.ParserRuleContext.prototype); LContext.prototype.constructor = LContext; -LContext.prototype.NONTERMINAL = function() { - return this.getToken(grammarlangParser.NONTERMINAL, 0); +LContext.prototype.SYMBOL = function() { + return this.getToken(grammarlangParser.SYMBOL, 0); }; LContext.prototype.enterRule = function(listener) { @@ -367,7 +366,7 @@ grammarlangParser.prototype.l = function() { try { this.enterOuterAlt(localctx, 1); this.state = 29; - this.match(grammarlangParser.NONTERMINAL); + this.match(grammarlangParser.SYMBOL); } catch (re) { if(re instanceof antlr4.error.RecognitionException) { localctx.exception = re; @@ -399,26 +398,14 @@ function RContext(parser, parent, invokingState) { RContext.prototype = Object.create(antlr4.ParserRuleContext.prototype); RContext.prototype.constructor = RContext; -RContext.prototype.NONTERMINAL = function(i) { +RContext.prototype.SYMBOL = function(i) { if(i===undefined) { i = null; } if(i===null) { - return this.getTokens(grammarlangParser.NONTERMINAL); + return this.getTokens(grammarlangParser.SYMBOL); } else { - return this.getToken(grammarlangParser.NONTERMINAL, i); - } -}; - - -RContext.prototype.TERMINAL = function(i) { - if(i===undefined) { - i = null; - } - if(i===null) { - return this.getTokens(grammarlangParser.TERMINAL); - } else { - return this.getToken(grammarlangParser.TERMINAL, i); + return this.getToken(grammarlangParser.SYMBOL, i); } }; @@ -450,16 +437,9 @@ grammarlangParser.prototype.r = function() { this.state = 34; this._errHandler.sync(this); _la = this._input.LA(1); - while(_la===grammarlangParser.NONTERMINAL || _la===grammarlangParser.TERMINAL) { + while(_la===grammarlangParser.SYMBOL) { this.state = 31; - _la = this._input.LA(1); - if(!(_la===grammarlangParser.NONTERMINAL || _la===grammarlangParser.TERMINAL)) { - this._errHandler.recoverInline(this); - } - else { - this._errHandler.reportMatch(this); - this.consume(); - } + this.match(grammarlangParser.SYMBOL); this.state = 36; 
this._errHandler.sync(this); _la = this._input.LA(1); diff --git a/src/errors.js b/src/errors.js index e03b4d1..f14bb6c 100644 --- a/src/errors.js +++ b/src/errors.js @@ -1,5 +1,7 @@ class LexerError extends Error { } class ParserError extends Error { } +class StartSymbolNotFound extends ParserError { } -module.exports.LexerError = LexerError; -module.exports.ParserError = ParserError; +module.exports = Object.freeze({ + LexerError, ParserError, StartSymbolNotFound +}); \ No newline at end of file diff --git a/src/index.js b/src/index.js index 289575e..9b4457e 100644 --- a/src/index.js +++ b/src/index.js @@ -1,4 +1,8 @@ -module.exports.ll1 = require('./ll1'); -module.exports.parser = require('./parser'); -module.exports.GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangLexer; -module.exports.GrammarlangParser = require('../grammarlang/grammarlangParser').grammarlangParser; +const ll1 = require('./ll1'); +const parser = require('./parser'); +const GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangLexer; +const GrammarlangParser = require('../grammarlang/grammarlangParser').grammarlangParser; + +module.exports = Object.freeze({ + ll1, parser, GrammarlangLexer, GrammarlangParser +}) diff --git a/src/ll1.js b/src/ll1.js index 1e31c78..cf38310 100644 --- a/src/ll1.js +++ b/src/ll1.js @@ -1,41 +1,27 @@ -const GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangLexer; +const parser = require('./parser'); const errors = require('./errors'); -function getNonTerminals(grammar) { - return Object.keys(grammar).filter(v => !v.startsWith('_')) -} - -function getProductions(nonTerminals,grammar){ - let count=0; - nonTerminals.forEach(l => { - grammar[l].forEach(rule => { - count++; - }); - }); - return count; -} - -function calculateNullables(grammar) { - - const nonTerminals = getNonTerminals(grammar); +function calculateNullables(input) { + const grammar = input.grammar; + const nonTerminals = 
input.nonTerminals; const nullableRules = {}; const nullableNonTerminals = {}; let doLoop = true; - let remainingCycles = getProductions(nonTerminals,grammar); + let remainingCycles = input.rulesNumber; nonTerminals.forEach(l => nullableRules[l] = []); while (doLoop) { - if(remainingCycles<0){ - let involvedNT="" - nonTerminals.forEach(l=>{ + if (remainingCycles < 0) { + let involvedNT = "" + nonTerminals.forEach(l => { if (nullableNonTerminals[l] === undefined) - involvedNT+= (involvedNT.length>=0?" and ":"") + l + involvedNT += (involvedNT.length >= 0 ? " and " : "") + l }); throw new errors.ParserError(`Loop detected. The non terminals involved are ${involvedNT}`); } doLoop = false; - remainingCycles-=1; + remainingCycles -= 1; nonTerminals.forEach(l => { grammar[l].forEach((rule, index) => { @@ -63,16 +49,13 @@ function calculateNullables(grammar) { } - return { - nullableRules: nullableRules, - nullableNonTerminals: nullableNonTerminals - } + return { nullableRules, nullableNonTerminals } } function ruleIsNullable(rule, nullableNonTerminals) { let currentResult = true; for (const item of rule) { - if (item.type === GrammarlangLexer.TERMINAL) { + if (item.type === parser.TERMINAL) { return false; } if (nullableNonTerminals[item.value] === false) { @@ -85,19 +68,20 @@ function ruleIsNullable(rule, nullableNonTerminals) { return currentResult; } -function initializeFirstSets(grammar) { +function initializeFirstSets(input) { + const grammar = input.grammar; const result = {}; - const nullableNonTerminals = calculateNullables(grammar).nullableNonTerminals; // TODO reuse precomputed values - getNonTerminals(grammar).forEach(l => { + const nullableNonTerminals = calculateNullables(input).nullableNonTerminals; // TODO reuse precomputed values + input.nonTerminals.forEach(l => { result[l] = []; grammar[l].forEach((r, index) => { result[l][index] = [[]]; for (const item of r) { - if (item.type === GrammarlangLexer.TERMINAL) { + if (item.type === parser.TERMINAL) { 
result[l][index][0].push(item.value); return; } - if (item.type === GrammarlangLexer.NONTERMINAL && !nullableNonTerminals[item.value]) { + if (item.type === parser.NONTERMINAL && !nullableNonTerminals[item.value]) { return; } } @@ -107,15 +91,16 @@ function initializeFirstSets(grammar) { return result; } -function calculateFirstSetsDependencies(grammar) { +function calculateFirstSetsDependencies(input) { + const grammar = input.grammar; const result = {}; - const nullableNonTerminals = calculateNullables(grammar).nullableNonTerminals; // TODO reuse precomputed values - getNonTerminals(grammar).forEach(l => { + const nullableNonTerminals = calculateNullables(input).nullableNonTerminals; // TODO reuse precomputed values + input.nonTerminals.forEach(l => { result[l] = []; grammar[l].forEach((r, index) => { result[l][index] = new Set(); for (const item of r) { - if (item.type === GrammarlangLexer.TERMINAL) { + if (item.type === parser.TERMINAL) { break; } else { result[l][index].add(item.value); @@ -137,9 +122,9 @@ function getAggregateFirstSet(set, nonTerminal, index) { return result; } -function calculateFirstSets(grammar) { - const firstSets = initializeFirstSets(grammar); - const depencencies = calculateFirstSetsDependencies(grammar); +function calculateFirstSets(input) { + const firstSets = initializeFirstSets(input); + const depencencies = calculateFirstSetsDependencies(input); let doLoop = true; let loopIndex = 0; @@ -167,24 +152,24 @@ function calculateFirstSets(grammar) { return firstSets; } -function calculateFollowSetDependencies(grammar, axiom = 'S') //First run for follow sets: gets non terminals and terminals next to each non terminal +function calculateFollowSetDependencies(input, axiom = 'S') //First run for follow sets: gets non terminals and terminals next to each non terminal { + const grammar = input.grammar; var follow_nonTerminals = {} var follow_terminals = {} - getNonTerminals(grammar).forEach(it => { + input.nonTerminals.forEach(it => { 
follow_nonTerminals[it] = []; follow_terminals[it] = [[]]; }); follow_terminals[axiom][0].push("↙"); - getNonTerminals(grammar).forEach(l => { - getNonTerminals(grammar).forEach(f => { + input.nonTerminals.forEach(l => { + input.nonTerminals.forEach(f => { grammar[f].forEach(r => { var pushNext = false; //if true, the item that comes next is in the follow set of l - //var lastNT = undefined; for (const item of r) { if (pushNext) { - if (item.type === GrammarlangLexer.NONTERMINAL) { - const tmp_itemInits = calculateFirstSets(grammar)[item.value]; + if (item.type === parser.NONTERMINAL) { + const tmp_itemInits = calculateFirstSets(input)[item.value]; tmp_itemInits.forEach(x => { const tmp_follows = x[x.length - 1]; tmp_follows.forEach(t => { @@ -194,11 +179,11 @@ function calculateFollowSetDependencies(grammar, axiom = 'S') //First run for fo }); }); - if (calculateNullables(grammar).nullableNonTerminals[item.value] === false) { + if (calculateNullables(input).nullableNonTerminals[item.value] === false) { pushNext = false; } } - else if (item.type === GrammarlangLexer.TERMINAL) { + else if (item.type === parser.TERMINAL) { if (!follow_terminals[l][0].includes(item.value)) follow_terminals[l][0].push(item.value); pushNext = false; @@ -207,21 +192,12 @@ function calculateFollowSetDependencies(grammar, axiom = 'S') //First run for fo if (item.value === l) { pushNext = true; } - //duplicated control - TO DELETE - /*if (item.type === GrammarlangLexer.NONTERMINAL) { - lastNT = item.value; - } else { - lastNT = undefined; - }*/ } if (pushNext) { if (!follow_nonTerminals[l].includes(f)) follow_nonTerminals[l].push(f); //if I find l at the end, f's follows are inherited } - /*if (lastNT) { - if (!follow_nonTerminals[lastNT].includes(f)) - follow_nonTerminals[lastNT].push(f); - }*/ + }); }); }); @@ -231,11 +207,12 @@ function calculateFollowSetDependencies(grammar, axiom = 'S') //First run for fo } } -function calculateFollowSets(grammar) { +function calculateFollowSets(input) 
{ var followsets = {} - const axiom = grammar._start_symbol - const non_terminals = calculateFollowSetDependencies(grammar, axiom).follow_nonTerminals; - const initial_followsets = calculateFollowSetDependencies(grammar, axiom).follow_terminals; + const axiom = input.startSymbol; + const followSetsDep = calculateFollowSetDependencies(input, axiom) + const non_terminals = followSetsDep.follow_nonTerminals; + const initial_followsets = followSetsDep.follow_terminals; followsets = initial_followsets; var iteration = 0; var goahead = true; @@ -271,13 +248,14 @@ function isDifferent(obj, iter) { return ret; } -function calculateLookAheads(grammar) { +function calculateLookAheads(input) { + const grammar = input.grammar; var ret = {}; - const axiom = grammar._start_symbol - const firstSets = calculateFirstSets(grammar); - const followSets = calculateFollowSets(grammar, axiom); - const nullableRules = calculateNullables(grammar).nullableRules; - getNonTerminals(grammar).forEach(l => { + const axiom = input.startSymbol; + const firstSets = calculateFirstSets(input); + const followSets = calculateFollowSets(input, axiom); + const nullableRules = calculateNullables(input).nullableRules; + input.nonTerminals.forEach(l => { ret[l] = []; grammar[l].forEach((r, index) => { ret[l][index] = []; @@ -299,11 +277,11 @@ function calculateLookAheads(grammar) { return ret; } -function isLL1(grammar) { - const lookaheads = calculateLookAheads(grammar); +function isLL1(input) { + const lookaheads = calculateLookAheads(input); var res = true; Object.keys(lookaheads).forEach(l => { - const conf = calculateConflicts(l, grammar, lookaheads).length; + const conf = calculateConflicts(l, input, lookaheads).length; if (conf > 0) { res = false; @@ -312,11 +290,11 @@ function isLL1(grammar) { return res; } -function calculateConflicts(nonTerminal, grammar = [], lookaheads = []) { //grammar and/or followsets MUST BE passed +function calculateConflicts(nonTerminal, input = {}, lookaheads = []) { // 
input and/or followsets MUST BE passed var terminals = []; var ret = []; if (lookaheads == []) { - lookaheads = calculateLookAheads(grammar); + lookaheads = calculateLookAheads(input); } lookaheads[nonTerminal].forEach(r => { r.forEach(t => { @@ -332,11 +310,11 @@ function calculateConflicts(nonTerminal, grammar = [], lookaheads = []) { //gram return ret; } -function calculateAllConflicts(grammar) { - const lookaheads = calculateLookAheads(grammar); +function calculateAllConflicts(input) { + const lookaheads = calculateLookAheads(input); var res = {}; Object.keys(lookaheads).forEach(l => { - const conf = calculateConflicts(l, grammar, lookaheads); + const conf = calculateConflicts(l, input, lookaheads); res[l] = conf.slice(); }); return res; diff --git a/src/parser.js b/src/parser.js index 22b25e7..772cf8d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -3,6 +3,10 @@ const GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangL const GrammarlangParser = require('../grammarlang/grammarlangParser').grammarlangParser; const errors = require('./errors'); +const NONTERMINAL = 0; +const TERMINAL = 1; + + class Visitor { visitChildren(ctx) { return this.visitRuleList(ctx); @@ -10,27 +14,64 @@ class Visitor { visitRuleList(ctx) { const rules = []; - let _start_symbol = 'S'; + const nonTerminals = new Set(); + let startSymbol = undefined; + let rulesNumber = 0; + ctx.children.forEach(child => { - if (child.constructor.name === 'Rule_Context') { - rules.push(this.visitRule(child)) - } else if (child.constructor.name === 'Start_symbolContext') { - _start_symbol = this.visitStartSymbol(child); + if (child.constructor.name === 'Start_symbolContext') { + startSymbol = this.visitStartSymbol(child); + + } else if (child.constructor.name === 'Rule_Context') { + rulesNumber++; + const rule = this.visitRule(child) + rules.push(rule); + nonTerminals.add(rule.l); + if (!startSymbol) { + startSymbol = rule.l; + } } }); - const result = { _start_symbol }; + if 
(startSymbol === undefined) { + throw new Error('Fatal error'); + } + if (!nonTerminals.has(startSymbol)) { + throw new errors.StartSymbolNotFound( + `At least one production from the start symbol '${startSymbol}' is required`); + } + + const grammar = {}; + const terminals = new Set(); + rules.forEach(rule => { - const tmp = result[rule.l] || []; - tmp.push(rule.r); - result[rule.l] = tmp; + const items = []; + + rule.r.forEach(symbol => { + const type = nonTerminals.has(symbol) ? NONTERMINAL : TERMINAL; + items.push({ type, value: symbol }); + if (type === TERMINAL) { + terminals.add(symbol); + } + }); + + const tmp = grammar[rule.l] || []; + tmp.push(items); + grammar[rule.l] = tmp; }); - return result; + + return { + grammar, + startSymbol, + rulesNumber, + terminals: Array.from(terminals).sort(), + nonTerminals: Array.from(nonTerminals).sort() + }; } visitStartSymbol(ctx) { return ctx.children - .find(child => child.symbol.type === GrammarlangParser.NONTERMINAL) + .find(child => child.symbol.type === GrammarlangParser.SYMBOL) .getText(); } @@ -52,10 +93,7 @@ class Visitor { } visitR(ctx) { - return ctx.children.map(child => ({ - type: child.symbol.type, - value: child.getText() - })); + return ctx.children.map(child => child.getText()); } } @@ -70,7 +108,7 @@ class ParserErrorListener extends antlr4.error.ErrorListener { } } -module.exports.parseString = function (input) { +const parseString = (input) => { const chars = new antlr4.InputStream(input); const lexer = new GrammarlangLexer(chars); @@ -86,3 +124,5 @@ module.exports.parseString = function (input) { const tree = parser.rulelist(); return tree.accept(new Visitor()); } + +module.exports = Object.freeze({ NONTERMINAL, TERMINAL, parseString }); \ No newline at end of file diff --git a/test/test-ll1.js b/test/test-ll1.js index 80686e1..40192a0 100644 --- a/test/test-ll1.js +++ b/test/test-ll1.js @@ -1,19 +1,23 @@ import test from 'ava'; -import { grammarlangLexer } from '../grammarlang/grammarlangLexer'; 
-const GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangLexer; +const parser = require('../src/parser'); const ll1 = require('../src/ll1'); test('calculate nullables case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 1, + terminals: ['a'], + nonTerminals: ['S'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: { S: [false] }, nullableNonTerminals: { S: false } }); @@ -21,17 +25,22 @@ test('calculate nullables case 1', t => { test('calculate nullables case 2', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [] + ], + }, + startSymbol: 'S', + rulesNumber: 2, + terminals: ['a'], + nonTerminals: ['S'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: { S: [false, true] }, nullableNonTerminals: { S: true } }); @@ -39,246 +48,287 @@ test('calculate nullables case 2', t => { test('calculate nullables case 3', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'D' }] - ], - 'D': [ - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'D' }] + ], + 'D': [ + [] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: [], + nonTerminals: ['D', 'S'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: 
{ S: [true], D: [true] }, nullableNonTerminals: { S: true, D: true } }); }); test('calculate nullables case 4', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [] - ], - 'D': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'E' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'E' }, - ] - ], - 'E': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - ], - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'D' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [] + ], + 'D': [ + [ + { type: parser.NONTERMINAL, value: 'D' }, + { type: parser.NONTERMINAL, value: 'E' } + ], + [ + { type: parser.TERMINAL, value: 'a' }, + ], + [ + { type: parser.NONTERMINAL, value: 'E' }, + ] + ], + 'E': [ + [ + { type: parser.TERMINAL, value: 'b' }, + ], + [] + ], + }, + startSymbol: 'S', + rulesNumber: 7, + terminals: ['a', 'b'], + nonTerminals: ['D', 'E', 'S'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: { S: [true, true], D: [true, false, true], E: [false, true] }, nullableNonTerminals: { S: true, D: true, E: true } }); }); test('calculate nullables case 5', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'T' } - ] - ], - 'T': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'c' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: 
GrammarlangLexer.TERMINAL, value: 'c' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'q' } - ] - ], - '_start_symbol': 'S', - + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'T' } + ] + ], + 'T': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.TERMINAL, value: 'b' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'b' } + ], + [ + { type: parser.TERMINAL, value: 'c' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'c' } + ], + [ + { type: parser.TERMINAL, value: 'q' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 5, + terminals: ['a', 'b', 'c', 'q'], + nonTerminals: ['S', 'T'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: { S: [false], T: [false, false, false, false] }, nullableNonTerminals: { S: false, T: false } }); - }); + test('calculate nullables case 6', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'SS' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULE' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULELIST' } - ] - ], - 'SS': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'ssk' }, - { type: GrammarlangLexer.TERMINAL, value: 'nt' }, - { type: GrammarlangLexer.TERMINAL, value: 'semicolon' } - ], - [] - ], - 'RULELIST': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'RULE' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULELIST' }, - ], - [] - ], - 'RULE': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'L' }, - { type: GrammarlangLexer.TERMINAL, value: 'assign' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - { type: GrammarlangLexer.TERMINAL, value: 'semicolon' } - ] - ], - 'L': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'nt' } - ] - ], - 'R': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 
'nt' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 't' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' } - ], - [ - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'SS' }, + { type: parser.NONTERMINAL, value: 'RULE' }, + { type: parser.NONTERMINAL, value: 'RULELIST' } + ] + ], + 'SS': [ + [ + { type: parser.TERMINAL, value: 'ssk' }, + { type: parser.TERMINAL, value: 'nt' }, + { type: parser.TERMINAL, value: 'semicolon' } + ], + [] + ], + 'RULELIST': [ + [ + { type: parser.NONTERMINAL, value: 'RULE' }, + { type: parser.NONTERMINAL, value: 'RULELIST' }, + ], + [] + ], + 'RULE': [ + [ + { type: parser.NONTERMINAL, value: 'L' }, + { type: parser.TERMINAL, value: 'assign' }, + { type: parser.NONTERMINAL, value: 'R' }, + { type: parser.TERMINAL, value: 'semicolon' } + ] + ], + 'L': [ + [ + { type: parser.TERMINAL, value: 'nt' } + ] + ], + 'R': [ + [ + { type: parser.TERMINAL, value: 'nt' }, + { type: parser.NONTERMINAL, value: 'R' }, + ], + [ + { type: parser.TERMINAL, value: 't' }, + { type: parser.NONTERMINAL, value: 'R' } + ], + [ + ] + ], + }, + startSymbol: 'S', + rulesNumber: 10, + terminals: ['nt', 'semicolon', 'ssk', 't'], + nonTerminals: ['L', 'S', 'R', 'RULE', 'RULELIST', 'S', 'SS'], }; - t.deepEqual(ll1.calculateNullables(grammar), { + t.deepEqual(ll1.calculateNullables(input), { nullableRules: { S: [false], SS: [false, true], RULELIST: [false, true], RULE: [false], L: [false], R: [false, false, true] }, nullableNonTerminals: { S: false, SS: true, RULELIST: true, RULE: false, L: false, R: true } }); }); + + test('initialize first sets case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ] ] - ], - 
'_start_symbol': 'S', + }, + startSymbol: 'S', + rulesNumber: 2, + terminals: ['a'], + nonTerminals: ['S'], }; - t.deepEqual(ll1.initializeFirstSets(grammar), { + t.deepEqual(ll1.initializeFirstSets(input), { 'S': [[['a']]] }); }); test('initialize first sets case 2', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [ + { type: parser.NONTERMINAL, value: 'D' } + ] + ], + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['D', 'S'], }; - t.deepEqual(ll1.initializeFirstSets(grammar), { + t.deepEqual(ll1.initializeFirstSets(input), { 'S': [[['a']], [[]]], 'D': [[['b']]] }); }); test('initialize first sets case 3', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ] - ], - 'D': [ - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [ + { type: parser.NONTERMINAL, value: 'D' } + ] + ], + 'D': [ + [] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: ['a'], + nonTerminals: ['D', 'S'], }; - t.deepEqual(ll1.initializeFirstSets(grammar), { + t.deepEqual(ll1.initializeFirstSets(input), { 'S': [[['a']], [[]]], 'D': [[[]]] }); }); test('initialize first sets case 4', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, 
value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.initializeFirstSets(grammar), { + t.deepEqual(ll1.initializeFirstSets(input), { 'S': [[[]]], 'A': [ [['a']], @@ -294,60 +344,67 @@ test('initialize first sets case 4', t => { }); }); - - test('calculate first sets dependencies case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, 
value: 'b' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [ + { type: parser.NONTERMINAL, value: 'D' } + ] + ], + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['D', 'S'], }; - t.deepEqual(ll1.calculateFirstSetsDependencies(grammar), { + t.deepEqual(ll1.calculateFirstSetsDependencies(input), { 'S': [new Set(), new Set(['D'])], 'D': [new Set()] }); }); - test('calculate first sets dependencies case 2', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'E' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ], - [] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ] - ], - 'E': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'c' } - ], - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'S' }, + { type: parser.NONTERMINAL, value: 'E' }, + { type: parser.NONTERMINAL, value: 'D' } + ], + [ + { type: parser.NONTERMINAL, value: 'D' } + ], + [] + ], + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + 'E': [ + [ + { type: parser.TERMINAL, value: 'c' } + ], + [] + ], + }, + startSymbol: 'S', + rulesNumber: 6, + terminals: ['b', 'c'], + nonTerminals: ['D', 'E', 'S'], }; - t.deepEqual(ll1.calculateFirstSetsDependencies(grammar), { + t.deepEqual(ll1.calculateFirstSetsDependencies(input), { 'S': [new Set(['S', 'E', 'D']), new Set(['D']), new Set([])], 'D': [new Set([])], 'E': [new Set([]), new Set([])] @@ -355,24 +412,29 @@ test('calculate first sets dependencies case 2', t => { }); test('calculate first sets case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - 
{ type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } - ] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [ + { type: parser.NONTERMINAL, value: 'D' } + ] + ], + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['D', 'S'], }; - t.deepEqual(ll1.calculateFirstSets(grammar), { + t.deepEqual(ll1.calculateFirstSets(input), { 'S': [ [['a'], ['a'], ['a']], [[], ['b'], ['b']], @@ -384,35 +446,40 @@ test('calculate first sets case 1', t => { }); test('calculate first sets case 2', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [] - ], - 'D': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'D' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'E' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'E' }, - ] - ], - 'E': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - ], - [] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'D' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [] + ], + 'D': [ + [ + { type: parser.NONTERMINAL, value: 'D' }, + { type: parser.NONTERMINAL, value: 'E' } + ], + [ + { type: parser.TERMINAL, value: 'a' }, + ], + [ + { type: parser.NONTERMINAL, value: 'E' }, + ] + ], + 'E': [ + [ + { type: parser.TERMINAL, value: 'b' }, + ], + [] + ], + }, + startSymbol: 'S', + rulesNumber: 7, + terminals: ['a', 'b'], + nonTerminals: ['D', 'E', 'S'], }; - t.deepEqual(ll1.calculateFirstSets(grammar), { + t.deepEqual(ll1.calculateFirstSets(input), { 'S': [ [[], ['a'], ['a', 'b'], ['a', 
'b']], [[], [], [], []] @@ -431,42 +498,47 @@ test('calculate first sets case 2', t => { test('calculate first sets case 3', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.calculateFirstSets(grammar), { + t.deepEqual(ll1.calculateFirstSets(input), { 'S': [ [ [], @@ -529,35 +601,40 @@ test('calculate first sets case 3', t => { }); test('calculate follow sets case 1', t => { - const grammar = { - 'S': [ 
- [ - { type: GrammarlangLexer.NONTERMINAL, value: 'T' } - ] - ], - 'T': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'c' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'c' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'q' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'T' } + ] + ], + 'T': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.TERMINAL, value: 'b' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'b' } + ], + [ + { type: parser.TERMINAL, value: 'c' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'c' } + ], + [ + { type: parser.TERMINAL, value: 'q' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 5, + terminals: ['a', 'b', 'c', 'q'], + nonTerminals: ['S', 'T'], }; - t.deepEqual(ll1.calculateFollowSets(grammar), { + t.deepEqual(ll1.calculateFollowSets(input), { 'S': [ ['↙'], ['↙'], @@ -570,43 +647,49 @@ test('calculate follow sets case 1', t => { ], }); }); + test('calculate follow sets case 2', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - 
], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.calculateFollowSets(grammar), { + t.deepEqual(ll1.calculateFollowSets(input), { 'S': [ ['↙'], ['x', 'y', '↙',], @@ -627,46 +710,52 @@ test('calculate follow sets case 2', t => { ], }); }); + test('calculate follow sets case 3', t => { - const grammar = { - 'P': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'S' }] - ], - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: 
GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'P', + const input = { + grammar: { + 'P': [ + [{ type: parser.NONTERMINAL, value: 'S' }] + ], + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'P', + rulesNumber: 9, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'P', 'S', 'Z'], }; - t.deepEqual(ll1.calculateFollowSets(grammar), { + t.deepEqual(ll1.calculateFollowSets(input), { 'P': [ ['↙'], ['↙'], @@ -697,48 +786,54 @@ test('calculate follow sets case 3', t => { ], }); }); + test('calculate follow sets case 4', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - { type: GrammarlangLexer.TERMINAL, value: 'i' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - { type: GrammarlangLexer.TERMINAL, value: 'i' } - ] - ], - 'R': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'd' }, - { type: GrammarlangLexer.TERMINAL, value: 'i' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'L' }, - { type: GrammarlangLexer.TERMINAL, value: 'v' } - ] - ], - 
'L': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'd' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'X' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'n' } - ] - ], - 'X': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'X' }, - { type: GrammarlangLexer.TERMINAL, value: 'n' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' } - ], - [ - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'R' }, + { type: parser.TERMINAL, value: 'i' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [ + { type: parser.NONTERMINAL, value: 'R' }, + { type: parser.TERMINAL, value: 'i' } + ] + ], + 'R': [ + [ + { type: parser.TERMINAL, value: 'd' }, + { type: parser.TERMINAL, value: 'i' }, + { type: parser.NONTERMINAL, value: 'L' }, + { type: parser.TERMINAL, value: 'v' } + ] + ], + 'L': [ + [ + { type: parser.TERMINAL, value: 'd' }, + { type: parser.NONTERMINAL, value: 'X' } + ], + [ + { type: parser.TERMINAL, value: 'n' } + ] + ], + 'X': [ + [ + { type: parser.NONTERMINAL, value: 'X' }, + { type: parser.TERMINAL, value: 'n' }, + { type: parser.NONTERMINAL, value: 'R' } + ], + [ + ] + ], + }, + startSymbol: 'S', + rulesNumber: 7, + terminals: ['d', 'i', 'n', 'v'], + nonTerminals: ['L', 'R', 'S', 'X'], }; - t.deepEqual(ll1.calculateFollowSets(grammar), { + t.deepEqual(ll1.calculateFollowSets(input), { 'S': [ ['↙'], ['↙'], @@ -765,58 +860,64 @@ test('calculate follow sets case 4', t => { ] }); }); + test('calculate follow sets case 5', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'SS' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULE' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULELIST' } - ] - ], - 'SS': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'ssk' }, - { type: GrammarlangLexer.TERMINAL, value: 'nt' }, - { type: GrammarlangLexer.TERMINAL, value: 'semicolon' } - ], - [] - ], - 'RULELIST': [ - [ - { type: GrammarlangLexer.NONTERMINAL, 
value: 'RULE' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'RULELIST' }, - ], - [] - ], - 'RULE': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'L' }, - { type: GrammarlangLexer.TERMINAL, value: 'assign' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - { type: GrammarlangLexer.TERMINAL, value: 'semicolon' } - ] - ], - 'L': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'nt' } - ] - ], - 'R': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'nt' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' }, - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 't' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'R' } - ], - [ - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'SS' }, + { type: parser.NONTERMINAL, value: 'RULE' }, + { type: parser.NONTERMINAL, value: 'RULELIST' } + ] + ], + 'SS': [ + [ + { type: parser.TERMINAL, value: 'ssk' }, + { type: parser.TERMINAL, value: 'nt' }, + { type: parser.TERMINAL, value: 'semicolon' } + ], + [] + ], + 'RULELIST': [ + [ + { type: parser.NONTERMINAL, value: 'RULE' }, + { type: parser.NONTERMINAL, value: 'RULELIST' }, + ], + [] + ], + 'RULE': [ + [ + { type: parser.NONTERMINAL, value: 'L' }, + { type: parser.TERMINAL, value: 'assign' }, + { type: parser.NONTERMINAL, value: 'R' }, + { type: parser.TERMINAL, value: 'semicolon' } + ] + ], + 'L': [ + [ + { type: parser.TERMINAL, value: 'nt' } + ] + ], + 'R': [ + [ + { type: parser.TERMINAL, value: 'nt' }, + { type: parser.NONTERMINAL, value: 'R' }, + ], + [ + { type: parser.TERMINAL, value: 't' }, + { type: parser.NONTERMINAL, value: 'R' } + ], + [ + ] + ], + }, + startSymbol: 'S', + rulesNumber: 10, + terminals: ['assign', 'nt', 'semicolon', 'ssk', 't'], + nonTerminals: ['L', 'R', 'RULE', 'RULELIST', 'S', 'SS'], }; - t.deepEqual(ll1.calculateFollowSets(grammar), { + t.deepEqual(ll1.calculateFollowSets(input), { 'S': [ ['↙'], ['↙'], @@ -851,35 +952,40 @@ test('calculate follow sets case 5', t => {
}); test('calculate look aheads case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'T' } - ] - ], - 'T': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'c' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'c' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'q' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'T' } + ] + ], + 'T': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.TERMINAL, value: 'b' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'b' } + ], + [ + { type: parser.TERMINAL, value: 'c' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'c' } + ], + [ + { type: parser.TERMINAL, value: 'q' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 5, + terminals: ['a', 'b', 'c', 'q'], + nonTerminals: ['S', 'T'], }; - t.deepEqual(ll1.calculateLookAheads(grammar), { + t.deepEqual(ll1.calculateLookAheads(input), { 'S': [ ['a', 'b', 'c', 'q'] ], @@ -893,42 +999,47 @@ test('calculate look aheads case 1', t => { }); test('calculate look aheads case 2', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, 
value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.calculateLookAheads(grammar), { + t.deepEqual(ll1.calculateLookAheads(input), { 'S': [ ['a', 'b', 'x', 'y', '↙'] ], @@ -947,45 +1058,50 @@ test('calculate look aheads case 2', t => { }); test('calculate look aheads case 3', t => { - const grammar = { - 'P': [ - [{ type: grammarlangLexer.NONTERMINAL, value: 'S' }] - ], - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, 
value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'P', + const input = { + grammar: { + 'P': [ + [{ type: parser.NONTERMINAL, value: 'S' }] + ], + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'P', + rulesNumber: 9, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'P', 'S', 'Z'], }; - t.deepEqual(ll1.calculateLookAheads(grammar), { + t.deepEqual(ll1.calculateLookAheads(input), { 'P': [ ['a', 'b', 'x', 'y', '↙'] ], @@ -1005,117 +1121,134 @@ test('calculate look aheads case 3', t => { ], }); }); + test('calculate LL1 case 1', t => { - const grammar = { - 'S': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'T' } - ] - ], - 'T': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'c' 
}, - { type: GrammarlangLexer.NONTERMINAL, value: 'T' }, - { type: GrammarlangLexer.TERMINAL, value: 'c' } - ], - [ - { type: GrammarlangLexer.TERMINAL, value: 'q' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [ + { type: parser.NONTERMINAL, value: 'T' } + ] + ], + 'T': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.TERMINAL, value: 'b' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'b' } + ], + [ + { type: parser.TERMINAL, value: 'c' }, + { type: parser.NONTERMINAL, value: 'T' }, + { type: parser.TERMINAL, value: 'c' } + ], + [ + { type: parser.TERMINAL, value: 'q' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 5, + terminals: ['a', 'b', 'c', 'q'], + nonTerminals: ['S', 'T'], }; - t.deepEqual(ll1.isLL1(grammar), true); + t.deepEqual(ll1.isLL1(input), true); }); test('calculate LL1 case 2', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + 
], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.isLL1(grammar), false); + t.deepEqual(ll1.isLL1(input), false); }); + test('calculate conflicts case 1', t => { - const grammar = { - 'S': [ - [{ type: GrammarlangLexer.NONTERMINAL, value: 'A' }] - ], - 'A': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'A' }, - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.TERMINAL, value: 'a' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' } - ], - [] - ], - 'Z': [ - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'Z' }, - { type: GrammarlangLexer.TERMINAL, value: 'y' }, - { type: GrammarlangLexer.TERMINAL, value: 'x' } - ], - [ - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S', + const input = { + grammar: { + 'S': [ + [{ type: parser.NONTERMINAL, value: 'A' }] + ], + 'A': [ + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'A' }, + { type: parser.TERMINAL, value: 'b' }, + { type: parser.TERMINAL, value: 'a' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' } + ], + [] + ], + 'Z': [ + [ + { type: parser.NONTERMINAL, value: 'Z' 
}, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'Z' }, + { type: parser.TERMINAL, value: 'y' }, + { type: parser.TERMINAL, value: 'x' } + ], + [ + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 8, + terminals: ['a', 'b', 'x', 'y'], + nonTerminals: ['A', 'S', 'Z'], }; - t.deepEqual(ll1.calculateAllConflicts(grammar), { + t.deepEqual(ll1.calculateAllConflicts(input), { 'S': [], 'A': ['a', 'b', 'x', 'y', '↙'], 'Z': ['a', 'b', 'x', 'y'], }); -}); \ No newline at end of file +}); diff --git a/test/test-parser.js b/test/test-parser.js index d635a88..5f028f2 100644 --- a/test/test-parser.js +++ b/test/test-parser.js @@ -1,101 +1,185 @@ import test from 'ava'; -const GrammarlangLexer = require('../grammarlang/grammarlangLexer').grammarlangLexer; const parser = require('../src/parser.js'); const errors = require('../src/errors'); -// test('empty string', t => { -// t.deepEqual(parser.parseString(''), {}) -// }); - test('simple case', t => { - const grammar = `S -> a S;` - t.deepEqual(parser.parseString(grammar), { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S' + const input = `S -> a S;` + t.deepEqual(parser.parseString(input), { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 1, + terminals: ['a'], + nonTerminals: ['S'], + }); +}); + +test('simple case 2', t => { + const input = `S => a S;` + t.deepEqual(parser.parseString(input), { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 1, + terminals: ['a'], + nonTerminals: ['S'], }) }); test('simple case with comments', t => { - const grammar = ` + const input = ` /* this is\n a multiline\n comment */\n \n S -> a S; // this is 
an inline comment` - t.deepEqual(parser.parseString(grammar), { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ] - ], - '_start_symbol': 'S' + t.deepEqual(parser.parseString(input), { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 1, + terminals: ['a'], + nonTerminals: ['S'], }) }); test('complex case', t => { - const grammar = ` + const input = ` S -> a D; S -> ; D -> b; - ` - t.deepEqual(parser.parseString(grammar), { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'D' } + `; + t.deepEqual(parser.parseString(input), { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'D' } + ], + [] ], - [] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' } - ] - ], - '_start_symbol': 'S' + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + }, + startSymbol: 'S', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['D', 'S'], + }) +}); + +test('complex case 2', t => { + const input = ` + s -> a d; + s -> ; + d -> b; + `; + t.deepEqual(parser.parseString(input), { + grammar: { + 's': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'd' } + ], + [] + ], + 'd': [ + [ + { type: parser.TERMINAL, value: 'b' } + ] + ], + }, + startSymbol: 's', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['d', 's'], }) }); test('custom start symbol case', t => { - const grammar = `_start_symbol D; S -> a S; S -> ; D -> b S;` - t.deepEqual(parser.parseString(grammar), { - 'S': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'a' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } - ], - [] - ], - 'D': [ - [ - { type: GrammarlangLexer.TERMINAL, value: 'b' }, - { type: GrammarlangLexer.NONTERMINAL, value: 'S' } + const 
input = `#start_symbol D; S -> a S; S -> ; D -> b S;` + t.deepEqual(parser.parseString(input), { + grammar: { + 'S': [ + [ + { type: parser.TERMINAL, value: 'a' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + [] ], - ], - '_start_symbol': 'D' + 'D': [ + [ + { type: parser.TERMINAL, value: 'b' }, + { type: parser.NONTERMINAL, value: 'S' } + ], + ] + }, + startSymbol: 'D', + rulesNumber: 3, + terminals: ['a', 'b'], + nonTerminals: ['D', 'S'], }) }); test('lexer error case 1', t => { - const grammar = `_start_symbol D;\nS_ -> a S\n S -> ;\nD -> b S;\n`; - const f = () => parser.parseString(grammar); + const input = `#start_sybol D;\nS -> a S\n S -> ;\nD -> b S;\n`; + const f = () => parser.parseString(input); t.throws(f, errors.LexerError); }); test('parser error case 1', t => { - const grammar = `_start_symbol D;\nS -> a S\n S -> ;\nD -> b S;\n`; - const f = () => parser.parseString(grammar); + const input = `#start_symbol D;\nS -> a S\n S -> ;\nD -> b S;\n`; + const f = () => parser.parseString(input); t.throws(f, errors.ParserError); }); test('parser error case 2', t => { - const grammar = `S -> a S\n S -> ;\nD -> b S;\n /* unclosed comment`; - const f = () => parser.parseString(grammar); + const input = `S -> a S\n S -> ;\nD -> b S;\n /* unclosed comment`; + const f = () => parser.parseString(input); + t.throws(f, errors.ParserError); +}); + +test('parser error empty input', t => { + const input = ``; + const f = () => parser.parseString(input); + t.throws(f, errors.ParserError); +}); + +test('parser error no rules input', t => { + const input = `#start_symbol S;`; + const f = () => parser.parseString(input); t.throws(f, errors.ParserError); +}); + +test('start symbol not found error', t => { + const input = ` + #start_symbol P; + S -> a D; + S -> ; + D -> b; + `; + const f = () => parser.parseString(input); + t.throws(f, errors.StartSymbolNotFound); }); \ No newline at end of file