-
Notifications
You must be signed in to change notification settings - Fork 391
/
GrammarNode.java
507 lines (431 loc) · 13.1 KB
/
GrammarNode.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
package play.parser;
import java.util.*;
/**
 * A node of a grammar written in EBNF form.
 *
 * <ol>
 *   <li>Every node may own any number of child nodes.</li>
 *   <li>The children are combined either as a sequence (And) or as alternatives (Or); which one
 *       applies is decided by the node's {@code type}.</li>
 *   <li>{@code minTimes} and {@code maxTimes} give the allowed repetition count. For example
 *       {@code +} is {@code minTimes=1, maxTimes=-1}, where {@code -1} means "unbounded".</li>
 *   <li>A node may carry a name, i.e. the left-hand side of a lexer or parser rule. A node added
 *       through {@link #addChild(GrammarNode)} without a name receives a generated default name
 *       derived from its parent's name; generated names always start with an underscore,
 *       e.g. {@code _add_Or_1}.</li>
 * </ol>
 */
public class GrammarNode {

    // Child nodes, in the order they were added.
    private final List<GrammarNode> children = new ArrayList<GrammarNode>();

    // Node type.
    private GrammarNodeType type = null;

    // Used by lexer rules: the set of characters this node can match. A CharSet may itself be a
    // tree made of several sub-sets, e.g. [a-z][A-Z][0-9].
    private CharSet charSet = null;

    // How many times this node may repeat (maxTimes == -1 means unbounded).
    private int minTimes = 1;
    private int maxTimes = 1;

    // Node name; may serve as a token name or as a non-terminal name.
    private String name = null;

    // The token (terminal symbol) referenced by a parser rule.
    private Token token = null;

    // Whether the lexer should ignore matches of this node, e.g. whitespace.
    private boolean neglect = false;

    /** Shared node of type {@code GrammarNodeType.Epsilon}. */
    public static final GrammarNode EPSILON = new GrammarNode(GrammarNodeType.Epsilon);

    /**
     * Creates an unnamed node of the given type.
     *
     * @param type the node type
     */
    protected GrammarNode(GrammarNodeType type) {
        this.type = type;
    }

    /**
     * Creates a named node of the given type.
     *
     * @param name the node name
     * @param type the node type
     */
    protected GrammarNode(String name, GrammarNodeType type) {
        this.name = name;
        this.type = type;
    }

    /**
     * Creates a named node with a single child and an associated token. The child is stored as
     * given; no default name is generated for it.
     *
     * @param name  the node name
     * @param type  the node type
     * @param child the only child
     * @param token the associated token
     */
    protected GrammarNode(String name, GrammarNodeType type, GrammarNode child, Token token) {
        this.name = name;
        this.type = type;
        this.children.add(child);
        this.token = token;
    }

    /**
     * Creates a named node with the given children and an associated token. The children are
     * copied into this node as given; no default names are generated for them.
     *
     * @param name     the node name
     * @param type     the node type
     * @param children the children to copy into this node
     * @param token    the associated token
     */
    protected GrammarNode(String name, GrammarNodeType type, List<GrammarNode> children, Token token) {
        this.name = name;
        this.type = type;
        this.children.addAll(children);
        this.token = token;
    }

    /**
     * Creates an unnamed node of type {@code GrammarNodeType.Token}.
     *
     * @param token the token this node stands for
     */
    protected GrammarNode(Token token) {
        this.type = GrammarNodeType.Token;
        this.token = token;
    }

    /**
     * Creates an unnamed node of type {@code GrammarNodeType.Char}.
     *
     * @param charSet the characters this node matches
     */
    protected GrammarNode(CharSet charSet) {
        this.type = GrammarNodeType.Char;
        this.charSet = charSet;
    }

    /**
     * Creates a named node of type {@code GrammarNodeType.Char}.
     *
     * @param name    the node name
     * @param charSet the characters this node matches
     */
    protected GrammarNode(String name, CharSet charSet) {
        this.name = name;
        this.type = GrammarNodeType.Char;
        this.charSet = charSet;
    }

    /**
     * Creates a Char node, appends it to this node's children and returns it.
     *
     * @param charSet the characters the new child matches
     * @return the newly created child
     */
    protected GrammarNode createChild(CharSet charSet) {
        GrammarNode created = new GrammarNode(charSet);
        addChild(created);
        return created;
    }

    /**
     * Creates a named Char node, appends it to this node's children and returns it.
     *
     * @param name    the name of the new child
     * @param charSet the characters the new child matches
     * @return the newly created child
     */
    protected GrammarNode createChild(String name, CharSet charSet) {
        GrammarNode created = new GrammarNode(name, charSet);
        addChild(created);
        return created;
    }

    /**
     * Creates a node of the given type, appends it to this node's children and returns it.
     *
     * @param type the type of the new child
     * @return the newly created child
     */
    protected GrammarNode createChild(GrammarNodeType type) {
        GrammarNode created = new GrammarNode(type);
        addChild(created);
        return created;
    }

    /**
     * Creates a named node of the given type, appends it to this node's children and returns it.
     *
     * @param name the name of the new child
     * @param type the type of the new child
     * @return the newly created child
     */
    protected GrammarNode createChild(String name, GrammarNodeType type) {
        GrammarNode created = new GrammarNode(name, type);
        addChild(created);
        return created;
    }

    /**
     * Creates a Token node, appends it to this node's children and returns it.
     *
     * @param token the token the new child stands for
     * @return the newly created child
     */
    protected GrammarNode createChild(Token token) {
        // The Token constructor already sets the type to GrammarNodeType.Token.
        GrammarNode created = new GrammarNode(token);
        addChild(created);
        return created;
    }

    /**
     * Appends a child and, when the child has no name yet, generates a default one.
     *
     * <p>The generated name is {@code "_" + base + position}, where {@code base} is the child's
     * grammar name if it has one and its type otherwise, and {@code position} is the child count
     * after insertion. This node's own name, if any, is put in front, and a leading underscore is
     * added if the result does not already start with one.
     *
     * @param child the node to append
     */
    protected void addChild(GrammarNode child) {
        children.add(child);
        if (child.name != null) {
            return;
        }

        String grammarName = child.getGrammarName();
        String generated;
        if (grammarName != null) {
            generated = "_" + grammarName + children.size();
        } else {
            generated = "_" + child.type + children.size();
        }
        if (this.name != null) {
            generated = this.name + generated;
        }
        // Generated names must be recognisable by their leading underscore (see isNamedNode()).
        if (generated.charAt(0) != '_') {
            generated = "_" + generated;
        }
        child.name = generated;
    }

    /**
     * Tells whether this node was named explicitly. Tokens in lexer rules and non-terminals in
     * parser rules have explicit names.
     *
     * @return {@code true} if the name is longer than one character and does not start with an
     *         underscore
     */
    protected boolean isNamedNode() {
        return name != null && name.length() > 1 && name.charAt(0) != '_';
    }

    /**
     * Returns the children as a read-only view.
     *
     * @return an unmodifiable view of the child list
     */
    public List<GrammarNode> children() {
        return Collections.unmodifiableList(children);
    }

    /**
     * @return the number of children
     */
    public int getChildCount() {
        return children.size();
    }

    /**
     * @param index zero-based position of the child
     * @return the child at {@code index}
     */
    public GrammarNode getChild(int index) {
        return children.get(index);
    }

    /**
     * @return the node type
     */
    public GrammarNodeType getType() {
        return type;
    }

    /**
     * @return the node name, explicit or generated; may be {@code null}
     */
    public String getName() {
        return name;
    }

    /**
     * Returns the name this node goes by in the grammar.
     *
     * @return the token's type if a token is attached, otherwise the explicit name, otherwise
     *         {@code null}
     */
    public String getGrammarName() {
        if (token != null) {
            return token.getType();
        }
        return isNamedNode() ? name : null;
    }

    /**
     * Structural comparison: two nodes are equal when they have the same type and name and, in
     * addition, the same token (Token nodes), the same character set (Char nodes) or pairwise
     * equal children (all other non-Epsilon nodes). Repetition bounds and the neglect flag are
     * not compared.
     *
     * <p>NOTE(review): {@code hashCode()} is not overridden, so nodes that are equal by this
     * method may still have different hash codes. Adding a structural hash code would change the
     * behavior of any existing hash-based collection of nodes; confirm callers before doing so.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a structurally equal {@code GrammarNode}
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // Also rejects null, which would otherwise fail on the field access below.
        if (!(obj instanceof GrammarNode)) {
            return false;
        }
        GrammarNode other = (GrammarNode) obj;

        // Same type.
        if (other.type != type) {
            return false;
        }
        // Same name.
        if (!nullSafeEquals(name, other.name)) {
            return false;
        }

        // Leaf kinds are decided by their payload alone.
        if (type == GrammarNodeType.Token) {
            return nullSafeEquals(token, other.token);
        }
        if (type == GrammarNodeType.Epsilon) {
            return true;
        }
        if (type == GrammarNodeType.Char) {
            return nullSafeEquals(charSet, other.charSet);
        }

        // Everything else: children must match pairwise.
        int count = children.size();
        if (count != other.children.size()) {
            return false;
        }
        for (int i = 0; i < count; i++) {
            if (!children.get(i).equals(other.children.get(i))) {
                return false;
            }
        }
        return true;
    }

    // Null-tolerant equals: two nulls are equal, otherwise delegates to left.equals(right).
    private static boolean nullSafeEquals(Object left, Object right) {
        return left == null ? right == null : left.equals(right);
    }

    /**
     * Renders this node in a compact, readable form: the character set, the token (quoted text
     * or, failing that, its type), the name, or the type, followed by a repetition suffix
     * ({@code *}, {@code ?}, {@code +} or {@code {min,max}}) when the node does not occur exactly
     * once.
     *
     * @return the readable form of this node
     */
    @Override
    public String toString() {
        if (type == GrammarNodeType.Epsilon) {
            return "ε";
        }

        String text;
        if (charSet != null) {
            text = charSet.toString();
        } else if (token != null) {
            if (token.getText() != null) {
                text = "'" + token.getText() + "'";
            } else {
                text = token.getType();
            }
        } else if (name != null) {
            text = name;
        } else if (type != null) {
            text = type.toString();
        } else {
            text = "GrammarNode";
        }

        if (minTimes == 1 && maxTimes == 1) {
            return text;
        }
        if (minTimes == 0 && maxTimes == -1) {
            return text + "*";
        }
        if (minTimes == 0 && maxTimes == 1) {
            return text + "?";
        }
        if (minTimes == 1 && maxTimes == -1) {
            return text + "+";
        }
        return text + "{" + minTimes + "," + maxTimes + "}";
    }

    /**
     * Sets how many times this node may repeat.
     *
     * @param minTimes minimum number of repetitions
     * @param maxTimes maximum number of repetitions; {@code -1} means unbounded
     */
    protected void setRepeatTimes(int minTimes, int maxTimes) {
        this.minTimes = minTimes;
        this.maxTimes = maxTimes;
    }

    /**
     * Renders this node as text, in the same layout as an ANTLR grammar. An explicitly named
     * node is printed as a full rule including its children, e.g. for a node named
     * {@code primary}: {@code primary: ID | INT_LITERAL | add}. Explicitly named children are
     * referenced by their {@link #toString()} form; other children are expanded recursively.
     *
     * @return the textual form of this node
     */
    protected String getText() {
        // Alternatives are separated by " | "; every other node kind lists its children separated
        // by a blank, so that a multi-child node never prints a null separator.
        String separator = (type == GrammarNodeType.Or) ? " | " : " ";

        StringBuilder body = new StringBuilder();
        if (children.isEmpty()) {
            body.append(this.toString());
        } else {
            for (int i = 0; i < children.size(); i++) {
                if (i > 0) {
                    body.append(separator);
                }
                GrammarNode child = children.get(i);
                body.append(child.isNamedNode() ? child.toString() : child.getText());
            }
        }

        String text = body.toString();
        if (isNamedNode()) {
            return wrapNamedNode(text);
        }
        // Anonymous alternatives are parenthesised so they stay grouped inside their parent.
        if (type == GrammarNodeType.Or) {
            return "(" + text + ")";
        }
        return text;
    }

    // Formats "name<tab(s)>: body ;"; names of up to three characters get an extra tab.
    private String wrapNamedNode(String str) {
        if (name == null) {
            return str;
        }
        String header = name;
        if (name.length() <= 3) {
            header += "\t";
        }
        header += "\t: ";
        return header + str + " ;";
    }

    /**
     * Prints this node and everything below it to standard output: as an indented tree when the
     * structure is a tree, or as a flat list of named rules when some node is reachable more than
     * once.
     */
    protected void dump() {
        if (isGraph(this, newIdentitySet())) {
            dumpGraph(this, newIdentitySet());
        } else {
            dumpTree(this, "");
        }
    }

    // Creates a set that tracks node instances by identity rather than by structural equals().
    private static Set<GrammarNode> newIdentitySet() {
        return Collections.newSetFromMap(new IdentityHashMap<GrammarNode, Boolean>());
    }

    /**
     * Prints the structure as a tree.
     *
     * @param node   the node to print
     * @param indent the indentation prefix; one tab is added per level
     */
    private static void dumpTree(GrammarNode node, String indent) {
        if (node.isNamedNode()) {
            System.out.println(indent + node.getText());
        } else {
            System.out.println(indent + node);
        }
        String childIndent = indent + "\t";
        for (GrammarNode child : node.children) {
            dumpTree(child, childIndent);
        }
    }

    /**
     * Prints a graph. Because nodes may reference each other cyclically, the structure cannot be
     * printed as a tree; only explicitly named nodes are printed, each visited node once.
     *
     * @param node        the node to print
     * @param dumpedNodes the nodes visited so far
     */
    private static void dumpGraph(GrammarNode node, Set<GrammarNode> dumpedNodes) {
        if (node.isNamedNode()) {
            System.out.println(node.getText());
        }
        dumpedNodes.add(node);
        for (GrammarNode child : node.children) {
            if (!dumpedNodes.contains(child)) {
                dumpGraph(child, dumpedNodes);
            }
        }
    }

    /**
     * Decides whether the structure starting at {@code node} is a graph rather than a tree,
     * i.e. whether some node is reached a second time during the traversal.
     *
     * @param node         the start node
     * @param scannedNodes the nodes visited so far
     * @return {@code true} if a node is reachable more than once
     */
    private static boolean isGraph(GrammarNode node, Set<GrammarNode> scannedNodes) {
        scannedNodes.add(node);
        // Check all direct children first, then descend.
        for (GrammarNode child : node.children) {
            if (scannedNodes.contains(child)) {
                return true;
            }
        }
        for (GrammarNode child : node.children) {
            if (isGraph(child, scannedNodes)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether this node is nullable, i.e. whether it can match the empty input. Rules:
     *
     * <ol>
     *   <li>An Epsilon node, or any node with {@code minTimes == 0} ({@code *} or {@code ?}),
     *       is nullable.</li>
     *   <li>An And node is nullable when all of its children are.</li>
     *   <li>An Or node is nullable when any of its children is.</li>
     *   <li>Every other node is not nullable.</li>
     * </ol>
     *
     * <p>A node that is reached again while it is still being evaluated (a cyclic reference)
     * contributes "not nullable" at that point, so the evaluation terminates on recursive
     * grammars.
     *
     * @return {@code true} if this node can match the empty input
     */
    public boolean isNullable() {
        return isNullable(this, newIdentitySet());
    }

    // Recursive worker for isNullable(); 'path' holds the nodes currently being evaluated.
    private static boolean isNullable(GrammarNode node, Set<GrammarNode> path) {
        if (node.minTimes == 0 || node.type == GrammarNodeType.Epsilon) {
            return true;
        }
        boolean isAnd = node.type == GrammarNodeType.And;
        if (!isAnd && node.type != GrammarNodeType.Or) {
            return false;
        }
        // Re-entering a node on the current path cannot produce a new way to match empty input.
        if (!path.add(node)) {
            return false;
        }
        try {
            for (GrammarNode child : node.children) {
                boolean childNullable = isNullable(child, path);
                if (isAnd && !childNullable) {
                    return false;
                }
                if (!isAnd && childNullable) {
                    return true;
                }
            }
            // And: no child failed (true). Or: no child succeeded (false).
            return isAnd;
        } finally {
            path.remove(node);
        }
    }

    /**
     * Collects the grammar nodes reachable from this node, this node included. A child that
     * {@link #equals(Object) equals} an already collected node is neither added nor descended
     * into.
     *
     * @return the reachable nodes in depth-first order
     */
    protected List<GrammarNode> getAllNodes() {
        List<GrammarNode> allNodes = new ArrayList<GrammarNode>();
        getAllNodes(this, allNodes);
        return allNodes;
    }

    // Depth-first worker for getAllNodes().
    private static void getAllNodes(GrammarNode node, List<GrammarNode> allNodes) {
        allNodes.add(node);
        for (GrammarNode child : node.children) {
            if (!allNodes.contains(child)) {
                getAllNodes(child, allNodes);
            }
        }
    }

    /**
     * @return {@code true} if this node has no children
     */
    public boolean isLeaf() {
        return children.isEmpty();
    }

    /**
     * @return {@code true} if a token is attached to this node
     */
    public boolean isToken() {
        return token != null;
    }

    /**
     * @return the character set this node matches; may be {@code null}
     */
    public CharSet getCharSet() {
        return charSet;
    }

    /**
     * @return the minimum number of repetitions
     */
    public int getMinTimes() {
        return minTimes;
    }

    /**
     * @return the maximum number of repetitions; {@code -1} means unbounded
     */
    public int getMaxTimes() {
        return maxTimes;
    }

    /**
     * @return the attached token; may be {@code null}
     */
    public Token getToken() {
        return token;
    }

    /**
     * @return whether the lexer should ignore this node (e.g. whitespace)
     */
    public boolean isNeglect() {
        return neglect;
    }

    /**
     * @param neglect whether the lexer should ignore this node
     */
    public void setNeglect(boolean neglect) {
        this.neglect = neglect;
    }
}