-
Notifications
You must be signed in to change notification settings - Fork 0
/
Program.cs
183 lines (159 loc) · 5.53 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
using System;
using System.Diagnostics;
using System.Text;
using System.Collections.Generic;
namespace LexicalAnalysis
{
//delegate type for a yes/no test applied to a single character
delegate bool DgCharacterPredicate(char c);
//let's just make an implementation that wraps a delegate,
//which should make things easy
//ICharacterPredicate implementation that forwards every query
//to the delegate supplied at construction time
class DelegateBasedCharPredicate : ICharacterPredicate
{
    //the wrapped delegate; assigned once in the constructor
    private readonly DgCharacterPredicate wrapped;

    //pred: the delegate that IsTrueOf will call
    public DelegateBasedCharPredicate(DgCharacterPredicate pred)
    {
        wrapped = pred;
    }

    //returns whatever the wrapped delegate returns for c
    public bool IsTrueOf(char c)
    {
        return wrapped(c);
    }
}
//suppose our text is actually represented by
//a sequence of things like this...
//value type pairing one character with one integer;
//both fields are readonly and set only by the constructor
struct CharIntPair
{
    //the character half of the pair
    public readonly char c;

    //the integer half of the pair
    public readonly int i;

    //stores both arguments unchanged
    public CharIntPair(char c, int i)
    {
        this.i = i;
        this.c = c;
    }
}
//in order to interface CharIntPair with the lexical
//analyzer, we need an adapter class like this
//wraps a CharIntPair so that it can be passed to code that
//works through the IHasCharProperty interface
class CharIntPairLexerAdapter : IHasCharProperty
{
    //the wrapped pair; public so callers can read its integer as well
    public readonly CharIntPair cip;

    //cip: the pair to wrap
    public CharIntPairLexerAdapter(CharIntPair cip)
    {
        this.cip = cip;
    }

    //returns the character half of the wrapped pair
    public char GetCharProperty()
    {
        char ch = cip.c;
        return ch;
    }
}
//demo driver: builds a small sample text, registers a handful of token
//types and feeds the text to a LexicalAnalyzer one item at a time
class MainClass
{
    //line terminator used both when building the sample text and as the end
    //of the single line comment pattern. The two must be the same string:
    //Environment.NewLine is "\r\n" on some platforms, and a pattern ending in
    //"\r\n" can never match text whose lines end in a bare "\n".
    private const string LineTerminator = "\n";

    //true for characters that Char.IsWhiteSpace accepts
    private static readonly ICharacterPredicate Whitespace = new DelegateBasedCharPredicate(delegate(char c) {
        return Char.IsWhiteSpace(c);
    });

    //true for every character
    private static readonly ICharacterPredicate AnyChar = new DelegateBasedCharPredicate(delegate(char c) {
        return true;
    });

    //appends one space, then line, then LineTerminator to sb
    private static void AppendLine(string line, StringBuilder sb) {
        sb.Append(" ").Append(line).Append(LineTerminator);
    }

    //recognizer built from the closure of the Whitespace predicate
    private static IRecognizerFSA WhitespaceRecognizer() {
        return FSM.ForPredicate(Whitespace).Closure().AsRecognizerFSA();
    }

    //predicate that is true for every character except c
    //(not referenced anywhere else in this class at the moment)
    private static ICharacterPredicate NotChar(char c) {
        return new DelegateBasedCharPredicate(delegate(char c2) {
            return c != c2;
        });
    }

    //recognizer for: the constant string begin, then the closure of AnyChar,
    //then the constant string end
    private static IRecognizerFSA QuotePatternRecognizer(string begin, string end) {
        return FSM.ForConstantStringPattern (begin).Concatenation (FSM.ForPredicate(AnyChar).Closure())
            .Concatenation(FSM.ForConstantStringPattern(end)).AsMinRecognizerFSA();
    }

    //concatenates the characters of the given items, in list order
    private static string TextOf(IList<CharIntPairLexerAdapter> ciplas) {
        StringBuilder text = new StringBuilder();
        foreach (CharIntPairLexerAdapter cipla in ciplas) {
            text.Append(cipla.cip.c);
        }
        return text.ToString();
    }

    //token type for one constant keyword; its handler prints the keyword
    //and the number attached to the first matched item
    private static TokenType<CharIntPairLexerAdapter> KeywordTokenType(string keyword) {
        return new TokenType<CharIntPairLexerAdapter>(
            FSM.ForConstantStringPattern(keyword).AsRecognizerFSA(),
            delegate(IList<CharIntPairLexerAdapter> ciplas) {
                Console.WriteLine("Got " + keyword + " at index " + ciplas[0].cip.i);
            }
        );
    }

    //entry point; args is not used
    public static void Main (string[] args)
    {
        //some text to lex
        StringBuilder sb = new StringBuilder();
        AppendLine("for in while //this is a comment", sb);
        AppendLine(" ", sb);
        AppendLine("/*", sb);
        AppendLine("multi", sb);
        AppendLine("line", sb);
        AppendLine("comment", sb);
        AppendLine("*/", sb);
        AppendLine("fofds \"afdsafd\" whdsafdsa", sb);

        //let's turn the text into a list of char/int pairs,
        //each character paired with its position in the text
        string text = sb.ToString();
        IList<CharIntPair> cips = new List<CharIntPair>(text.Length);
        for (int i = 0; i < text.Length; ++i) {
            cips.Add(new CharIntPair(text[i], i));
        }

        //to lex, we need a list of token types
        //(registration order is kept exactly as before)
        IList<TokenType<CharIntPairLexerAdapter>> types =
            new List<TokenType<CharIntPairLexerAdapter>>();

        //add a token type for a stretch of whitespace
        types.Add(new TokenType<CharIntPairLexerAdapter>(
            WhitespaceRecognizer(), delegate(IList<CharIntPairLexerAdapter> ciplas) {
                //add up numbers associated with objects
                int sum = 0;
                foreach (CharIntPairLexerAdapter cipla in ciplas) {
                    sum += cipla.cip.i;
                }
                Console.WriteLine("Got " + ciplas.Count + " chars of whitespace with sum " + sum);
            }
        ));

        //add some keywords
        types.Add(KeywordTokenType("for"));
        types.Add(KeywordTokenType("in"));
        types.Add(KeywordTokenType("while"));

        //add a token type for a single line comment; the pattern ends on the
        //same terminator AppendLine wrote, so the matched text already ends
        //the line and Console.Write (not WriteLine) is used
        types.Add(new TokenType<CharIntPairLexerAdapter>(
            QuotePatternRecognizer("//", LineTerminator), delegate(IList<CharIntPairLexerAdapter> ciplas) {
                Console.WriteLine("Got single line comment:");
                Console.Write(TextOf(ciplas));
            }
        ));

        //add a token type for a multiline comment
        types.Add(new TokenType<CharIntPairLexerAdapter>(
            QuotePatternRecognizer("/*", "*/"), delegate(IList<CharIntPairLexerAdapter> ciplas) {
                Console.WriteLine("Got multiline comment:");
                Console.WriteLine(TextOf(ciplas));
            }
        ));

        //add a token type for a string literal
        types.Add(new TokenType<CharIntPairLexerAdapter>(
            QuotePatternRecognizer("\"", "\""), delegate(IList<CharIntPairLexerAdapter> ciplas) {
                Console.WriteLine("Got string literal:");
                Console.WriteLine(TextOf(ciplas));
            }
        ));

        //the second argument is a per-item callback; going by its message it
        //is presumably invoked for items no token type accepts (confirm
        //against LexicalAnalyzer)
        LexicalAnalyzer<CharIntPairLexerAdapter> lexer = new LexicalAnalyzer<CharIntPairLexerAdapter>(
            types,
            delegate(CharIntPairLexerAdapter cipla) {
                Console.WriteLine("Got unhandlable char " + cipla.cip.c + " with number " + cipla.cip.i);
            }
        );

        //feed every pair to the lexer, then signal the end of input
        foreach (CharIntPair cip in cips) {
            lexer.ProcessItem(new CharIntPairLexerAdapter(cip));
        }
        lexer.ProcessEOF();
    }
}
}