forked from ocaml/ocaml
-
Notifications
You must be signed in to change notification settings - Fork 1
/
genlex.ml
201 lines (192 loc) · 7.95 KB
/
genlex.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
type token =
Kwd of string
| Ident of string
| Int of int
| Float of float
| String of string
| Char of char
(* The string buffering machinery *)
let initial_buffer = Bytes.create 32
let buffer = ref initial_buffer
let bufpos = ref 0
let reset_buffer () = buffer := initial_buffer; bufpos := 0
let store c =
if !bufpos >= Bytes.length !buffer then begin
let newbuffer = Bytes.create (2 * !bufpos) in
Bytes.blit !buffer 0 newbuffer 0 !bufpos;
buffer := newbuffer
end;
Bytes.set !buffer !bufpos c;
incr bufpos
let get_string () =
let s = Bytes.sub_string !buffer 0 !bufpos in buffer := initial_buffer; s
(* The lexer *)
let make_lexer keywords =
let kwd_table = Hashtbl.create 17 in
List.iter (fun s -> Hashtbl.add kwd_table s (Kwd s)) keywords;
let ident_or_keyword id =
try Hashtbl.find kwd_table id with
Not_found -> Ident id
and keyword_or_error c =
let s = String.make 1 c in
try Hashtbl.find kwd_table s with
Not_found -> raise (Stream.Error ("Illegal character " ^ s))
in
let rec next_token (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some (' ' | '\010' | '\013' | '\009' | '\026' | '\012') ->
Stream.junk strm__; next_token strm__
| Some ('A'..'Z' | 'a'..'z' | '_' | '\192'..'\255' as c) ->
Stream.junk strm__;
let s = strm__ in reset_buffer (); store c; ident s
| Some
('!' | '%' | '&' | '$' | '#' | '+' | '/' | ':' | '<' | '=' | '>' |
'?' | '@' | '\\' | '~' | '^' | '|' | '*' as c) ->
Stream.junk strm__;
let s = strm__ in reset_buffer (); store c; ident2 s
| Some ('0'..'9' as c) ->
Stream.junk strm__;
let s = strm__ in reset_buffer (); store c; number s
| Some '\'' ->
Stream.junk strm__;
let c =
try char strm__ with
Stream.Failure -> raise (Stream.Error "")
in
begin match Stream.peek strm__ with
Some '\'' -> Stream.junk strm__; Some (Char c)
| _ -> raise (Stream.Error "")
end
| Some '\"' ->
Stream.junk strm__;
let s = strm__ in reset_buffer (); Some (String (string s))
| Some '-' -> Stream.junk strm__; neg_number strm__
| Some '(' -> Stream.junk strm__; maybe_comment strm__
| Some c -> Stream.junk strm__; Some (keyword_or_error c)
| _ -> None
and ident (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some
('A'..'Z' | 'a'..'z' | '\192'..'\255' | '0'..'9' | '_' | '\'' as c) ->
Stream.junk strm__; let s = strm__ in store c; ident s
| _ -> Some (ident_or_keyword (get_string ()))
and ident2 (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some
('!' | '%' | '&' | '$' | '#' | '+' | '-' | '/' | ':' | '<' | '=' |
'>' | '?' | '@' | '\\' | '~' | '^' | '|' | '*' as c) ->
Stream.junk strm__; let s = strm__ in store c; ident2 s
| _ -> Some (ident_or_keyword (get_string ()))
and neg_number (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('0'..'9' as c) ->
Stream.junk strm__;
let s = strm__ in reset_buffer (); store '-'; store c; number s
| _ -> let s = strm__ in reset_buffer (); store '-'; ident2 s
and number (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('0'..'9' as c) ->
Stream.junk strm__; let s = strm__ in store c; number s
| Some '.' ->
Stream.junk strm__; let s = strm__ in store '.'; decimal_part s
| Some ('e' | 'E') ->
Stream.junk strm__; let s = strm__ in store 'E'; exponent_part s
| _ -> Some (Int (int_of_string (get_string ())))
and decimal_part (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('0'..'9' as c) ->
Stream.junk strm__; let s = strm__ in store c; decimal_part s
| Some ('e' | 'E') ->
Stream.junk strm__; let s = strm__ in store 'E'; exponent_part s
| _ -> Some (Float (float_of_string (get_string ())))
and exponent_part (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('+' | '-' as c) ->
Stream.junk strm__; let s = strm__ in store c; end_exponent_part s
| _ -> end_exponent_part strm__
and end_exponent_part (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ('0'..'9' as c) ->
Stream.junk strm__; let s = strm__ in store c; end_exponent_part s
| _ -> Some (Float (float_of_string (get_string ())))
and string (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '\"' -> Stream.junk strm__; get_string ()
| Some '\\' ->
Stream.junk strm__;
let c =
try escape strm__ with
Stream.Failure -> raise (Stream.Error "")
in
let s = strm__ in store c; string s
| Some c -> Stream.junk strm__; let s = strm__ in store c; string s
| _ -> raise Stream.Failure
and char (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '\\' ->
Stream.junk strm__;
begin try escape strm__ with
Stream.Failure -> raise (Stream.Error "")
end
| Some c -> Stream.junk strm__; c
| _ -> raise Stream.Failure
and escape (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some 'n' -> Stream.junk strm__; '\n'
| Some 'r' -> Stream.junk strm__; '\r'
| Some 't' -> Stream.junk strm__; '\t'
| Some ('0'..'9' as c1) ->
Stream.junk strm__;
begin match Stream.peek strm__ with
Some ('0'..'9' as c2) ->
Stream.junk strm__;
begin match Stream.peek strm__ with
Some ('0'..'9' as c3) ->
Stream.junk strm__;
Char.chr
((Char.code c1 - 48) * 100 + (Char.code c2 - 48) * 10 +
(Char.code c3 - 48))
| _ -> raise (Stream.Error "")
end
| _ -> raise (Stream.Error "")
end
| Some c -> Stream.junk strm__; c
| _ -> raise Stream.Failure
and maybe_comment (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '*' ->
Stream.junk strm__; let s = strm__ in comment s; next_token s
| _ -> Some (keyword_or_error '(')
and comment (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '(' -> Stream.junk strm__; maybe_nested_comment strm__
| Some '*' -> Stream.junk strm__; maybe_end_comment strm__
| Some _ -> Stream.junk strm__; comment strm__
| _ -> raise Stream.Failure
and maybe_nested_comment (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some '*' -> Stream.junk strm__; let s = strm__ in comment s; comment s
| Some _ -> Stream.junk strm__; comment strm__
| _ -> raise Stream.Failure
and maybe_end_comment (strm__ : _ Stream.t) =
match Stream.peek strm__ with
Some ')' -> Stream.junk strm__; ()
| Some '*' -> Stream.junk strm__; maybe_end_comment strm__
| Some _ -> Stream.junk strm__; comment strm__
| _ -> raise Stream.Failure
in
fun input -> Stream.from (fun _count -> next_token input)