1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
| -- package reading the source file and transforming it in a stream of tokens
with aada_character_categories;
with aada_compiler_ucs4_character;
with aada_compiler_error_package;
with aada_compiler_token_position;
with aada_compiler_character_package;
with aada_text_input_package;
with aada_vstrings;
package aada_compiler_token_package is
-- error when the unget() function is called too many times
-- (it should never occur)
unget_stack_is_full_error: exception;
-- a literal is more than 4096 characters
literal_too_long_error: exception;
-- the set_token_token() function can be used for only a very small number of tokens
-- any other changes will raise this exception
token_change_not_allowed: exception;
-- look ahead buffer is full
token_look_ahead_buffer_full: exception;
-- all the ada tokens we support
type ada_token is (
-- special tokens
token_unknown, -- use to mark a token that was not yet typed
token_nil, -- useful to allow adjustment of the token start position
token_eof, -- no more data available
token_list, -- a list expression (a => b, ...)
-- identifier and literals
token_identifier, -- identifier that is not a reserved word
token_integer, -- a literal integer number
token_float, -- a literal floating point number
token_character, -- a literal character
token_string, -- a literal string
-- separators and delimiters (other than spaces)
token_concatenate, -- &
token_apostrophe, -- '
token_open_parenthesis, -- (
token_close_parenthesis, -- )
token_multiply, -- *
token_plus, -- +
token_comma, -- ,
token_minus, -- -
token_period, -- .
token_divide, -- /
token_colon, -- :
token_semi_colon, -- ;
token_less, -- <
token_equal, -- =
token_greater, -- >
token_choice, -- |
token_arrow, -- =>
token_double_dot, -- ..
token_exponential, -- **
token_assignment, -- :=
token_inequality, -- /=
token_greater_equal, -- >=
token_less_equal, -- <=
token_left_label, -- <<
token_right_label, -- >>
token_box, -- <>
-- reserved words, special identifiers
token_abort,
token_abs,
token_abstract,
token_accept,
token_access,
token_aliased,
token_all,
token_and,
token_array,
token_at,
token_begin,
token_body,
token_case,
token_constant,
token_declare,
token_delay,
token_delta,
token_digits,
token_do,
token_else,
token_elsif,
token_end,
token_entry,
token_exception,
token_exit,
token_for,
token_function,
token_generic,
token_goto,
token_if,
token_in,
token_interface,
token_is,
token_limited,
token_loop,
token_mod,
token_new,
token_not,
token_null,
token_of,
token_or,
token_others,
token_out,
token_overriding,
token_package,
token_pragma,
token_private,
token_procedure,
token_protected,
token_raise,
token_range,
token_record,
token_rem,
token_renames,
token_requeue,
token_return,
token_reverse,
token_select,
token_separate,
token_subtype,
token_synchronized,
token_tagged,
token_task,
token_terminate,
token_then,
token_type,
token_until,
token_use,
token_when,
token_while,
token_with,
token_xor,
-- renaming of tokens and multi-tokens that the parser shrinks
token_and_then,
token_call,
token_identity, -- (+expr)
token_negate, -- (-expr)
token_not_in,
token_null_record,
token_or_else,
token_qualify
);
-- limit copies of the token input since it includes file
type compiler_token_input_type is limited private;
type compiler_token_type is private;
-- we accept (not by default) extended based numerals
-- (i.e. base 2 to 36 to including all the Latin letters without accents)
type numeral_base_type is range 2 .. 36;
-- open the text file to tokenize
procedure open(f: in out compiler_token_input_type;
name: in string);
-- close this text file
procedure close(f: in out compiler_token_input_type);
-- check whether this text file is open
function is_open(f: in compiler_token_input_type) return boolean;
-- retrieve the filename of this text file (may not be available when closed)
function name(f: in compiler_token_input_type) return string;
-- get the next token from the text file
procedure next_token(f: in out compiler_token_input_type;
t: out compiler_token_type);
procedure unget_token(f: in out compiler_token_input_type;
t: in compiler_token_type);
function get_token_token(t: in compiler_token_type) return ada_token;
procedure set_token_token(t: in out compiler_token_type; token: in ada_token);
function get_token_position(t: in compiler_token_type)
return aada_compiler_token_position.token_position_type;
procedure get_token_operator(t: in out compiler_token_type;
token: out ada_token);
function get_token_literal(t: in compiler_token_type)
return aada_vstrings.vstring;
function get_token_uppercase(t: in compiler_token_type)
return aada_vstrings.vstring;
private
type compiler_literal_index is range 0 .. 4096;
type compiler_token_type is
record
-- exact position of this token in the source file
position: aada_compiler_token_position.token_position_type;
-- the corresponding token
token: ada_token;
-- set when "token = token_string" (i.e. "mod", "+", etc.)
-- we decide later whether it is indeed an operator or not
operator: ada_token;
-- the literal as we read it from the input file
literal: aada_vstrings.vstring;
-- the literal in uppercase
uppercase: aada_vstrings.vstring;
end record;
type compiler_token_character is
record
position: aada_compiler_token_position.token_position_type;
c: aada_compiler_character_package.compiler_character;
end record;
-- our stack should be a generic package!
-- define a stack of characters with their position so we can get/unget
-- and still have the correct position for each character
type stack_pointer_type is range 0 .. 20;
subtype stack_pointer_index is stack_pointer_type
range stack_pointer_type'first + 1 .. stack_pointer_type'last;
type stack_array is
array(stack_pointer_index'range) of compiler_token_character;
type compiler_token_character_stack is
record
pointer: stack_pointer_type;
stack: stack_array;
end record;
type compiler_token_input_type is
record
-- AAda Compiler Error Handler
compiler_error: aada_compiler_error_package.aada_compiler_error;
-- the input file where we read the characters from
file_input: aada_text_input_package.text_input_type;
-- when checking for 16#0D# 16#0A# we use this buffer if the
-- expected 16#0A# was not that character; since we will never
-- 16#0A# in that variable we use 16#0A# as the default value
unget: aada_compiler_ucs4_character.ucs4_character := 16#0A#;
-- information about our current position
current_position: aada_compiler_token_position.token_position_type;
-- change the following variables with:
-- pragma page_sizes(width, height, horizontal_tab, vertical_tab);
--
-- page dimensions
page_sizes: aada_compiler_token_position.page_sizes_type;
-- maximum numeral base, change with "pragma numeral_base(max-base);"
numeral_base_upper_limit: numeral_base_type := 16;
-- the current token
token: compiler_token_type;
-- for literals, the actual text of such (size = 0 => no literal)
-- this is used to build the literal that later is copied in the
-- token literal variable which has a variable size
literal_size: compiler_literal_index := 0;
literal: aada_compiler_character_package.compiler_character_string
(1 .. integer(compiler_literal_index'last));
-- stack of characters that we get/unget
-- these include their position information
stack: compiler_token_character_stack;
-- when a function has to look ahead, it may call
-- unget_token() in which case it gets saved here
look_ahead: compiler_token_type;
end record;
end aada_compiler_token_package;
-- vim: ts=2 sw=2 et syntax=ada |