AAda Compiler Token Package

8
Jan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
-- package reading the source file and transforming it in a stream of tokens
with aada_character_categories;
with aada_compiler_ucs4_character;
with aada_compiler_error_package;
with aada_compiler_token_position;
with aada_compiler_character_package;
with aada_text_input_package;
with aada_vstrings;
 
package aada_compiler_token_package is
  -- error when the unget() function is called too many times
  -- (it should never occur)
  unget_stack_is_full_error: exception;
  -- a literal is more than 4096 characters
  literal_too_long_error: exception;
  -- the set_token_token() function can be used for only a very small number of tokens
  -- any other changes will raise this exception
  token_change_not_allowed: exception;
  -- look ahead buffer is full
  token_look_ahead_buffer_full: exception;
 
  -- all the ada tokens we support
  type ada_token is (
    -- special tokens
    token_unknown, -- use to mark a token that was not yet typed
    token_nil, -- useful to allow adjustment of the token start position
    token_eof, -- no more data available
    token_list, -- a list expression (a => b, ...)
 
    -- identifier and literals
    token_identifier, -- identifier that is not a reserved word
    token_integer,    -- a literal integer number
    token_float,      -- a literal floating point number
    token_character,  -- a literal character
    token_string,     -- a literal string
 
    -- separators and delimiters (other than spaces)
    token_concatenate,       -- &
    token_apostrophe,        -- '
    token_open_parenthesis,  -- (
    token_close_parenthesis, -- )
    token_multiply,          -- *
    token_plus,              -- +
    token_comma,             -- ,
    token_minus,             -- -
    token_period,            -- .
    token_divide,            -- /
    token_colon,             -- :
    token_semi_colon,        -- ;
    token_less,              -- <
    token_equal,             -- =
    token_greater,           -- >
    token_choice,            -- |
    token_arrow,             -- =>
    token_double_dot,        -- ..
    token_exponential,       -- **
    token_assignment,        -- :=
    token_inequality,        -- /=
    token_greater_equal,     -- >=
    token_less_equal,        -- <=
    token_left_label,        -- <<
    token_right_label,       -- >>
    token_box,               -- <>
 
    -- reserved words, special identifiers
    token_abort,
    token_abs,
    token_abstract,
    token_accept,
    token_access,
    token_aliased,
    token_all,
    token_and,
    token_array,
    token_at,
    token_begin,
    token_body,
    token_case,
    token_constant,
    token_declare,
    token_delay,
    token_delta,
    token_digits,
    token_do,
    token_else,
    token_elsif,
    token_end,
    token_entry,
    token_exception,
    token_exit,
    token_for,
    token_function,
    token_generic,
    token_goto,
    token_if,
    token_in,
    token_interface,
    token_is,
    token_limited,
    token_loop,
    token_mod,
    token_new,
    token_not,
    token_null,
    token_of,
    token_or,
    token_others,
    token_out,
    token_overriding,
    token_package,
    token_pragma,
    token_private,
    token_procedure,
    token_protected,
    token_raise,
    token_range,
    token_record,
    token_rem,
    token_renames,
    token_requeue,
    token_return,
    token_reverse,
    token_select,
    token_separate,
    token_subtype,
    token_synchronized,
    token_tagged,
    token_task,
    token_terminate,
    token_then,
    token_type,
    token_until,
    token_use,
    token_when,
    token_while,
    token_with,
    token_xor,
 
    -- renaming of tokens and multi-tokens that the parser shrinks
    token_and_then,
    token_call,
    token_identity,  -- (+expr)
    token_negate,    -- (-expr)
    token_not_in,
    token_null_record,
    token_or_else,
    token_qualify
  );
 
  -- limit copies of the token input since it includes file
  type compiler_token_input_type is limited private;
 
  type compiler_token_type is private;
 
  -- we accept (not by default) extended based numerals
  -- (i.e. base 2 to 36 to including all the Latin letters without accents)
  type numeral_base_type is range 2 .. 36;
 
  -- open the text file to tokenize
  procedure open(f: in out compiler_token_input_type;
                 name: in string);
 
  -- close this text file
  procedure close(f: in out compiler_token_input_type);
 
  -- check whether this text file is open
  function is_open(f: in compiler_token_input_type) return boolean;
 
  -- retrieve the filename of this text file (may not be available when closed)
  function name(f: in compiler_token_input_type) return string;
 
  -- get the next token from the text file
  procedure next_token(f: in out compiler_token_input_type;
                       t: out compiler_token_type);
  procedure unget_token(f: in out compiler_token_input_type;
                        t: in compiler_token_type);
 
  function get_token_token(t: in compiler_token_type) return ada_token;
  procedure set_token_token(t: in out compiler_token_type; token: in ada_token);
  function get_token_position(t: in compiler_token_type)
                      return aada_compiler_token_position.token_position_type;
  procedure get_token_operator(t: in out compiler_token_type;
                               token: out ada_token);
  function get_token_literal(t: in compiler_token_type)
                      return aada_vstrings.vstring;
  function get_token_uppercase(t: in compiler_token_type)
                      return aada_vstrings.vstring;
 
private
  type compiler_literal_index is range 0 .. 4096;
 
  type compiler_token_type is
    record
      -- exact position of this token in the source file
      position: aada_compiler_token_position.token_position_type;
 
      -- the corresponding token
      token: ada_token;
      -- set when "token = token_string" (i.e. "mod", "+", etc.)
      -- we decide later whether it is indeed an operator or not
      operator: ada_token;
 
      -- the literal as we read it from the input file
      literal: aada_vstrings.vstring;
      -- the literal in uppercase
      uppercase: aada_vstrings.vstring;
    end record;
 
  type compiler_token_character is
    record
      position: aada_compiler_token_position.token_position_type;
      c: aada_compiler_character_package.compiler_character;
    end record;
 
  -- our stack should be a generic package!
  -- define a stack of characters with their position so we can get/unget
  -- and still have the correct position for each character
  type stack_pointer_type is range 0 .. 20;
  subtype stack_pointer_index is stack_pointer_type
                range stack_pointer_type'first + 1 .. stack_pointer_type'last;
  type stack_array is
                array(stack_pointer_index'range) of compiler_token_character;
 
  type compiler_token_character_stack is
    record
      pointer: stack_pointer_type;
      stack: stack_array;
    end record;
 
  type compiler_token_input_type is
    record
      -- AAda Compiler Error Handler
      compiler_error: aada_compiler_error_package.aada_compiler_error;
 
      -- the input file where we read the characters from
      file_input: aada_text_input_package.text_input_type;
 
      -- when checking for 16#0D# 16#0A# we use this buffer if the
      -- expected 16#0A# was not that character; since we will never
      -- 16#0A# in that variable we use 16#0A# as the default value
      unget: aada_compiler_ucs4_character.ucs4_character := 16#0A#;
 
      -- information about our current position
      current_position: aada_compiler_token_position.token_position_type;
 
      -- change the following variables with:
      -- pragma page_sizes(width, height, horizontal_tab, vertical_tab);
      --
      -- page dimensions
      page_sizes: aada_compiler_token_position.page_sizes_type;
 
      -- maximum numeral base, change with "pragma numeral_base(max-base);"
      numeral_base_upper_limit: numeral_base_type := 16;
 
      -- the current token
      token: compiler_token_type;
 
      -- for literals, the actual text of such (size = 0 => no literal)
      -- this is used to build the literal that later is copied in the
      -- token literal variable which has a variable size
      literal_size: compiler_literal_index := 0;
      literal: aada_compiler_character_package.compiler_character_string
                                    (1 .. integer(compiler_literal_index'last));
 
      -- stack of characters that we get/unget
      -- these include their position information
      stack: compiler_token_character_stack;
 
      -- when a function has to look ahead, it may call
      -- unget_token() in which case it gets saved here
      look_ahead: compiler_token_type;
    end record;
 
end aada_compiler_token_package;
 
-- vim: ts=2 sw=2 et syntax=ada
Project aada v1.0-338 (Project id #3)
Process Done (Last compiled on 2012/01/13 01:21:26)
Description Alexis Ada Compiler written in Ada (my first attempt was in C++ which is not correct for an Ada compiler.)