r1960 + more work done, wrt syntax analysis in particular
[racktables] / inc / code.php
CommitLineData
f63072f5 1<?php
a1b88eca
DO
2/*
3 * This file implement lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 * Comments last from the first # to the end of the line and are filtered out
19 * automatically.
20 */
f63072f5
DO
21
22// Complain about martian char.
23function abortLex1 ($state, $text, $pos)
24{
25 echo "Error! Could not parse the following text (current state is '${state}'): ";
26 echo substr ($text, 0, $pos);
27 echo '<font color = red>-&gt;' . $text{$pos} . '&lt;-</font>';
28 echo substr ($text, $pos + 1);
29 die;
30}
31
32// Complain about martian keyword.
33function abortLex2 ($state, $word)
34{
a1b88eca 35 echo "Error! Could not parse word (current state is '${state}'): '${word}'.";
f63072f5
DO
36 die;
37}
38
a1b88eca
DO
39// Complain about wrong FSM state.
40function abortLex3 ($state)
41{
42 echo "Error! Lexical scanner final state is still '${state}' after scanning the last char.";
43 die;
44}
45
46function abortSynt ($lexname)
47{
48 echo "Error! Unknown lexeme '${lexname}'.";
49 die;
50}
51
52// Produce a list of lexems from the given text. Possible lexems are:
53function getLexemsFromRackCode ($text)
f63072f5
DO
54{
55 $ret = array();
56 $textlen = strlen ($text);
57 $state = "ESOTSM";
58 for ($i = 0; $i < $textlen; $i++) :
59 $char = $text{$i};
60 $newstate = $state;
61 switch ($state) :
62 case 'ESOTSM':
63 switch (TRUE)
64 {
f63072f5 65 case ($char == '('):
a1b88eca 66 $ret[] = array ('type' => 'LEX_LBRACE');
f63072f5
DO
67 break;
68 case ($char == ')'):
a1b88eca 69 $ret[] = array ('type' => 'LEX_RBRACE');
f63072f5
DO
70 break;
71 case ($char == '#'):
72 $newstate = 'skipping comment';
73 break;
74 case (preg_match ('/^[a-zA-Z]$/', $char)):
75 $newstate = 'reading word';
76 $buffer = $char;
77 break;
78 case (preg_match ('/^[ \t\n]$/', $char)):
79 // nom-nom...
80 break;
81 case ($char == '{'):
82 $newstate = 'reading tag 1';
83 break;
84 case ($char == '['):
85 $newstate = 'reading predicate 1';
86 break;
87 default:
88 abortLex1 ($state, $text, $i);
89 }
90 break;
91 case 'reading word':
92 switch (TRUE)
93 {
94 case (preg_match ('/^[a-zA-Z]$/', $char)):
95 $buffer .= $char;
96 break;
97 case (preg_match ('/^[ \t\n]$/', $char)):
98 // got a word, sort it out
99 switch ($buffer)
100 {
101 case 'allow':
f63072f5 102 case 'deny':
a1b88eca 103 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
f63072f5
DO
104 break;
105 case 'define':
a1b88eca 106 $ret[] = array ('type' => 'LEX_DEFINE');
f63072f5
DO
107 break;
108 case 'and':
f63072f5 109 case 'or':
a1b88eca
DO
110 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
111 break;
112 case 'not':
113 $ret[] = array ('type' => 'LEX_NOT');
114 break;
115 case 'false':
116 case 'true':
117 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
f63072f5
DO
118 break;
119 default:
120 abortLex2 ($state, $buffer);
121 }
122 $newstate = 'ESOTSM';
123 break;
124 default:
125 abortLex1 ($state, $text, $i);
126 }
127 break;
128 case 'reading tag 1':
129 switch (TRUE)
130 {
131 case (preg_match ('/^[ \t\n]$/', $char)):
132 // nom-nom...
133 break;
134 case (preg_match ('/^[a-zA-Z\$]$/', $char)):
135 $buffer = $char;
136 $newstate = 'reading tag 2';
137 break;
138 default:
139 abortLex1 ($state, $text, $i);
140 }
141 break;
142 case 'reading tag 2':
143 switch (TRUE)
144 {
145 case ($char == '}'):
a1b88eca 146 $ret[] = array ('type' => 'LEX_TAG', 'load' => rtrim ($buffer));
f63072f5
DO
147 $newstate = 'ESOTSM';
148 break;
149 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
150 $buffer .= $char;
151 break;
152 default:
153 abortLex1 ($state, $text, $i);
154 }
155 break;
156 case 'reading predicate 1':
157 switch (TRUE)
158 {
159 case (preg_match ('/^[ \t\n]$/', $char)):
160 // nom-nom...
161 break;
162 case (preg_match ('/^[a-zA-Z]$/', $char)):
163 $buffer = $char;
164 $newstate = 'reading predicate 2';
165 break;
166 default:
167 abortLex1 ($state, $text, $i);
168 }
169 break;
170 case 'reading predicate 2':
171 switch (TRUE)
172 {
173 case ($char == ']'):
a1b88eca 174 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => rtrim ($buffer));
f63072f5
DO
175 $newstate = 'ESOTSM';
176 break;
177 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
178 $buffer .= $char;
179 break;
180 default:
181 abortLex1 ($state, $text, $i);
182 }
183 break;
184 case 'skipping comment':
185 switch ($text{$i})
186 {
187 case "\n":
188 $newstate = 'ESOTSM';
189 default: // eat char, nom-nom...
190 break;
191 }
192 break;
193 default:
194 die (__FUNCTION__ . "(): internal error, state == ${state}");
195 endswitch;
196 $state = $newstate;
197 endfor;
a1b88eca
DO
198 if ($state != 'ESOTSM')
199 abortLex3 ($state);
f63072f5
DO
200 return $ret;
201}
202
a1b88eca
DO
203function syntReduce_BOOLCONST (&$stack)
204{
205 if (count ($stack) < 1)
206 return FALSE;
207 $top = array_pop ($stack);
208 if ($top['type'] == 'LEX_BOOLCONST')
209 {
210 $s = array
211 (
212 'type' => 'SYNT_EXPRESSION',
213 'kids' => array ($top)
214 );
215 array_push ($stack, $s);
216 return TRUE;
217 }
218 // No luck, push it back.
219 array_push ($stack, $top);
220 return FALSE;
221}
222
223// Parse the given lexems stream into a list of RackCode sentences. Each such
224// sentence is a syntax tree, suitable for tag sequence evaluation. It may
225// contain all of the lexems plus the following syntax contructs:
226// SYNT_NOT
227// SYNT_EXPRESSION
228// SYNT_DEFINITION
229// SYNT_GRANT
230// SYNT_SENTENCE
231// SYNT_CODE
232function getSentencesFromLexems ($lexems)
233{
234 $ret = array(); // This is going to be the tree.
235 $stack = array(); // subject to array_push() and array_pop()
236 $done = 0; // $lexems[$done] is the next item in the tape
237 $todo = count ($lexems);
238
239 // Perform shift-reduce parsing.
240 while ($done < $todo)
241 {
242 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
243 $stacksize = count ($stack);
244 if ($stacksize >= 1)
245 {
246 $stacktop = array_pop ($stack);
247 // It is possible to run into a S/R conflict, when having a syntaxically
248 // correct sentence base on the stack and some "and {something}" items
249 // on the input tape, hence let's detect this specific case and insist
250 // on "shift" action to make EXPR parsing hungry one.
251 if ($stacktop['type'] == 'SYNT_EXPR' and $lexems[$done]['type'] == 'LEX_BOOLOP')
252 {
253 // shift!
254 array_push ($stack, $stacktop);
255 array_push ($stack, $lexems[$done++]);
256 continue;
257 }
258 if ($stacksize >= 2)
259 {
260 $stacksecondtop = array_pop ($stack);
261 if ($stacksize >= 3)
262 {
263 $stackthirdtop = array_pop ($stack);
264 array_push ($stack, $stackthirdtop);
265 }
266 array_push ($stack, $stacksecondtop);
267 }
268 array_push ($stack, $stacktop);
269 // Try "replace" action only on a non-empty stack.
270 // If a handle is found for reversing a production rule, do it and start a new
271 // cycle instead of advancing further on rule list. This will preserve rule priority
272 // in the grammar and keep us from an extra shift action.
273 if
274 (
275 $stacktop['type'] == 'LEX_BOOLCONST' or
276 $stacktop['type'] == 'LEX_TAG' or
277 $stacktop['type'] == 'LEX_PREDICATE'
278 )
279 {
280 array_pop ($stack);
281 array_push
282 (
283 $stack,
284 array
285 (
286 'type' => 'SYNT_EXPR',
287 'load' => $stacktop
288 )
289 );
290 continue;
291 }
292 if
293 (
294 $stacktop['type'] == 'SYNT_EXPR' and
295 $stacksecondtop['type'] == 'LEX_NOT'
296 )
297 {
298 array_pop ($stack);
299 array_pop ($stack);
300 array_push
301 (
302 $stack,
303 array
304 (
305 'type' => 'SYNT_EXPR',
306 'load' => array
307 (
308 'type' => 'SYNT_NOTEXPR',
309 'arg' => $stacktop
310 )
311 )
312 );
313 continue;
314 }
315 if
316 (
317 $stacktop['type'] == 'LEX_RBRACE' and
318 $stacksecondtop['type'] == 'SYNT_EXPR' and
319 $stackthirdtop['type'] == 'LEX_LBRACE'
320 )
321 {
322 array_pop ($stack);
323 array_pop ($stack);
324 array_pop ($stack);
325 array_push
326 (
327 $stack,
328 $stacksecondtop
329 );
330 continue;
331 }
332 if
333 (
334 $stacktop['type'] == 'SYNT_EXPR' and
335 $stacksecondtop['type'] == 'LEX_BOOLOP' and
336 $stackthirdtop['type'] == 'SYNT_EXPR'
337 )
338 {
339 array_pop ($stack);
340 array_pop ($stack);
341 array_pop ($stack);
342 array_push
343 (
344 $stack,
345 array
346 (
347 'type' => 'SYNT_EXPR',
348 'load' => array
349 (
350 'type' => 'SYNT_BOOLOP',
351 'subtype' => $stacksecondtop['load'],
352 'left' => $stackthirdtop,
353 'right' => $stacktop
354 )
355 )
356 );
357 continue;
358 }
359 }
360 // We are here because of either an empty stack or none reduction succeeded. Shift!
361 array_push ($stack, $lexems[$done++]);
362 }
363 return $stack;
364}
365
f63072f5 366?>