r1961 + a better implementation of the syntax analyzer
[racktables] / inc / code.php
CommitLineData
f63072f5 1<?php
a1b88eca
DO
2/*
3 * This file implement lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 * Comments last from the first # to the end of the line and are filtered out
19 * automatically.
20 */
f63072f5
DO
21
22// Complain about martian char.
23function abortLex1 ($state, $text, $pos)
24{
25 echo "Error! Could not parse the following text (current state is '${state}'): ";
26 echo substr ($text, 0, $pos);
27 echo '<font color = red>-&gt;' . $text{$pos} . '&lt;-</font>';
28 echo substr ($text, $pos + 1);
29 die;
30}
31
32// Complain about martian keyword.
33function abortLex2 ($state, $word)
34{
a1b88eca 35 echo "Error! Could not parse word (current state is '${state}'): '${word}'.";
f63072f5
DO
36 die;
37}
38
a1b88eca
DO
39// Complain about wrong FSM state.
40function abortLex3 ($state)
41{
42 echo "Error! Lexical scanner final state is still '${state}' after scanning the last char.";
43 die;
44}
45
46function abortSynt ($lexname)
47{
48 echo "Error! Unknown lexeme '${lexname}'.";
49 die;
50}
51
52// Produce a list of lexems from the given text. Possible lexems are:
53function getLexemsFromRackCode ($text)
f63072f5
DO
54{
55 $ret = array();
56 $textlen = strlen ($text);
57 $state = "ESOTSM";
58 for ($i = 0; $i < $textlen; $i++) :
59 $char = $text{$i};
60 $newstate = $state;
61 switch ($state) :
62 case 'ESOTSM':
63 switch (TRUE)
64 {
f63072f5 65 case ($char == '('):
a1b88eca 66 $ret[] = array ('type' => 'LEX_LBRACE');
f63072f5
DO
67 break;
68 case ($char == ')'):
a1b88eca 69 $ret[] = array ('type' => 'LEX_RBRACE');
f63072f5
DO
70 break;
71 case ($char == '#'):
72 $newstate = 'skipping comment';
73 break;
74 case (preg_match ('/^[a-zA-Z]$/', $char)):
75 $newstate = 'reading word';
76 $buffer = $char;
77 break;
78 case (preg_match ('/^[ \t\n]$/', $char)):
79 // nom-nom...
80 break;
81 case ($char == '{'):
82 $newstate = 'reading tag 1';
83 break;
84 case ($char == '['):
85 $newstate = 'reading predicate 1';
86 break;
87 default:
88 abortLex1 ($state, $text, $i);
89 }
90 break;
91 case 'reading word':
92 switch (TRUE)
93 {
94 case (preg_match ('/^[a-zA-Z]$/', $char)):
95 $buffer .= $char;
96 break;
97 case (preg_match ('/^[ \t\n]$/', $char)):
98 // got a word, sort it out
99 switch ($buffer)
100 {
101 case 'allow':
f63072f5 102 case 'deny':
a1b88eca 103 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
f63072f5
DO
104 break;
105 case 'define':
a1b88eca 106 $ret[] = array ('type' => 'LEX_DEFINE');
f63072f5
DO
107 break;
108 case 'and':
f63072f5 109 case 'or':
a1b88eca
DO
110 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
111 break;
112 case 'not':
113 $ret[] = array ('type' => 'LEX_NOT');
114 break;
115 case 'false':
116 case 'true':
117 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
f63072f5
DO
118 break;
119 default:
120 abortLex2 ($state, $buffer);
121 }
122 $newstate = 'ESOTSM';
123 break;
124 default:
125 abortLex1 ($state, $text, $i);
126 }
127 break;
128 case 'reading tag 1':
129 switch (TRUE)
130 {
131 case (preg_match ('/^[ \t\n]$/', $char)):
132 // nom-nom...
133 break;
134 case (preg_match ('/^[a-zA-Z\$]$/', $char)):
135 $buffer = $char;
136 $newstate = 'reading tag 2';
137 break;
138 default:
139 abortLex1 ($state, $text, $i);
140 }
141 break;
142 case 'reading tag 2':
143 switch (TRUE)
144 {
145 case ($char == '}'):
a1b88eca 146 $ret[] = array ('type' => 'LEX_TAG', 'load' => rtrim ($buffer));
f63072f5
DO
147 $newstate = 'ESOTSM';
148 break;
149 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
150 $buffer .= $char;
151 break;
152 default:
153 abortLex1 ($state, $text, $i);
154 }
155 break;
156 case 'reading predicate 1':
157 switch (TRUE)
158 {
159 case (preg_match ('/^[ \t\n]$/', $char)):
160 // nom-nom...
161 break;
162 case (preg_match ('/^[a-zA-Z]$/', $char)):
163 $buffer = $char;
164 $newstate = 'reading predicate 2';
165 break;
166 default:
167 abortLex1 ($state, $text, $i);
168 }
169 break;
170 case 'reading predicate 2':
171 switch (TRUE)
172 {
173 case ($char == ']'):
a1b88eca 174 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => rtrim ($buffer));
f63072f5
DO
175 $newstate = 'ESOTSM';
176 break;
177 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
178 $buffer .= $char;
179 break;
180 default:
181 abortLex1 ($state, $text, $i);
182 }
183 break;
184 case 'skipping comment':
185 switch ($text{$i})
186 {
187 case "\n":
188 $newstate = 'ESOTSM';
189 default: // eat char, nom-nom...
190 break;
191 }
192 break;
193 default:
194 die (__FUNCTION__ . "(): internal error, state == ${state}");
195 endswitch;
196 $state = $newstate;
197 endfor;
a1b88eca
DO
198 if ($state != 'ESOTSM')
199 abortLex3 ($state);
f63072f5
DO
200 return $ret;
201}
202
a1b88eca
DO
203function syntReduce_BOOLCONST (&$stack)
204{
205 if (count ($stack) < 1)
206 return FALSE;
207 $top = array_pop ($stack);
208 if ($top['type'] == 'LEX_BOOLCONST')
209 {
210 $s = array
211 (
212 'type' => 'SYNT_EXPRESSION',
213 'kids' => array ($top)
214 );
215 array_push ($stack, $s);
216 return TRUE;
217 }
218 // No luck, push it back.
219 array_push ($stack, $top);
220 return FALSE;
221}
222
223// Parse the given lexems stream into a list of RackCode sentences. Each such
1381b655
DO
224// sentence is a syntax tree, suitable for tag sequence evaluation. It will
225// contain all of the input lexems framed into a parse tree built from the
226// following nodes:
227// SYNT_NOT (1 argument, SYNT_EXPR)
228// SYNT_BOOLOP (2 arguments of type SYNT_EXPR)
229// SYNT_EXPR (1 argument, different types)
230// SYNT_DEF (2 arguments: subject and definition)
231// SYNT_GRANT (2 arguments: decision and condition)
232// SYNT_CODESENTENCE (either a grant or a definition)
233// SYNT_CODETEXT (sequence of sentences)
a1b88eca
DO
234function getSentencesFromLexems ($lexems)
235{
a1b88eca
DO
236 $stack = array(); // subject to array_push() and array_pop()
237 $done = 0; // $lexems[$done] is the next item in the tape
238 $todo = count ($lexems);
239
1381b655
DO
240 // Perform shift-reduce processing. The "accept" actions occurs with an
241 // empty input tape and the stack holding only one symbol (the start
242 // symbol, SYNT_CODETEXT).
243 while (TRUE)
a1b88eca
DO
244 {
245 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
246 $stacksize = count ($stack);
247 if ($stacksize >= 1)
248 {
249 $stacktop = array_pop ($stack);
250 // It is possible to run into a S/R conflict, when having a syntaxically
251 // correct sentence base on the stack and some "and {something}" items
252 // on the input tape, hence let's detect this specific case and insist
253 // on "shift" action to make EXPR parsing hungry one.
1381b655
DO
254 if
255 (
256 $stacktop['type'] == 'SYNT_EXPR' and
257 ($done < $todo) and
258 $lexems[$done]['type'] == 'LEX_BOOLOP'
259 )
a1b88eca
DO
260 {
261 // shift!
1381b655 262 echo 'shi[f]t!]';
a1b88eca
DO
263 array_push ($stack, $stacktop);
264 array_push ($stack, $lexems[$done++]);
265 continue;
266 }
267 if ($stacksize >= 2)
268 {
269 $stacksecondtop = array_pop ($stack);
270 if ($stacksize >= 3)
271 {
272 $stackthirdtop = array_pop ($stack);
273 array_push ($stack, $stackthirdtop);
274 }
275 array_push ($stack, $stacksecondtop);
276 }
277 array_push ($stack, $stacktop);
1381b655
DO
278 // First detect definition start to save the predicate from being reduced into
279 // expression.
280 if
281 (
282 $stacktop['type'] == 'LEX_PREDICATE' and
283 $stacksecondtop['type'] == 'LEX_DEFINE'
284 )
285 {
286 array_pop ($stack);
287 array_pop ($stack);
288 array_push
289 (
290 $stack,
291 array
292 (
293 'type' => 'SYNT_DEFINE',
294 'load' => $stacktop['load']
295 )
296 );
297 continue;
298 }
a1b88eca
DO
299 // Try "replace" action only on a non-empty stack.
300 // If a handle is found for reversing a production rule, do it and start a new
301 // cycle instead of advancing further on rule list. This will preserve rule priority
302 // in the grammar and keep us from an extra shift action.
303 if
304 (
305 $stacktop['type'] == 'LEX_BOOLCONST' or
306 $stacktop['type'] == 'LEX_TAG' or
307 $stacktop['type'] == 'LEX_PREDICATE'
308 )
309 {
1381b655 310 // reduce!
a1b88eca
DO
311 array_pop ($stack);
312 array_push
313 (
314 $stack,
315 array
316 (
317 'type' => 'SYNT_EXPR',
318 'load' => $stacktop
319 )
320 );
321 continue;
322 }
323 if
324 (
325 $stacktop['type'] == 'SYNT_EXPR' and
326 $stacksecondtop['type'] == 'LEX_NOT'
327 )
328 {
1381b655 329 // reduce!
a1b88eca
DO
330 array_pop ($stack);
331 array_pop ($stack);
332 array_push
333 (
334 $stack,
335 array
336 (
337 'type' => 'SYNT_EXPR',
338 'load' => array
339 (
340 'type' => 'SYNT_NOTEXPR',
341 'arg' => $stacktop
342 )
343 )
344 );
345 continue;
346 }
347 if
348 (
349 $stacktop['type'] == 'LEX_RBRACE' and
350 $stacksecondtop['type'] == 'SYNT_EXPR' and
351 $stackthirdtop['type'] == 'LEX_LBRACE'
352 )
353 {
1381b655 354 // reduce!
a1b88eca
DO
355 array_pop ($stack);
356 array_pop ($stack);
357 array_pop ($stack);
358 array_push
359 (
360 $stack,
361 $stacksecondtop
362 );
363 continue;
364 }
365 if
366 (
367 $stacktop['type'] == 'SYNT_EXPR' and
368 $stacksecondtop['type'] == 'LEX_BOOLOP' and
369 $stackthirdtop['type'] == 'SYNT_EXPR'
370 )
371 {
1381b655 372 // reduce!
a1b88eca
DO
373 array_pop ($stack);
374 array_pop ($stack);
375 array_pop ($stack);
376 array_push
377 (
378 $stack,
379 array
380 (
381 'type' => 'SYNT_EXPR',
382 'load' => array
383 (
384 'type' => 'SYNT_BOOLOP',
385 'subtype' => $stacksecondtop['load'],
386 'left' => $stackthirdtop,
387 'right' => $stacktop
388 )
389 )
390 );
391 continue;
392 }
1381b655
DO
393 if
394 (
395 $stacktop['type'] == 'SYNT_EXPR' and
396 $stacksecondtop['type'] == 'LEX_DECISION'
397 )
398 {
399 // reduce!
400 array_pop ($stack);
401 array_pop ($stack);
402 array_push
403 (
404 $stack,
405 array
406 (
407 'type' => 'SYNT_GRANT',
408 'decision' => $stacksecondtop['load'],
409 'condition' => $stacktop
410 )
411 );
412 continue;
413 }
414 if
415 (
416 $stacktop['type'] == 'SYNT_EXPR' and
417 $stacksecondtop['type'] == 'SYNT_DEFINE'
418 )
419 {
420 // reduce!
421 array_pop ($stack);
422 array_pop ($stack);
423 array_push
424 (
425 $stack,
426 array
427 (
428 'type' => 'SYNT_DEFINITION',
429 'term' => $stacksecondtop,
430 'definition' => $stacktop
431 )
432 );
433 continue;
434 }
435 if
436 (
437 $stacktop['type'] == 'SYNT_GRANT' or
438 $stacktop['type'] == 'SYNT_DEFINITION'
439 )
440 {
441 // reduce!
442 array_pop ($stack);
443 array_push
444 (
445 $stack,
446 array
447 (
448 'type' => 'SYNT_CODESENTENCE',
449 'load' => $stacktop
450 )
451 );
452 continue;
453 }
454 if
455 (
456 $stacktop['type'] == 'SYNT_CODESENTENCE' and
457 $stacksecondtop['type'] == 'SYNT_CODETEXT'
458 )
459 {
460 // reduce!
461 array_pop ($stack);
462 array_pop ($stack);
463 $stacksecondtop['load'][] = $stacktop;
464 array_push
465 (
466 $stack,
467 $stacksecondtop
468 );
469 continue;
470 }
471 if
472 (
473 $stacktop['type'] == 'SYNT_CODESENTENCE'
474 )
475 {
476 // reduce!
477 array_pop ($stack);
478 $stacksecondtop['log'][] = $stacktop;
479 array_push
480 (
481 $stack,
482 array
483 (
484 'type' => 'SYNT_CODETEXT',
485 'load' => array ($stacktop)
486 )
487 );
488 continue;
489 }
490 }
491 // The fact we execute here means, that no reduction or early shift
492 // has been done. The only way to enter another iteration is to "shift"
493 // more, if possible. If the tape is empty, we are facing the
494 // "accept"/"reject" dilemma. The only possible way to "accept" is to
495 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
496 if ($done < $todo)
497 {
498 array_push ($stack, $lexems[$done++]);
499 continue;
a1b88eca 500 }
1381b655
DO
501 // The moment of truth.
502 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
503 return $stack[0];
504 return NULL;
a1b88eca 505 }
a1b88eca
DO
506}
507
f63072f5 508?>