r1965 + more cleanups in syntax analyzer
[racktables] / inc / code.php
1 <?php
2 /*
3 * This file implements lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 */
19
20 // Complain about martian char.
21 function abortLex1 ($state, $text, $pos)
22 {
23 echo "Error! Could not parse the following text (current state is '${state}'): ";
24 echo substr ($text, 0, $pos);
25 echo '<font color = red>-&gt;' . $text{$pos} . '&lt;-</font>';
26 echo substr ($text, $pos + 1);
27 die;
28 }
29
30 // Complain about martian keyword.
31 function abortLex2 ($state, $word)
32 {
33 echo "Error! Could not parse word (current state is '${state}'): '${word}'.";
34 die;
35 }
36
37 // Complain about wrong FSM state.
38 function abortLex3 ($state)
39 {
40 echo "Error! Lexical scanner final state is still '${state}' after scanning the last char.";
41 die;
42 }
43
44 function abortSynt ($lexname)
45 {
46 echo "Error! Unknown lexeme '${lexname}'.";
47 die;
48 }
49
50 // Produce a list of lexems from the given text. Possible lexems are:
51 function getLexemsFromRackCode ($text)
52 {
53 $ret = array();
54 $textlen = strlen ($text);
55 $state = "ESOTSM";
56 for ($i = 0; $i < $textlen; $i++) :
57 $char = $text{$i};
58 $newstate = $state;
59 switch ($state) :
60 case 'ESOTSM':
61 switch (TRUE)
62 {
63 case ($char == '('):
64 $ret[] = array ('type' => 'LEX_LBRACE');
65 break;
66 case ($char == ')'):
67 $ret[] = array ('type' => 'LEX_RBRACE');
68 break;
69 case ($char == '#'):
70 $newstate = 'skipping comment';
71 break;
72 case (preg_match ('/^[a-zA-Z]$/', $char)):
73 $newstate = 'reading word';
74 $buffer = $char;
75 break;
76 case (preg_match ('/^[ \t\n]$/', $char)):
77 // nom-nom...
78 break;
79 case ($char == '{'):
80 $newstate = 'reading tag 1';
81 break;
82 case ($char == '['):
83 $newstate = 'reading predicate 1';
84 break;
85 default:
86 abortLex1 ($state, $text, $i);
87 }
88 break;
89 case 'reading word':
90 switch (TRUE)
91 {
92 case (preg_match ('/^[a-zA-Z]$/', $char)):
93 $buffer .= $char;
94 break;
95 case (preg_match ('/^[ \t\n]$/', $char)):
96 // got a word, sort it out
97 switch ($buffer)
98 {
99 case 'allow':
100 case 'deny':
101 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
102 break;
103 case 'define':
104 $ret[] = array ('type' => 'LEX_DEFINE');
105 break;
106 case 'and':
107 case 'or':
108 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
109 break;
110 case 'not':
111 $ret[] = array ('type' => 'LEX_NOT');
112 break;
113 case 'false':
114 case 'true':
115 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
116 break;
117 default:
118 abortLex2 ($state, $buffer);
119 }
120 $newstate = 'ESOTSM';
121 break;
122 default:
123 abortLex1 ($state, $text, $i);
124 }
125 break;
126 case 'reading tag 1':
127 switch (TRUE)
128 {
129 case (preg_match ('/^[ \t\n]$/', $char)):
130 // nom-nom...
131 break;
132 case (preg_match ('/^[a-zA-Z\$]$/', $char)):
133 $buffer = $char;
134 $newstate = 'reading tag 2';
135 break;
136 default:
137 abortLex1 ($state, $text, $i);
138 }
139 break;
140 case 'reading tag 2':
141 switch (TRUE)
142 {
143 case ($char == '}'):
144 $buffer = rtrim ($buffer);
145 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
146 abortLex1 ($state, $text, $i);
147 $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer);
148 $newstate = 'ESOTSM';
149 break;
150 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
151 $buffer .= $char;
152 break;
153 default:
154 abortLex1 ($state, $text, $i);
155 }
156 break;
157 case 'reading predicate 1':
158 switch (TRUE)
159 {
160 case (preg_match ('/^[ \t\n]$/', $char)):
161 // nom-nom...
162 break;
163 case (preg_match ('/^[a-zA-Z]$/', $char)):
164 $buffer = $char;
165 $newstate = 'reading predicate 2';
166 break;
167 default:
168 abortLex1 ($state, $text, $i);
169 }
170 break;
171 case 'reading predicate 2':
172 switch (TRUE)
173 {
174 case ($char == ']'):
175 $buffer = rtrim ($buffer);
176 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
177 abortLex1 ($state, $text, $i);
178 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer);
179 $newstate = 'ESOTSM';
180 break;
181 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
182 $buffer .= $char;
183 break;
184 default:
185 abortLex1 ($state, $text, $i);
186 }
187 break;
188 case 'skipping comment':
189 switch ($text{$i})
190 {
191 case "\n":
192 $newstate = 'ESOTSM';
193 default: // eat char, nom-nom...
194 break;
195 }
196 break;
197 default:
198 die (__FUNCTION__ . "(): internal error, state == ${state}");
199 endswitch;
200 $state = $newstate;
201 endfor;
202 if ($state != 'ESOTSM')
203 abortLex3 ($state);
204 return $ret;
205 }
206
207 // Parse the given lexems stream into a list of RackCode sentences. Each such
208 // sentence is a syntax tree, suitable for tag sequence evaluation. The final
209 // parse tree may contain the following nodes:
210 // LEX_TAG
211 // LEX_PREDICATE
212 // LEX_BOOLCONST
213 // SYNT_NOT (1 argument, holding SYNT_EXPR)
214 // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR)
215 // SYNT_DEFINITION (2 arguments: term and definition)
216 // SYNT_GRANT (2 arguments: decision and condition)
217 // SYNT_CODETEXT (sequence of sentences)
218 //
219 // After parsing the input successfully a list of SYNT_GRANT and SYNT_DEFINITION
220 // trees is returned.
221 function getSentencesFromLexems ($lexems)
222 {
223 $stack = array(); // subject to array_push() and array_pop()
224 $done = 0; // $lexems[$done] is the next item in the tape
225 $todo = count ($lexems);
226
227 // Perform shift-reduce processing. The "accept" actions occurs with an
228 // empty input tape and the stack holding only one symbol (the start
229 // symbol, SYNT_CODETEXT).
230 while (TRUE)
231 {
232 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
233 $stacksize = count ($stack);
234 if ($stacksize >= 1)
235 {
236 $stacktop = array_pop ($stack);
237 // It is possible to run into a S/R conflict, when having a syntaxically
238 // correct sentence base on the stack and some "and {something}" items
239 // on the input tape, hence let's detect this specific case and insist
240 // on "shift" action to make EXPR parsing hungry one.
241 if
242 (
243 $stacktop['type'] == 'SYNT_EXPR' and
244 ($done < $todo) and
245 $lexems[$done]['type'] == 'LEX_BOOLOP'
246 )
247 {
248 // shift!
249 array_push ($stack, $stacktop);
250 array_push ($stack, $lexems[$done++]);
251 continue;
252 }
253 if ($stacksize >= 2)
254 {
255 $stacksecondtop = array_pop ($stack);
256 if ($stacksize >= 3)
257 {
258 $stackthirdtop = array_pop ($stack);
259 array_push ($stack, $stackthirdtop);
260 }
261 array_push ($stack, $stacksecondtop);
262 }
263 array_push ($stack, $stacktop);
264 // First detect definition start to save the predicate from being reduced into
265 // expression.
266 if
267 (
268 $stacktop['type'] == 'LEX_PREDICATE' and
269 $stacksecondtop['type'] == 'LEX_DEFINE'
270 )
271 {
272 array_pop ($stack);
273 array_pop ($stack);
274 array_push
275 (
276 $stack,
277 array
278 (
279 'type' => 'SYNT_DEFINE',
280 'load' => $stacktop['load']
281 )
282 );
283 continue;
284 }
285 // Try "replace" action only on a non-empty stack.
286 // If a handle is found for reversing a production rule, do it and start a new
287 // cycle instead of advancing further on rule list. This will preserve rule priority
288 // in the grammar and keep us from an extra shift action.
289 if
290 (
291 $stacktop['type'] == 'LEX_BOOLCONST' or
292 $stacktop['type'] == 'LEX_TAG' or
293 $stacktop['type'] == 'LEX_PREDICATE'
294 )
295 {
296 // reduce!
297 array_pop ($stack);
298 array_push
299 (
300 $stack,
301 array
302 (
303 'type' => 'SYNT_EXPR',
304 'load' => $stacktop
305 )
306 );
307 continue;
308 }
309 if
310 (
311 $stacktop['type'] == 'SYNT_EXPR' and
312 $stacksecondtop['type'] == 'LEX_NOT'
313 )
314 {
315 // reduce!
316 array_pop ($stack);
317 array_pop ($stack);
318 array_push
319 (
320 $stack,
321 array
322 (
323 'type' => 'SYNT_EXPR',
324 'load' => array
325 (
326 'type' => 'SYNT_NOTEXPR',
327 'load' => $stacktop['load']
328 )
329 )
330 );
331 continue;
332 }
333 if
334 (
335 $stacktop['type'] == 'LEX_RBRACE' and
336 $stacksecondtop['type'] == 'SYNT_EXPR' and
337 $stackthirdtop['type'] == 'LEX_LBRACE'
338 )
339 {
340 // reduce!
341 array_pop ($stack);
342 array_pop ($stack);
343 array_pop ($stack);
344 array_push
345 (
346 $stack,
347 $stacksecondtop
348 );
349 continue;
350 }
351 if
352 (
353 $stacktop['type'] == 'SYNT_EXPR' and
354 $stacksecondtop['type'] == 'LEX_BOOLOP' and
355 $stackthirdtop['type'] == 'SYNT_EXPR'
356 )
357 {
358 // reduce!
359 array_pop ($stack);
360 array_pop ($stack);
361 array_pop ($stack);
362 array_push
363 (
364 $stack,
365 array
366 (
367 'type' => 'SYNT_EXPR',
368 'load' => array
369 (
370 'type' => 'SYNT_BOOLOP',
371 'subtype' => $stacksecondtop['load'],
372 'left' => $stackthirdtop['load'],
373 'right' => $stacktop['load']
374 )
375 )
376 );
377 continue;
378 }
379 if
380 (
381 $stacktop['type'] == 'SYNT_EXPR' and
382 $stacksecondtop['type'] == 'LEX_DECISION'
383 )
384 {
385 // reduce!
386 array_pop ($stack);
387 array_pop ($stack);
388 array_push
389 (
390 $stack,
391 array
392 (
393 'type' => 'SYNT_GRANT',
394 'decision' => $stacksecondtop['load'],
395 'condition' => $stacktop['load']
396 )
397 );
398 continue;
399 }
400 if
401 (
402 $stacktop['type'] == 'SYNT_EXPR' and
403 $stacksecondtop['type'] == 'SYNT_DEFINE'
404 )
405 {
406 // reduce!
407 array_pop ($stack);
408 array_pop ($stack);
409 array_push
410 (
411 $stack,
412 array
413 (
414 'type' => 'SYNT_DEFINITION',
415 'term' => $stacksecondtop['load'],
416 'definition' => $stacktop['load']
417 )
418 );
419 continue;
420 }
421 if
422 (
423 ($stacktop['type'] == 'SYNT_GRANT' or $stacktop['type'] == 'SYNT_DEFINITION') and
424 $stacksecondtop['type'] == 'SYNT_CODETEXT'
425 )
426 {
427 // reduce!
428 array_pop ($stack);
429 array_pop ($stack);
430 $stacksecondtop['load'][] = $stacktop;
431 array_push
432 (
433 $stack,
434 $stacksecondtop
435 );
436 continue;
437 }
438 if
439 (
440 $stacktop['type'] == 'SYNT_GRANT' or
441 $stacktop['type'] == 'SYNT_DEFINITION'
442 )
443 {
444 // reduce!
445 array_pop ($stack);
446 array_push
447 (
448 $stack,
449 array
450 (
451 'type' => 'SYNT_CODETEXT',
452 'load' => array ($stacktop)
453 )
454 );
455 continue;
456 }
457 }
458 // The fact we execute here means, that no reduction or early shift
459 // has been done. The only way to enter another iteration is to "shift"
460 // more, if possible. If the tape is empty, we are facing the
461 // "accept"/"reject" dilemma. The only possible way to "accept" is to
462 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
463 if ($done < $todo)
464 {
465 array_push ($stack, $lexems[$done++]);
466 continue;
467 }
468 // The moment of truth.
469 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
470 return $stack[0]['load'];
471 return NULL;
472 }
473 }
474
475 function eval_expression ($expr, $tagchain, $ptable)
476 {
477 switch ($expr['type'])
478 {
479 case 'LEX_TAG': // Return true, if given tag is present on the tag chain.
480 foreach ($tagchain as $tagInfo)
481 if ($expr['load'] == $tagInfo['tag'])
482 return TRUE;
483 return FALSE;
484 case 'LEX_PREDICATE': // Find given predicate in the symbol table and evaluate it.
485 $pname = $expr['load'];
486 if (!isset ($ptable[$pname]))
487 {
488 showError ("Predicate '${pname}' is referenced before declaration");
489 return;
490 }
491 return eval_expression ($ptable[$pname], $tagchain, $ptable);
492 case 'BOOLCONST': // Evaluate a boolean constant.
493 switch ($expr['load'])
494 {
495 case 'true':
496 return TRUE;
497 case 'false':
498 return FALSE;
499 default:
500 showError ("Could not parse a boolean constant with value '${expr['load']}'");
501 return; // should failure be harder?
502 }
503 case 'SYNT_NOTEXPR':
504 return !eval_expression ($expr['load'], $tagchain, $ptable);
505 case 'SYNT_BOOLOP':
506 $leftresult = eval_expression ($expr['left'], $tagchain, $ptable);
507 switch ($expr['subtype'])
508 {
509 case 'or':
510 if ($leftresult)
511 return TRUE; // early success
512 return eval_expression ($expr['right'], $tagchain, $ptable);
513 case 'and':
514 if (!$leftresult)
515 return FALSE; // early failure
516 return eval_expression ($expr['right'], $tagchain, $ptable);
517 default:
518 showError ("Cannot evaluate unknown boolean operation '${boolop['subtype']}'");
519 return;
520 }
521 default:
522 showError ("Evaluation error, cannot process expression type '${expr['type']}'");
523 break;
524 }
525 }
526
527 function gotClearanceForTagChain ($tagchain)
528 {
529 global $rackCode;
530 $ptable = array();
531 foreach ($rackCode as $sentence)
532 {
533 switch ($sentence['type'])
534 {
535 case 'SYNT_DEFINITION':
536 $ptable[$sentence['term']] = $sentence['definition'];
537 break;
538 case 'SYNT_GRANT':
539 if (eval_expression ($sentence['condition'], $tagchain, $ptable))
540 switch ($sentence['decision'])
541 {
542 case 'allow':
543 return TRUE;
544 case 'deny':
545 return FALSE;
546 default:
547 showError ("Condition match for unknown grant decision '${sentence['decision']}'");
548 break;
549 }
550 break;
551 default:
552 showError ("Can't process sentence of unknown type '${sentence['type']}'");
553 break;
554 }
555 }
556 return FALSE;
557 }
558
559 function getRackCode ()
560 {
561 // FIXME: handle errors and display a message with an option to reset RackCode text
562 // FIXME: perform semantical analysis to prove the tree be reference-wise error
563 // free regardless of evaluation order
564 return getSentencesFromLexems (getLexemsFromRackCode (getLongText ('RackCode')));
565 }
566
567 ?>