r2000 + improve the semantic filter: include the problematic predicate name in error...
[racktables] / inc / code.php
1 <?php
2 /*
3 * This file implements lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 */
19
20 // Complain about martian char.
21 function abortLex1 ($state, $text, $pos)
22 {
23 $message = "invalid char with code " . ord (mb_substr ($text, $pos, 1));
24 $message .= " at position ${pos} (FSM state is '${state}')";
25 return array ('result' => 'NAK', 'load' => $message);
26 }
27
28 // Complain about martian keyword.
29 function abortLex2 ($word)
30 {
31 return array
32 (
33 'result' => 'NAK',
34 'load' => "could not parse keyword '${word}'"
35 );
36 }
37
38 // Complain about wrong FSM state.
39 function abortLex3 ($state)
40 {
41 return array
42 (
43 'result' => 'NAK',
44 'load' => "FSM state is still '${state}' at end of input"
45 );
46 }
47
48 // Produce a list of lexems from the given text. Possible lexems are:
49 function getLexemsFromRackCode ($text)
50 {
51 $ret = array();
52 $textlen = mb_strlen ($text);
53 $state = "ESOTSM";
54 for ($i = 0; $i < $textlen; $i++) :
55 $char = mb_substr ($text, $i, 1);
56 $newstate = $state;
57 switch ($state) :
58 case 'ESOTSM':
59 switch (TRUE)
60 {
61 case ($char == '('):
62 $ret[] = array ('type' => 'LEX_LBRACE');
63 break;
64 case ($char == ')'):
65 $ret[] = array ('type' => 'LEX_RBRACE');
66 break;
67 case ($char == '#'):
68 $newstate = 'skipping comment';
69 break;
70 case (mb_ereg ('[[:alpha:]]', $char) > 0):
71 $newstate = 'reading keyword';
72 $buffer = $char;
73 break;
74 case (preg_match ('/^[ \t\n\r]$/', $char)):
75 // nom-nom...
76 break;
77 case ($char == '{'):
78 $newstate = 'reading tag 1';
79 break;
80 case ($char == '['):
81 $newstate = 'reading predicate 1';
82 break;
83 default:
84 return abortLex1 ($state, $text, $i);
85 }
86 break;
87 case 'reading keyword':
88 switch (TRUE)
89 {
90 case (mb_ereg ('[[:alpha:]]', $char) > 0):
91 $buffer .= $char;
92 break;
93 case (preg_match ('/^[ \t\n]$/', $char)):
94 // got a word, sort it out
95 switch ($buffer)
96 {
97 case 'allow':
98 case 'deny':
99 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
100 break;
101 case 'define':
102 $ret[] = array ('type' => 'LEX_DEFINE');
103 break;
104 case 'and':
105 case 'or':
106 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
107 break;
108 case 'not':
109 $ret[] = array ('type' => 'LEX_NOT');
110 break;
111 case 'false':
112 case 'true':
113 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
114 break;
115 default:
116 return abortLex2 ($buffer);
117 }
118 $newstate = 'ESOTSM';
119 break;
120 default:
121 return abortLex1 ($state, $text, $i);
122 }
123 break;
124 case 'reading tag 1':
125 switch (TRUE)
126 {
127 case (preg_match ('/^[ \t\n\r]$/', $char)):
128 // nom-nom...
129 break;
130 case (mb_ereg ('[[:alpha:]\$]', $char) > 0):
131 $buffer = $char;
132 $newstate = 'reading tag 2';
133 break;
134 default:
135 return abortLex1 ($state, $text, $i);
136 }
137 break;
138 case 'reading tag 2':
139 switch (TRUE)
140 {
141 case ($char == '}'):
142 $buffer = rtrim ($buffer);
143 if (mb_ereg ('[[:alnum:]]', mb_substr ($buffer, -1)) == 0)
144 return abortLex1 ($state, $text, $i);
145 $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer);
146 $newstate = 'ESOTSM';
147 break;
148 case (mb_ereg ('[[:alnum:] _-]', $char) > 0):
149 $buffer .= $char;
150 break;
151 default:
152 return abortLex1 ($state, $text, $i);
153 }
154 break;
155 case 'reading predicate 1':
156 switch (TRUE)
157 {
158 case (preg_match ('/^[ \t\n\r]$/', $char)):
159 // nom-nom...
160 break;
161 case (mb_ereg ('[[:alpha:]]', $char) > 0):
162 $buffer = $char;
163 $newstate = 'reading predicate 2';
164 break;
165 default:
166 return abortLex1 ($state, $text, $i);
167 }
168 break;
169 case 'reading predicate 2':
170 switch (TRUE)
171 {
172 case ($char == ']'):
173 $buffer = rtrim ($buffer);
174 if (mb_ereg ('[[:alnum:]]', mb_substr ($buffer, -1)) == 0)
175 return abortLex1 ($state, $text, $i);
176 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer);
177 $newstate = 'ESOTSM';
178 break;
179 case (mb_ereg ('[[:alnum:] _-]', $char) > 0):
180 $buffer .= $char;
181 break;
182 default:
183 return abortLex1 ($state, $text, $i);
184 }
185 break;
186 case 'skipping comment':
187 switch ($char)
188 {
189 case "\n":
190 $newstate = 'ESOTSM';
191 default: // eat char, nom-nom...
192 break;
193 }
194 break;
195 default:
196 die (__FUNCTION__ . "(): internal error, state == ${state}");
197 endswitch;
198 $state = $newstate;
199 endfor;
200 if ($state != 'ESOTSM' and $state != 'skipping comment')
201 return abortLex3 ($state);
202 return array ('result' => 'ACK', 'load' => $ret);
203 }
204
205 // Parse the given lexems stream into a list of RackCode sentences. Each such
206 // sentence is a syntax tree, suitable for tag sequence evaluation. The final
207 // parse tree may contain the following nodes:
208 // LEX_TAG
209 // LEX_PREDICATE
210 // LEX_BOOLCONST
211 // SYNT_NOT (1 argument, holding SYNT_EXPR)
212 // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR)
213 // SYNT_DEFINITION (2 arguments: term and definition)
214 // SYNT_GRANT (2 arguments: decision and condition)
215 // SYNT_CODETEXT (sequence of sentences)
216 //
217 // After parsing the input successfully a list of SYNT_GRANT and SYNT_DEFINITION
218 // trees is returned.
219 function getSentencesFromLexems ($lexems)
220 {
221 $stack = array(); // subject to array_push() and array_pop()
222 $done = 0; // $lexems[$done] is the next item in the tape
223 $todo = count ($lexems);
224
225 // Perform shift-reduce processing. The "accept" actions occurs with an
226 // empty input tape and the stack holding only one symbol (the start
227 // symbol, SYNT_CODETEXT).
228 while (TRUE)
229 {
230 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
231 $stacksize = count ($stack);
232 if ($stacksize >= 1)
233 {
234 $stacktop = array_pop ($stack);
235 // It is possible to run into a S/R conflict, when having a syntaxically
236 // correct sentence base on the stack and some "and {something}" items
237 // on the input tape, hence let's detect this specific case and insist
238 // on "shift" action to make EXPR parsing hungry one.
239 if
240 (
241 $stacktop['type'] == 'SYNT_EXPR' and
242 ($done < $todo) and
243 $lexems[$done]['type'] == 'LEX_BOOLOP'
244 )
245 {
246 // shift!
247 array_push ($stack, $stacktop);
248 array_push ($stack, $lexems[$done++]);
249 continue;
250 }
251 if ($stacksize >= 2)
252 {
253 $stacksecondtop = array_pop ($stack);
254 if ($stacksize >= 3)
255 {
256 $stackthirdtop = array_pop ($stack);
257 array_push ($stack, $stackthirdtop);
258 }
259 array_push ($stack, $stacksecondtop);
260 }
261 array_push ($stack, $stacktop);
262 // First detect definition start to save the predicate from being reduced into
263 // expression.
264 if
265 (
266 $stacktop['type'] == 'LEX_PREDICATE' and
267 $stacksecondtop['type'] == 'LEX_DEFINE'
268 )
269 {
270 array_pop ($stack);
271 array_pop ($stack);
272 array_push
273 (
274 $stack,
275 array
276 (
277 'type' => 'SYNT_DEFINE',
278 'load' => $stacktop['load']
279 )
280 );
281 continue;
282 }
283 // Try "replace" action only on a non-empty stack.
284 // If a handle is found for reversing a production rule, do it and start a new
285 // cycle instead of advancing further on rule list. This will preserve rule priority
286 // in the grammar and keep us from an extra shift action.
287 if
288 (
289 $stacktop['type'] == 'LEX_BOOLCONST' or
290 $stacktop['type'] == 'LEX_TAG' or
291 $stacktop['type'] == 'LEX_PREDICATE'
292 )
293 {
294 // reduce!
295 array_pop ($stack);
296 array_push
297 (
298 $stack,
299 array
300 (
301 'type' => 'SYNT_EXPR',
302 'load' => $stacktop
303 )
304 );
305 continue;
306 }
307 if
308 (
309 $stacktop['type'] == 'SYNT_EXPR' and
310 $stacksecondtop['type'] == 'LEX_NOT'
311 )
312 {
313 // reduce!
314 array_pop ($stack);
315 array_pop ($stack);
316 array_push
317 (
318 $stack,
319 array
320 (
321 'type' => 'SYNT_EXPR',
322 'load' => array
323 (
324 'type' => 'SYNT_NOTEXPR',
325 'load' => $stacktop['load']
326 )
327 )
328 );
329 continue;
330 }
331 if
332 (
333 $stacktop['type'] == 'LEX_RBRACE' and
334 $stacksecondtop['type'] == 'SYNT_EXPR' and
335 $stackthirdtop['type'] == 'LEX_LBRACE'
336 )
337 {
338 // reduce!
339 array_pop ($stack);
340 array_pop ($stack);
341 array_pop ($stack);
342 array_push
343 (
344 $stack,
345 $stacksecondtop
346 );
347 continue;
348 }
349 if
350 (
351 $stacktop['type'] == 'SYNT_EXPR' and
352 $stacksecondtop['type'] == 'LEX_BOOLOP' and
353 $stackthirdtop['type'] == 'SYNT_EXPR'
354 )
355 {
356 // reduce!
357 array_pop ($stack);
358 array_pop ($stack);
359 array_pop ($stack);
360 array_push
361 (
362 $stack,
363 array
364 (
365 'type' => 'SYNT_EXPR',
366 'load' => array
367 (
368 'type' => 'SYNT_BOOLOP',
369 'subtype' => $stacksecondtop['load'],
370 'left' => $stackthirdtop['load'],
371 'right' => $stacktop['load']
372 )
373 )
374 );
375 continue;
376 }
377 if
378 (
379 $stacktop['type'] == 'SYNT_EXPR' and
380 $stacksecondtop['type'] == 'LEX_DECISION'
381 )
382 {
383 // reduce!
384 array_pop ($stack);
385 array_pop ($stack);
386 array_push
387 (
388 $stack,
389 array
390 (
391 'type' => 'SYNT_GRANT',
392 'decision' => $stacksecondtop['load'],
393 'condition' => $stacktop['load']
394 )
395 );
396 continue;
397 }
398 if
399 (
400 $stacktop['type'] == 'SYNT_EXPR' and
401 $stacksecondtop['type'] == 'SYNT_DEFINE'
402 )
403 {
404 // reduce!
405 array_pop ($stack);
406 array_pop ($stack);
407 array_push
408 (
409 $stack,
410 array
411 (
412 'type' => 'SYNT_DEFINITION',
413 'term' => $stacksecondtop['load'],
414 'definition' => $stacktop['load']
415 )
416 );
417 continue;
418 }
419 if
420 (
421 ($stacktop['type'] == 'SYNT_GRANT' or $stacktop['type'] == 'SYNT_DEFINITION') and
422 $stacksecondtop['type'] == 'SYNT_CODETEXT'
423 )
424 {
425 // reduce!
426 array_pop ($stack);
427 array_pop ($stack);
428 $stacksecondtop['load'][] = $stacktop;
429 array_push
430 (
431 $stack,
432 $stacksecondtop
433 );
434 continue;
435 }
436 if
437 (
438 $stacktop['type'] == 'SYNT_GRANT' or
439 $stacktop['type'] == 'SYNT_DEFINITION'
440 )
441 {
442 // reduce!
443 array_pop ($stack);
444 array_push
445 (
446 $stack,
447 array
448 (
449 'type' => 'SYNT_CODETEXT',
450 'load' => array ($stacktop)
451 )
452 );
453 continue;
454 }
455 }
456 // The fact we execute here means, that no reduction or early shift
457 // has been done. The only way to enter another iteration is to "shift"
458 // more, if possible. If the tape is empty, we are facing the
459 // "accept"/"reject" dilemma. The only possible way to "accept" is to
460 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
461 if ($done < $todo)
462 {
463 array_push ($stack, $lexems[$done++]);
464 continue;
465 }
466 // The moment of truth.
467 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
468 return array ('result' => 'ACK', 'load' => $stack[0]['load']);
469 return array ('result' => 'NAK', 'load' => 'Syntax error!');
470 }
471 }
472
473 function eval_expression ($expr, $tagchain, $ptable)
474 {
475 switch ($expr['type'])
476 {
477 case 'LEX_TAG': // Return true, if given tag is present on the tag chain.
478 foreach ($tagchain as $tagInfo)
479 if ($expr['load'] == $tagInfo['tag'])
480 return TRUE;
481 return FALSE;
482 case 'LEX_PREDICATE': // Find given predicate in the symbol table and evaluate it.
483 $pname = $expr['load'];
484 if (!isset ($ptable[$pname]))
485 {
486 showError ("Predicate '${pname}' is referenced before declaration", __FUNCTION__);
487 return;
488 }
489 return eval_expression ($ptable[$pname], $tagchain, $ptable);
490 case 'LEX_BOOLCONST': // Evaluate a boolean constant.
491 switch ($expr['load'])
492 {
493 case 'true':
494 return TRUE;
495 case 'false':
496 return FALSE;
497 default:
498 showError ("Could not parse a boolean constant with value '${expr['load']}'", __FUNCTION__);
499 return; // should failure be harder?
500 }
501 case 'SYNT_NOTEXPR':
502 return !eval_expression ($expr['load'], $tagchain, $ptable);
503 case 'SYNT_BOOLOP':
504 $leftresult = eval_expression ($expr['left'], $tagchain, $ptable);
505 switch ($expr['subtype'])
506 {
507 case 'or':
508 if ($leftresult)
509 return TRUE; // early success
510 return eval_expression ($expr['right'], $tagchain, $ptable);
511 case 'and':
512 if (!$leftresult)
513 return FALSE; // early failure
514 return eval_expression ($expr['right'], $tagchain, $ptable);
515 default:
516 showError ("Cannot evaluate unknown boolean operation '${boolop['subtype']}'");
517 return;
518 }
519 default:
520 showError ("Evaluation error, cannot process expression type '${expr['type']}'", __FUNCTION__);
521 break;
522 }
523 }
524
525 function gotClearanceForTagChain ($tagchain)
526 {
527 global $rackCode;
528 $ptable = array();
529 foreach ($rackCode as $sentence)
530 {
531 switch ($sentence['type'])
532 {
533 case 'SYNT_DEFINITION':
534 $ptable[$sentence['term']] = $sentence['definition'];
535 break;
536 case 'SYNT_GRANT':
537 if (eval_expression ($sentence['condition'], $tagchain, $ptable))
538 switch ($sentence['decision'])
539 {
540 case 'allow':
541 return TRUE;
542 case 'deny':
543 return FALSE;
544 default:
545 showError ("Condition match for unknown grant decision '${sentence['decision']}'", __FUNCTION__);
546 break;
547 }
548 break;
549 default:
550 showError ("Can't process sentence of unknown type '${sentence['type']}'", __FUNCTION__);
551 break;
552 }
553 }
554 return FALSE;
555 }
556
557 function getRackCode ($text)
558 {
559 if (!mb_strlen ($text))
560 return array ('result' => 'NAK', 'load' => 'The RackCode text was found empty in ' . __FUNCTION__);
561 $text = str_replace ("\r", '', $text) . "\n";
562 $lex = getLexemsFromRackCode ($text);
563 if ($lex['result'] != 'ACK')
564 return $lex;
565 $synt = getSentencesFromLexems ($lex['load']);
566 if ($synt['result'] != 'ACK')
567 return $synt;
568 // An empty sentence list is semantically valid, yet senseless,
569 // so checking intermediate result once more won't hurt.
570 if (!count ($synt['load']))
571 return array ('result' => 'NAK', 'load' => 'Empty parse tree found in ' . __FUNCTION__);
572 return semanticFilter ($synt['load']);
573 }
574
575 // Return NULL, if the given expression can be evaluated against the given
576 // predicate list. Return the name of the first show stopper otherwise.
577 function firstUnrefPredicate ($plist, $expr)
578 {
579 switch ($expr['type'])
580 {
581 case 'LEX_BOOLCONST':
582 case 'LEX_TAG':
583 return NULL;
584 case 'LEX_PREDICATE':
585 return in_array ($expr['load'], $plist) ? NULL : $expr['load'];
586 case 'SYNT_NOTEXPR':
587 return firstUnrefPredicate ($plist, $expr['load']);
588 case 'SYNT_BOOLOP':
589 if (($tmp = firstUnrefPredicate ($plist, $expr['left'])) !== NULL)
590 return $tmp;
591 if (($tmp = firstUnrefPredicate ($plist, $expr['right'])) !== NULL)
592 return $tmp;
593 return NULL;
594 default:
595 return NULL;
596 }
597 }
598
599 function semanticFilter ($code)
600 {
601 $predicatelist = array();
602 foreach ($code as $sentence)
603 switch ($sentence['type'])
604 {
605 case 'SYNT_DEFINITION':
606 $up = firstUnrefPredicate ($predicatelist, $sentence['definition']);
607 if ($up !== NULL)
608 return array
609 (
610 'result' => 'NAK',
611 'load' => "definition [${sentence['term']}] uses unknown predicate [${up}]"
612 );
613 $predicatelist[] = $sentence['term'];
614 break;
615 case 'SYNT_GRANT':
616 $up = firstUnrefPredicate ($predicatelist, $sentence['condition']);
617 if ($up !== NULL)
618 return array
619 (
620 'result' => 'NAK',
621 'load' => "grant sentence uses unknown predicate [${up}]"
622 );
623 break;
624 default:
625 return array ('result' => 'NAK', 'load' => 'unknown sentence type');
626 }
627 return array ('result' => 'ACK', 'load' => $code);
628 }
629
630 ?>