r2034 + fixed tag name regexp once more
[racktables] / inc / code.php
1 <?php
2 /*
3 * This file implements lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 */
19
20 // Complain about martian char.
21 function lexError1 ($state, $text, $pos)
22 {
23 $message = "invalid char with code " . ord (mb_substr ($text, $pos, 1)) . ' (';
24 $message .= mb_substr ($text, $pos, 1) . ") at position ${pos} (FSM state is '${state}')";
25 return array ('result' => 'NAK', 'load' => $message);
26 }
27
28 // Complain about martian keyword.
29 function lexError2 ($word)
30 {
31 return array
32 (
33 'result' => 'NAK',
34 'load' => "could not parse keyword '${word}'"
35 );
36 }
37
38 // Complain about wrong FSM state.
39 function lexError3 ($state)
40 {
41 return array
42 (
43 'result' => 'NAK',
44 'load' => "FSM state is still '${state}' at end of input"
45 );
46 }
47
48 function lexError4 ($s)
49 {
50 return array
51 (
52 'result' => 'NAK',
53 'load' => "Invalid tag or predicate name '${s}'"
54 );
55 }
56
57 // Produce a list of lexems from the given text. Possible lexems are:
58 function getLexemsFromRackCode ($text)
59 {
60 $ret = array();
61 $textlen = mb_strlen ($text);
62 $state = "ESOTSM";
63 for ($i = 0; $i < $textlen; $i++) :
64 $char = mb_substr ($text, $i, 1);
65 $newstate = $state;
66 switch ($state) :
67 case 'ESOTSM':
68 switch (TRUE)
69 {
70 case ($char == '('):
71 $ret[] = array ('type' => 'LEX_LBRACE');
72 break;
73 case ($char == ')'):
74 $ret[] = array ('type' => 'LEX_RBRACE');
75 break;
76 case ($char == '#'):
77 $newstate = 'skipping comment';
78 break;
79 case (mb_ereg ('[[:alpha:]]', $char) > 0):
80 $newstate = 'reading keyword';
81 $buffer = $char;
82 break;
83 case (preg_match ('/^[ \t\n\r]$/', $char)):
84 // nom-nom...
85 break;
86 case ($char == '{'):
87 $newstate = 'reading tag 1';
88 break;
89 case ($char == '['):
90 $newstate = 'reading predicate 1';
91 break;
92 default:
93 return lexError1 ($state, $text, $i);
94 }
95 break;
96 case 'reading keyword':
97 switch (TRUE)
98 {
99 case (mb_ereg ('[[:alpha:]]', $char) > 0):
100 $buffer .= $char;
101 break;
102 case (preg_match ('/^[ \t\n]$/', $char)):
103 // got a word, sort it out
104 switch ($buffer)
105 {
106 case 'allow':
107 case 'deny':
108 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
109 break;
110 case 'define':
111 $ret[] = array ('type' => 'LEX_DEFINE');
112 break;
113 case 'and':
114 case 'or':
115 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
116 break;
117 case 'not':
118 $ret[] = array ('type' => 'LEX_NOT');
119 break;
120 case 'false':
121 case 'true':
122 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
123 break;
124 default:
125 return lexError2 ($buffer);
126 }
127 $newstate = 'ESOTSM';
128 break;
129 default:
130 return lexError1 ($state, $text, $i);
131 }
132 break;
133 case 'reading tag 1':
134 switch (TRUE)
135 {
136 case (preg_match ('/^[ \t\n\r]$/', $char)):
137 // nom-nom...
138 break;
139 case (mb_ereg ('[[:alnum:]\$]', $char) > 0):
140 $buffer = $char;
141 $newstate = 'reading tag 2';
142 break;
143 default:
144 return lexError1 ($state, $text, $i);
145 }
146 break;
147 case 'reading tag 2':
148 switch (TRUE)
149 {
150 case ($char == '}'):
151 $buffer = rtrim ($buffer);
152 if (!validTagName ($buffer, TRUE))
153 return lexError4 ($buffer);
154 $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer);
155 $newstate = 'ESOTSM';
156 break;
157 case (mb_ereg ('[[:alnum:]\. _~-]', $char) > 0):
158 $buffer .= $char;
159 break;
160 default:
161 return lexError1 ($state, $text, $i);
162 }
163 break;
164 case 'reading predicate 1':
165 switch (TRUE)
166 {
167 case (preg_match ('/^[ \t\n\r]$/', $char)):
168 // nom-nom...
169 break;
170 case (mb_ereg ('[[:alnum:]]', $char) > 0):
171 $buffer = $char;
172 $newstate = 'reading predicate 2';
173 break;
174 default:
175 return lexError1 ($state, $text, $i);
176 }
177 break;
178 case 'reading predicate 2':
179 switch (TRUE)
180 {
181 case ($char == ']'):
182 $buffer = rtrim ($buffer);
183 if (!validTagName ($buffer))
184 return lexError4 ($buffer);
185 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer);
186 $newstate = 'ESOTSM';
187 break;
188 case (mb_ereg ('[[:alnum:]\. _~-]', $char) > 0):
189 $buffer .= $char;
190 break;
191 default:
192 return lexError1 ($state, $text, $i);
193 }
194 break;
195 case 'skipping comment':
196 switch ($char)
197 {
198 case "\n":
199 $newstate = 'ESOTSM';
200 default: // eat char, nom-nom...
201 break;
202 }
203 break;
204 default:
205 die (__FUNCTION__ . "(): internal error, state == ${state}");
206 endswitch;
207 $state = $newstate;
208 endfor;
209 if ($state != 'ESOTSM' and $state != 'skipping comment')
210 return lexError3 ($state);
211 return array ('result' => 'ACK', 'load' => $ret);
212 }
213
214 // Parse the given lexems stream into a list of RackCode sentences. Each such
215 // sentence is a syntax tree, suitable for tag sequence evaluation. The final
216 // parse tree may contain the following nodes:
217 // LEX_TAG
218 // LEX_PREDICATE
219 // LEX_BOOLCONST
220 // SYNT_NOT (1 argument, holding SYNT_EXPR)
221 // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR)
222 // SYNT_DEFINITION (2 arguments: term and definition)
223 // SYNT_GRANT (2 arguments: decision and condition)
224 // SYNT_CODETEXT (sequence of sentences)
225 //
226 // After parsing the input successfully a list of SYNT_GRANT and SYNT_DEFINITION
227 // trees is returned.
228 function getSentencesFromLexems ($lexems)
229 {
230 $stack = array(); // subject to array_push() and array_pop()
231 $done = 0; // $lexems[$done] is the next item in the tape
232 $todo = count ($lexems);
233
234 // Perform shift-reduce processing. The "accept" actions occurs with an
235 // empty input tape and the stack holding only one symbol (the start
236 // symbol, SYNT_CODETEXT).
237 while (TRUE)
238 {
239 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
240 $stacksize = count ($stack);
241 if ($stacksize >= 1)
242 {
243 $stacktop = array_pop ($stack);
244 // It is possible to run into a S/R conflict, when having a syntaxically
245 // correct sentence base on the stack and some "and {something}" items
246 // on the input tape, hence let's detect this specific case and insist
247 // on "shift" action to make EXPR parsing hungry one.
248 if
249 (
250 $stacktop['type'] == 'SYNT_EXPR' and
251 ($done < $todo) and
252 $lexems[$done]['type'] == 'LEX_BOOLOP'
253 )
254 {
255 // shift!
256 array_push ($stack, $stacktop);
257 array_push ($stack, $lexems[$done++]);
258 continue;
259 }
260 if ($stacksize >= 2)
261 {
262 $stacksecondtop = array_pop ($stack);
263 if ($stacksize >= 3)
264 {
265 $stackthirdtop = array_pop ($stack);
266 array_push ($stack, $stackthirdtop);
267 }
268 array_push ($stack, $stacksecondtop);
269 }
270 array_push ($stack, $stacktop);
271 // First detect definition start to save the predicate from being reduced into
272 // expression.
273 if
274 (
275 $stacktop['type'] == 'LEX_PREDICATE' and
276 $stacksecondtop['type'] == 'LEX_DEFINE'
277 )
278 {
279 array_pop ($stack);
280 array_pop ($stack);
281 array_push
282 (
283 $stack,
284 array
285 (
286 'type' => 'SYNT_DEFINE',
287 'load' => $stacktop['load']
288 )
289 );
290 continue;
291 }
292 // Try "replace" action only on a non-empty stack.
293 // If a handle is found for reversing a production rule, do it and start a new
294 // cycle instead of advancing further on rule list. This will preserve rule priority
295 // in the grammar and keep us from an extra shift action.
296 if
297 (
298 $stacktop['type'] == 'LEX_BOOLCONST' or
299 $stacktop['type'] == 'LEX_TAG' or
300 $stacktop['type'] == 'LEX_PREDICATE'
301 )
302 {
303 // reduce!
304 array_pop ($stack);
305 array_push
306 (
307 $stack,
308 array
309 (
310 'type' => 'SYNT_EXPR',
311 'load' => $stacktop
312 )
313 );
314 continue;
315 }
316 if
317 (
318 $stacktop['type'] == 'SYNT_EXPR' and
319 $stacksecondtop['type'] == 'LEX_NOT'
320 )
321 {
322 // reduce!
323 array_pop ($stack);
324 array_pop ($stack);
325 array_push
326 (
327 $stack,
328 array
329 (
330 'type' => 'SYNT_EXPR',
331 'load' => array
332 (
333 'type' => 'SYNT_NOTEXPR',
334 'load' => $stacktop['load']
335 )
336 )
337 );
338 continue;
339 }
340 if
341 (
342 $stacktop['type'] == 'LEX_RBRACE' and
343 $stacksecondtop['type'] == 'SYNT_EXPR' and
344 $stackthirdtop['type'] == 'LEX_LBRACE'
345 )
346 {
347 // reduce!
348 array_pop ($stack);
349 array_pop ($stack);
350 array_pop ($stack);
351 array_push
352 (
353 $stack,
354 $stacksecondtop
355 );
356 continue;
357 }
358 if
359 (
360 $stacktop['type'] == 'SYNT_EXPR' and
361 $stacksecondtop['type'] == 'LEX_BOOLOP' and
362 $stackthirdtop['type'] == 'SYNT_EXPR'
363 )
364 {
365 // reduce!
366 array_pop ($stack);
367 array_pop ($stack);
368 array_pop ($stack);
369 array_push
370 (
371 $stack,
372 array
373 (
374 'type' => 'SYNT_EXPR',
375 'load' => array
376 (
377 'type' => 'SYNT_BOOLOP',
378 'subtype' => $stacksecondtop['load'],
379 'left' => $stackthirdtop['load'],
380 'right' => $stacktop['load']
381 )
382 )
383 );
384 continue;
385 }
386 if
387 (
388 $stacktop['type'] == 'SYNT_EXPR' and
389 $stacksecondtop['type'] == 'LEX_DECISION'
390 )
391 {
392 // reduce!
393 array_pop ($stack);
394 array_pop ($stack);
395 array_push
396 (
397 $stack,
398 array
399 (
400 'type' => 'SYNT_GRANT',
401 'decision' => $stacksecondtop['load'],
402 'condition' => $stacktop['load']
403 )
404 );
405 continue;
406 }
407 if
408 (
409 $stacktop['type'] == 'SYNT_EXPR' and
410 $stacksecondtop['type'] == 'SYNT_DEFINE'
411 )
412 {
413 // reduce!
414 array_pop ($stack);
415 array_pop ($stack);
416 array_push
417 (
418 $stack,
419 array
420 (
421 'type' => 'SYNT_DEFINITION',
422 'term' => $stacksecondtop['load'],
423 'definition' => $stacktop['load']
424 )
425 );
426 continue;
427 }
428 if
429 (
430 ($stacktop['type'] == 'SYNT_GRANT' or $stacktop['type'] == 'SYNT_DEFINITION') and
431 $stacksecondtop['type'] == 'SYNT_CODETEXT'
432 )
433 {
434 // reduce!
435 array_pop ($stack);
436 array_pop ($stack);
437 $stacksecondtop['load'][] = $stacktop;
438 array_push
439 (
440 $stack,
441 $stacksecondtop
442 );
443 continue;
444 }
445 if
446 (
447 $stacktop['type'] == 'SYNT_GRANT' or
448 $stacktop['type'] == 'SYNT_DEFINITION'
449 )
450 {
451 // reduce!
452 array_pop ($stack);
453 array_push
454 (
455 $stack,
456 array
457 (
458 'type' => 'SYNT_CODETEXT',
459 'load' => array ($stacktop)
460 )
461 );
462 continue;
463 }
464 }
465 // The fact we execute here means, that no reduction or early shift
466 // has been done. The only way to enter another iteration is to "shift"
467 // more, if possible. If the tape is empty, we are facing the
468 // "accept"/"reject" dilemma. The only possible way to "accept" is to
469 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
470 if ($done < $todo)
471 {
472 array_push ($stack, $lexems[$done++]);
473 continue;
474 }
475 // The moment of truth.
476 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
477 return array ('result' => 'ACK', 'load' => $stack[0]['load']);
478 return array ('result' => 'NAK', 'load' => 'Syntax error!');
479 }
480 }
481
482 function eval_expression ($expr, $tagchain, $ptable)
483 {
484 switch ($expr['type'])
485 {
486 case 'LEX_TAG': // Return true, if given tag is present on the tag chain.
487 foreach ($tagchain as $tagInfo)
488 if ($expr['load'] == $tagInfo['tag'])
489 return TRUE;
490 return FALSE;
491 case 'LEX_PREDICATE': // Find given predicate in the symbol table and evaluate it.
492 $pname = $expr['load'];
493 if (!isset ($ptable[$pname]))
494 {
495 showError ("Predicate '${pname}' is referenced before declaration", __FUNCTION__);
496 return;
497 }
498 return eval_expression ($ptable[$pname], $tagchain, $ptable);
499 case 'LEX_BOOLCONST': // Evaluate a boolean constant.
500 switch ($expr['load'])
501 {
502 case 'true':
503 return TRUE;
504 case 'false':
505 return FALSE;
506 default:
507 showError ("Could not parse a boolean constant with value '${expr['load']}'", __FUNCTION__);
508 return; // should failure be harder?
509 }
510 case 'SYNT_NOTEXPR':
511 return !eval_expression ($expr['load'], $tagchain, $ptable);
512 case 'SYNT_BOOLOP':
513 $leftresult = eval_expression ($expr['left'], $tagchain, $ptable);
514 switch ($expr['subtype'])
515 {
516 case 'or':
517 if ($leftresult)
518 return TRUE; // early success
519 return eval_expression ($expr['right'], $tagchain, $ptable);
520 case 'and':
521 if (!$leftresult)
522 return FALSE; // early failure
523 return eval_expression ($expr['right'], $tagchain, $ptable);
524 default:
525 showError ("Cannot evaluate unknown boolean operation '${boolop['subtype']}'");
526 return;
527 }
528 default:
529 showError ("Evaluation error, cannot process expression type '${expr['type']}'", __FUNCTION__);
530 break;
531 }
532 }
533
534 function gotClearanceForTagChain ($tagchain)
535 {
536 global $rackCode;
537 $ptable = array();
538 foreach ($rackCode as $sentence)
539 {
540 switch ($sentence['type'])
541 {
542 case 'SYNT_DEFINITION':
543 $ptable[$sentence['term']] = $sentence['definition'];
544 break;
545 case 'SYNT_GRANT':
546 if (eval_expression ($sentence['condition'], $tagchain, $ptable))
547 switch ($sentence['decision'])
548 {
549 case 'allow':
550 return TRUE;
551 case 'deny':
552 return FALSE;
553 default:
554 showError ("Condition match for unknown grant decision '${sentence['decision']}'", __FUNCTION__);
555 break;
556 }
557 break;
558 default:
559 showError ("Can't process sentence of unknown type '${sentence['type']}'", __FUNCTION__);
560 break;
561 }
562 }
563 return FALSE;
564 }
565
566 function getRackCode ($text)
567 {
568 if (!mb_strlen ($text))
569 return array ('result' => 'NAK', 'load' => 'The RackCode text was found empty in ' . __FUNCTION__);
570 $text = str_replace ("\r", '', $text) . "\n";
571 $lex = getLexemsFromRackCode ($text);
572 if ($lex['result'] != 'ACK')
573 return $lex;
574 $synt = getSentencesFromLexems ($lex['load']);
575 if ($synt['result'] != 'ACK')
576 return $synt;
577 // An empty sentence list is semantically valid, yet senseless,
578 // so checking intermediate result once more won't hurt.
579 if (!count ($synt['load']))
580 return array ('result' => 'NAK', 'load' => 'Empty parse tree found in ' . __FUNCTION__);
581 return semanticFilter ($synt['load']);
582 }
583
584 // Return NULL, if the given expression can be evaluated against the given
585 // predicate list. Return the name of the first show stopper otherwise.
586 function firstUnrefPredicate ($plist, $expr)
587 {
588 switch ($expr['type'])
589 {
590 case 'LEX_BOOLCONST':
591 case 'LEX_TAG':
592 return NULL;
593 case 'LEX_PREDICATE':
594 return in_array ($expr['load'], $plist) ? NULL : $expr['load'];
595 case 'SYNT_NOTEXPR':
596 return firstUnrefPredicate ($plist, $expr['load']);
597 case 'SYNT_BOOLOP':
598 if (($tmp = firstUnrefPredicate ($plist, $expr['left'])) !== NULL)
599 return $tmp;
600 if (($tmp = firstUnrefPredicate ($plist, $expr['right'])) !== NULL)
601 return $tmp;
602 return NULL;
603 default:
604 return NULL;
605 }
606 }
607
608 function semanticFilter ($code)
609 {
610 $predicatelist = array();
611 foreach ($code as $sentence)
612 switch ($sentence['type'])
613 {
614 case 'SYNT_DEFINITION':
615 $up = firstUnrefPredicate ($predicatelist, $sentence['definition']);
616 if ($up !== NULL)
617 return array
618 (
619 'result' => 'NAK',
620 'load' => "definition [${sentence['term']}] uses unknown predicate [${up}]"
621 );
622 $predicatelist[] = $sentence['term'];
623 break;
624 case 'SYNT_GRANT':
625 $up = firstUnrefPredicate ($predicatelist, $sentence['condition']);
626 if ($up !== NULL)
627 return array
628 (
629 'result' => 'NAK',
630 'load' => "grant sentence uses unknown predicate [${up}]"
631 );
632 break;
633 default:
634 return array ('result' => 'NAK', 'load' => 'unknown sentence type');
635 }
636 return array ('result' => 'ACK', 'load' => $code);
637 }
638
639 ?>