r1963 + optimized code tree
[racktables] / inc / code.php
1 <?php
2 /*
3 * This file implements lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 */
19
20 // Evaluation function table. The functions below process a lexical/syntax node
21 // of given type and return boolean value. SYNT_EXPRESSION is missing from this
22 // list, because the places, where eval_expression() should be called, are all
23 // defined. Each function is passed three arguments: the evaluated item; list
24 // of tags, against which we evaluate the whole code; predicates table.
25 // The latter changes as the top-level processor meets new definitions on its
26 // way down the code list.
27 $evalfunc = array
28 (
29 'LEX_BOOLCONST' => 'eval_boolconst',
30 'SYNT_NOTEXPR' => 'eval_notexpr',
31 'SYNT_BOOLOP' => 'eval_boolop',
32 'LEX_TAG' => 'eval_tag',
33 'LEX_PREDICATE' => 'eval_predicate',
34 );
35
36 // Complain about martian char.
37 function abortLex1 ($state, $text, $pos)
38 {
39 echo "Error! Could not parse the following text (current state is '${state}'): ";
40 echo substr ($text, 0, $pos);
41 echo '<font color = red>-&gt;' . $text{$pos} . '&lt;-</font>';
42 echo substr ($text, $pos + 1);
43 die;
44 }
45
46 // Complain about martian keyword.
47 function abortLex2 ($state, $word)
48 {
49 echo "Error! Could not parse word (current state is '${state}'): '${word}'.";
50 die;
51 }
52
53 // Complain about wrong FSM state.
54 function abortLex3 ($state)
55 {
56 echo "Error! Lexical scanner final state is still '${state}' after scanning the last char.";
57 die;
58 }
59
60 function abortSynt ($lexname)
61 {
62 echo "Error! Unknown lexeme '${lexname}'.";
63 die;
64 }
65
66 // Produce a list of lexems from the given text. Possible lexems are:
67 function getLexemsFromRackCode ($text)
68 {
69 $ret = array();
70 $textlen = strlen ($text);
71 $state = "ESOTSM";
72 for ($i = 0; $i < $textlen; $i++) :
73 $char = $text{$i};
74 $newstate = $state;
75 switch ($state) :
76 case 'ESOTSM':
77 switch (TRUE)
78 {
79 case ($char == '('):
80 $ret[] = array ('type' => 'LEX_LBRACE');
81 break;
82 case ($char == ')'):
83 $ret[] = array ('type' => 'LEX_RBRACE');
84 break;
85 case ($char == '#'):
86 $newstate = 'skipping comment';
87 break;
88 case (preg_match ('/^[a-zA-Z]$/', $char)):
89 $newstate = 'reading word';
90 $buffer = $char;
91 break;
92 case (preg_match ('/^[ \t\n]$/', $char)):
93 // nom-nom...
94 break;
95 case ($char == '{'):
96 $newstate = 'reading tag 1';
97 break;
98 case ($char == '['):
99 $newstate = 'reading predicate 1';
100 break;
101 default:
102 abortLex1 ($state, $text, $i);
103 }
104 break;
105 case 'reading word':
106 switch (TRUE)
107 {
108 case (preg_match ('/^[a-zA-Z]$/', $char)):
109 $buffer .= $char;
110 break;
111 case (preg_match ('/^[ \t\n]$/', $char)):
112 // got a word, sort it out
113 switch ($buffer)
114 {
115 case 'allow':
116 case 'deny':
117 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
118 break;
119 case 'define':
120 $ret[] = array ('type' => 'LEX_DEFINE');
121 break;
122 case 'and':
123 case 'or':
124 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
125 break;
126 case 'not':
127 $ret[] = array ('type' => 'LEX_NOT');
128 break;
129 case 'false':
130 case 'true':
131 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
132 break;
133 default:
134 abortLex2 ($state, $buffer);
135 }
136 $newstate = 'ESOTSM';
137 break;
138 default:
139 abortLex1 ($state, $text, $i);
140 }
141 break;
142 case 'reading tag 1':
143 switch (TRUE)
144 {
145 case (preg_match ('/^[ \t\n]$/', $char)):
146 // nom-nom...
147 break;
148 case (preg_match ('/^[a-zA-Z\$]$/', $char)):
149 $buffer = $char;
150 $newstate = 'reading tag 2';
151 break;
152 default:
153 abortLex1 ($state, $text, $i);
154 }
155 break;
156 case 'reading tag 2':
157 switch (TRUE)
158 {
159 case ($char == '}'):
160 $buffer = rtrim ($buffer);
161 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
162 abortLex1 ($state, $text, $i);
163 $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer);
164 $newstate = 'ESOTSM';
165 break;
166 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
167 $buffer .= $char;
168 break;
169 default:
170 abortLex1 ($state, $text, $i);
171 }
172 break;
173 case 'reading predicate 1':
174 switch (TRUE)
175 {
176 case (preg_match ('/^[ \t\n]$/', $char)):
177 // nom-nom...
178 break;
179 case (preg_match ('/^[a-zA-Z]$/', $char)):
180 $buffer = $char;
181 $newstate = 'reading predicate 2';
182 break;
183 default:
184 abortLex1 ($state, $text, $i);
185 }
186 break;
187 case 'reading predicate 2':
188 switch (TRUE)
189 {
190 case ($char == ']'):
191 $buffer = rtrim ($buffer);
192 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
193 abortLex1 ($state, $text, $i);
194 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer);
195 $newstate = 'ESOTSM';
196 break;
197 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
198 $buffer .= $char;
199 break;
200 default:
201 abortLex1 ($state, $text, $i);
202 }
203 break;
204 case 'skipping comment':
205 switch ($text{$i})
206 {
207 case "\n":
208 $newstate = 'ESOTSM';
209 default: // eat char, nom-nom...
210 break;
211 }
212 break;
213 default:
214 die (__FUNCTION__ . "(): internal error, state == ${state}");
215 endswitch;
216 $state = $newstate;
217 endfor;
218 if ($state != 'ESOTSM')
219 abortLex3 ($state);
220 return $ret;
221 }
222
223 // Parse the given lexems stream into a list of RackCode sentences. Each such
224 // sentence is a syntax tree, suitable for tag sequence evaluation. The final
225 // parse tree may contain the following nodes:
226 // LEX_TAG
227 // LEX_PREDICATE
228 // LEX_BOOLCONST
229 // SYNT_NOT (1 argument, holding SYNT_EXPR)
230 // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR)
231 // SYNT_DEFINITION (2 arguments: term and definition)
232 // SYNT_GRANT (2 arguments: decision and condition)
233 // SYNT_CODETEXT (sequence of sentences)
234 //
235 // After parsing the input successfully a list of SYNT_GRANT and SYNT_DEFINITION
236 // trees is returned.
237 function getSentencesFromLexems ($lexems)
238 {
239 $stack = array(); // subject to array_push() and array_pop()
240 $done = 0; // $lexems[$done] is the next item in the tape
241 $todo = count ($lexems);
242
243 // Perform shift-reduce processing. The "accept" actions occurs with an
244 // empty input tape and the stack holding only one symbol (the start
245 // symbol, SYNT_CODETEXT).
246 while (TRUE)
247 {
248 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
249 $stacksize = count ($stack);
250 if ($stacksize >= 1)
251 {
252 $stacktop = array_pop ($stack);
253 // It is possible to run into a S/R conflict, when having a syntaxically
254 // correct sentence base on the stack and some "and {something}" items
255 // on the input tape, hence let's detect this specific case and insist
256 // on "shift" action to make EXPR parsing hungry one.
257 if
258 (
259 $stacktop['type'] == 'SYNT_EXPR' and
260 ($done < $todo) and
261 $lexems[$done]['type'] == 'LEX_BOOLOP'
262 )
263 {
264 // shift!
265 array_push ($stack, $stacktop);
266 array_push ($stack, $lexems[$done++]);
267 continue;
268 }
269 if ($stacksize >= 2)
270 {
271 $stacksecondtop = array_pop ($stack);
272 if ($stacksize >= 3)
273 {
274 $stackthirdtop = array_pop ($stack);
275 array_push ($stack, $stackthirdtop);
276 }
277 array_push ($stack, $stacksecondtop);
278 }
279 array_push ($stack, $stacktop);
280 // First detect definition start to save the predicate from being reduced into
281 // expression.
282 if
283 (
284 $stacktop['type'] == 'LEX_PREDICATE' and
285 $stacksecondtop['type'] == 'LEX_DEFINE'
286 )
287 {
288 array_pop ($stack);
289 array_pop ($stack);
290 array_push
291 (
292 $stack,
293 array
294 (
295 'type' => 'SYNT_DEFINE',
296 'load' => $stacktop['load']
297 )
298 );
299 continue;
300 }
301 // Try "replace" action only on a non-empty stack.
302 // If a handle is found for reversing a production rule, do it and start a new
303 // cycle instead of advancing further on rule list. This will preserve rule priority
304 // in the grammar and keep us from an extra shift action.
305 if
306 (
307 $stacktop['type'] == 'LEX_BOOLCONST' or
308 $stacktop['type'] == 'LEX_TAG' or
309 $stacktop['type'] == 'LEX_PREDICATE'
310 )
311 {
312 // reduce!
313 array_pop ($stack);
314 array_push
315 (
316 $stack,
317 array
318 (
319 'type' => 'SYNT_EXPR',
320 'load' => $stacktop
321 )
322 );
323 continue;
324 }
325 if
326 (
327 $stacktop['type'] == 'SYNT_EXPR' and
328 $stacksecondtop['type'] == 'LEX_NOT'
329 )
330 {
331 // reduce!
332 array_pop ($stack);
333 array_pop ($stack);
334 array_push
335 (
336 $stack,
337 array
338 (
339 'type' => 'SYNT_EXPR',
340 'load' => array
341 (
342 'type' => 'SYNT_NOTEXPR',
343 'load' => $stacktop['load']
344 )
345 )
346 );
347 continue;
348 }
349 if
350 (
351 $stacktop['type'] == 'LEX_RBRACE' and
352 $stacksecondtop['type'] == 'SYNT_EXPR' and
353 $stackthirdtop['type'] == 'LEX_LBRACE'
354 )
355 {
356 // reduce!
357 array_pop ($stack);
358 array_pop ($stack);
359 array_pop ($stack);
360 array_push
361 (
362 $stack,
363 $stacksecondtop
364 );
365 continue;
366 }
367 if
368 (
369 $stacktop['type'] == 'SYNT_EXPR' and
370 $stacksecondtop['type'] == 'LEX_BOOLOP' and
371 $stackthirdtop['type'] == 'SYNT_EXPR'
372 )
373 {
374 // reduce!
375 array_pop ($stack);
376 array_pop ($stack);
377 array_pop ($stack);
378 array_push
379 (
380 $stack,
381 array
382 (
383 'type' => 'SYNT_EXPR',
384 'load' => array
385 (
386 'type' => 'SYNT_BOOLOP',
387 'subtype' => $stacksecondtop['load'],
388 'left' => $stackthirdtop['load'],
389 'right' => $stacktop['load']
390 )
391 )
392 );
393 continue;
394 }
395 if
396 (
397 $stacktop['type'] == 'SYNT_EXPR' and
398 $stacksecondtop['type'] == 'LEX_DECISION'
399 )
400 {
401 // reduce!
402 array_pop ($stack);
403 array_pop ($stack);
404 array_push
405 (
406 $stack,
407 array
408 (
409 'type' => 'SYNT_GRANT',
410 'decision' => $stacksecondtop['load'],
411 'condition' => $stacktop['load']
412 )
413 );
414 continue;
415 }
416 if
417 (
418 $stacktop['type'] == 'SYNT_EXPR' and
419 $stacksecondtop['type'] == 'SYNT_DEFINE'
420 )
421 {
422 // reduce!
423 array_pop ($stack);
424 array_pop ($stack);
425 array_push
426 (
427 $stack,
428 array
429 (
430 'type' => 'SYNT_DEFINITION',
431 'term' => $stacksecondtop['load'],
432 'definition' => $stacktop
433 )
434 );
435 continue;
436 }
437 if
438 (
439 ($stacktop['type'] == 'SYNT_GRANT' or $stacktop['type'] == 'SYNT_DEFINITION') and
440 $stacksecondtop['type'] == 'SYNT_CODETEXT'
441 )
442 {
443 // reduce!
444 array_pop ($stack);
445 array_pop ($stack);
446 $stacksecondtop['load'][] = $stacktop;
447 array_push
448 (
449 $stack,
450 $stacksecondtop
451 );
452 continue;
453 }
454 if
455 (
456 $stacktop['type'] == 'SYNT_GRANT' or
457 $stacktop['type'] == 'SYNT_DEFINITION'
458 )
459 {
460 // reduce!
461 array_pop ($stack);
462 array_push
463 (
464 $stack,
465 array
466 (
467 'type' => 'SYNT_CODETEXT',
468 'load' => array ($stacktop)
469 )
470 );
471 continue;
472 }
473 }
474 // The fact we execute here means, that no reduction or early shift
475 // has been done. The only way to enter another iteration is to "shift"
476 // more, if possible. If the tape is empty, we are facing the
477 // "accept"/"reject" dilemma. The only possible way to "accept" is to
478 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
479 if ($done < $todo)
480 {
481 array_push ($stack, $lexems[$done++]);
482 continue;
483 }
484 // The moment of truth.
485 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
486 return $stack[0]['load'];
487 return NULL;
488 }
489 }
490
491 // Evaluate a boolean constant.
492 function eval_boolconst ($const, $tagchain, $ptable)
493 {
494 switch ($const['load'])
495 {
496 case 'true':
497 return TRUE;
498 case 'false':
499 return FALSE;
500 default:
501 showError ("Could not parse a boolean constant with value '${const['load']}'");
502 break;
503 }
504 }
505
506 function eval_expression ($expr, $tagchain, $ptable)
507 {
508 }
509
510 function eval_notexpr ($notexpr, $tagchain, $ptable)
511 {
512 return !eval_expression ($notexpr['load']);
513 }
514
515 function eval_boolop ($boolop, $tagchain, $ptable)
516 {
517 $leftresult = eval_expression ($boolop['left']);
518 switch ($boolop['subtype'])
519 {
520 case 'or':
521 if ($leftresult)
522 return TRUE; // early success
523 return eval_expression ($boolop['right']);
524 case 'and':
525 if (!$leftresult)
526 return FALSE; // early failure
527 return eval_expression ($boolop['right']);
528 default:
529 showError ("Cannot evaluate boolean operation '${boolop['subtype']}'");
530 return;
531 }
532 }
533
534 // Return true, if given tag is present on the tag chain.
535 function eval_tag ($tag, $tagchain, $ptable)
536 {
537 foreach ($tagchain as $tagInfo)
538 if ($tag['load'] == $tagInfo['tag'])
539 return TRUE;
540 return FALSE;
541 }
542
543 // Find given predicate in the symbol table and evaluate it.
544 function eval_predicate ($tag, $tagchain, $ptable)
545 {
546 $pname = $predicate['load'];
547 if (!isset ($ptable[$pname]))
548 {
549 showError ("Predicate '${pname}' is referenced before declaration");
550 return;
551 }
552 return eval_expression ($ptable[$pname]);
553 }
554
555 function gotClearanceForTagChain ($code, $tagchain)
556 {
557 $ptable = array();
558 foreach ($code as $sentence)
559 {
560 switch ($sentence['type'])
561 {
562 case 'SYNT_DEFINITION':
563 $ptable[$sentence['term']] = $sentence['definition'];
564 break;
565 case 'SYNT_GRANT':
566 if (eval_expression ($sentence['condition'], $tagchain, $ptable))
567 switch ($sentence['decision'])
568 {
569 case 'allow':
570 return TRUE;
571 case 'deny':
572 return FALSE;
573 default:
574 showError ("Condition match for unknown grant decision '${sentence['decision']}'");
575 break;
576 }
577 default:
578 showError ("Can't process sentence of unknown type '${sentence['type']}'");
579 break;
580 }
581 }
582 return FALSE;
583 }
584
585 ?>