r1962 + update comments
[racktables] / inc / code.php
1 <?php
2 /*
3 * This file implement lexical scanner and syntax analyzer for the RackCode
4 * access configuration language.
5 *
6 * The language consists of the following lexems:
7 *
8 * LEX_LBRACE
9 * LEX_RBRACE
10 * LEX_DECISION
11 * LEX_DEFINE
12 * LEX_BOOLCONST
13 * LEX_NOT
14 * LEX_TAG
15 * LEX_PREDICATE
16 * LEX_BOOLOP
17 *
18 * Comments last from the first # to the end of the line and are filtered out
19 * automatically.
20 */
21
22 // Complain about martian char.
23 function abortLex1 ($state, $text, $pos)
24 {
25 echo "Error! Could not parse the following text (current state is '${state}'): ";
26 echo substr ($text, 0, $pos);
27 echo '<font color = red>-&gt;' . $text{$pos} . '&lt;-</font>';
28 echo substr ($text, $pos + 1);
29 die;
30 }
31
32 // Complain about martian keyword.
33 function abortLex2 ($state, $word)
34 {
35 echo "Error! Could not parse word (current state is '${state}'): '${word}'.";
36 die;
37 }
38
39 // Complain about wrong FSM state.
40 function abortLex3 ($state)
41 {
42 echo "Error! Lexical scanner final state is still '${state}' after scanning the last char.";
43 die;
44 }
45
46 function abortSynt ($lexname)
47 {
48 echo "Error! Unknown lexeme '${lexname}'.";
49 die;
50 }
51
52 // Produce a list of lexems from the given text. Possible lexems are:
53 function getLexemsFromRackCode ($text)
54 {
55 $ret = array();
56 $textlen = strlen ($text);
57 $state = "ESOTSM";
58 for ($i = 0; $i < $textlen; $i++) :
59 $char = $text{$i};
60 $newstate = $state;
61 switch ($state) :
62 case 'ESOTSM':
63 switch (TRUE)
64 {
65 case ($char == '('):
66 $ret[] = array ('type' => 'LEX_LBRACE');
67 break;
68 case ($char == ')'):
69 $ret[] = array ('type' => 'LEX_RBRACE');
70 break;
71 case ($char == '#'):
72 $newstate = 'skipping comment';
73 break;
74 case (preg_match ('/^[a-zA-Z]$/', $char)):
75 $newstate = 'reading word';
76 $buffer = $char;
77 break;
78 case (preg_match ('/^[ \t\n]$/', $char)):
79 // nom-nom...
80 break;
81 case ($char == '{'):
82 $newstate = 'reading tag 1';
83 break;
84 case ($char == '['):
85 $newstate = 'reading predicate 1';
86 break;
87 default:
88 abortLex1 ($state, $text, $i);
89 }
90 break;
91 case 'reading word':
92 switch (TRUE)
93 {
94 case (preg_match ('/^[a-zA-Z]$/', $char)):
95 $buffer .= $char;
96 break;
97 case (preg_match ('/^[ \t\n]$/', $char)):
98 // got a word, sort it out
99 switch ($buffer)
100 {
101 case 'allow':
102 case 'deny':
103 $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer);
104 break;
105 case 'define':
106 $ret[] = array ('type' => 'LEX_DEFINE');
107 break;
108 case 'and':
109 case 'or':
110 $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer);
111 break;
112 case 'not':
113 $ret[] = array ('type' => 'LEX_NOT');
114 break;
115 case 'false':
116 case 'true':
117 $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer);
118 break;
119 default:
120 abortLex2 ($state, $buffer);
121 }
122 $newstate = 'ESOTSM';
123 break;
124 default:
125 abortLex1 ($state, $text, $i);
126 }
127 break;
128 case 'reading tag 1':
129 switch (TRUE)
130 {
131 case (preg_match ('/^[ \t\n]$/', $char)):
132 // nom-nom...
133 break;
134 case (preg_match ('/^[a-zA-Z\$]$/', $char)):
135 $buffer = $char;
136 $newstate = 'reading tag 2';
137 break;
138 default:
139 abortLex1 ($state, $text, $i);
140 }
141 break;
142 case 'reading tag 2':
143 switch (TRUE)
144 {
145 case ($char == '}'):
146 $buffer = rtrim ($buffer);
147 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
148 abortLex1 ($state, $text, $i);
149 $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer);
150 $newstate = 'ESOTSM';
151 break;
152 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
153 $buffer .= $char;
154 break;
155 default:
156 abortLex1 ($state, $text, $i);
157 }
158 break;
159 case 'reading predicate 1':
160 switch (TRUE)
161 {
162 case (preg_match ('/^[ \t\n]$/', $char)):
163 // nom-nom...
164 break;
165 case (preg_match ('/^[a-zA-Z]$/', $char)):
166 $buffer = $char;
167 $newstate = 'reading predicate 2';
168 break;
169 default:
170 abortLex1 ($state, $text, $i);
171 }
172 break;
173 case 'reading predicate 2':
174 switch (TRUE)
175 {
176 case ($char == ']'):
177 $buffer = rtrim ($buffer);
178 if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1)))
179 abortLex1 ($state, $text, $i);
180 $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer);
181 $newstate = 'ESOTSM';
182 break;
183 case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)):
184 $buffer .= $char;
185 break;
186 default:
187 abortLex1 ($state, $text, $i);
188 }
189 break;
190 case 'skipping comment':
191 switch ($text{$i})
192 {
193 case "\n":
194 $newstate = 'ESOTSM';
195 default: // eat char, nom-nom...
196 break;
197 }
198 break;
199 default:
200 die (__FUNCTION__ . "(): internal error, state == ${state}");
201 endswitch;
202 $state = $newstate;
203 endfor;
204 if ($state != 'ESOTSM')
205 abortLex3 ($state);
206 return $ret;
207 }
208
209 function syntReduce_BOOLCONST (&$stack)
210 {
211 if (count ($stack) < 1)
212 return FALSE;
213 $top = array_pop ($stack);
214 if ($top['type'] == 'LEX_BOOLCONST')
215 {
216 $s = array
217 (
218 'type' => 'SYNT_EXPRESSION',
219 'kids' => array ($top)
220 );
221 array_push ($stack, $s);
222 return TRUE;
223 }
224 // No luck, push it back.
225 array_push ($stack, $top);
226 return FALSE;
227 }
228
229 // Parse the given lexems stream into a list of RackCode sentences. Each such
230 // sentence is a syntax tree, suitable for tag sequence evaluation. It will
231 // contain all of the input lexems framed into a parse tree built from the
232 // following nodes:
233 // SYNT_NOT (1 argument, holding SYNT_EXPR)
234 // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR)
235 // SYNT_EXPR (1 argument, different types)
236 // SYNT_DEFINE (keyword with 1 term)
237 // SYNT_DEFINITION (2 arguments: term and definition)
238 // SYNT_GRANT (2 arguments: decision and condition)
239 // SYNT_CODESENTENCE (either a grant or a definition)
240 // SYNT_CODETEXT (sequence of sentences)
241 function getSentencesFromLexems ($lexems)
242 {
243 $stack = array(); // subject to array_push() and array_pop()
244 $done = 0; // $lexems[$done] is the next item in the tape
245 $todo = count ($lexems);
246
247 // Perform shift-reduce processing. The "accept" actions occurs with an
248 // empty input tape and the stack holding only one symbol (the start
249 // symbol, SYNT_CODETEXT).
250 while (TRUE)
251 {
252 $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null');
253 $stacksize = count ($stack);
254 if ($stacksize >= 1)
255 {
256 $stacktop = array_pop ($stack);
257 // It is possible to run into a S/R conflict, when having a syntaxically
258 // correct sentence base on the stack and some "and {something}" items
259 // on the input tape, hence let's detect this specific case and insist
260 // on "shift" action to make EXPR parsing hungry one.
261 if
262 (
263 $stacktop['type'] == 'SYNT_EXPR' and
264 ($done < $todo) and
265 $lexems[$done]['type'] == 'LEX_BOOLOP'
266 )
267 {
268 // shift!
269 array_push ($stack, $stacktop);
270 array_push ($stack, $lexems[$done++]);
271 continue;
272 }
273 if ($stacksize >= 2)
274 {
275 $stacksecondtop = array_pop ($stack);
276 if ($stacksize >= 3)
277 {
278 $stackthirdtop = array_pop ($stack);
279 array_push ($stack, $stackthirdtop);
280 }
281 array_push ($stack, $stacksecondtop);
282 }
283 array_push ($stack, $stacktop);
284 // First detect definition start to save the predicate from being reduced into
285 // expression.
286 if
287 (
288 $stacktop['type'] == 'LEX_PREDICATE' and
289 $stacksecondtop['type'] == 'LEX_DEFINE'
290 )
291 {
292 array_pop ($stack);
293 array_pop ($stack);
294 array_push
295 (
296 $stack,
297 array
298 (
299 'type' => 'SYNT_DEFINE',
300 'load' => $stacktop['load']
301 )
302 );
303 continue;
304 }
305 // Try "replace" action only on a non-empty stack.
306 // If a handle is found for reversing a production rule, do it and start a new
307 // cycle instead of advancing further on rule list. This will preserve rule priority
308 // in the grammar and keep us from an extra shift action.
309 if
310 (
311 $stacktop['type'] == 'LEX_BOOLCONST' or
312 $stacktop['type'] == 'LEX_TAG' or
313 $stacktop['type'] == 'LEX_PREDICATE'
314 )
315 {
316 // reduce!
317 array_pop ($stack);
318 array_push
319 (
320 $stack,
321 array
322 (
323 'type' => 'SYNT_EXPR',
324 'load' => $stacktop
325 )
326 );
327 continue;
328 }
329 if
330 (
331 $stacktop['type'] == 'SYNT_EXPR' and
332 $stacksecondtop['type'] == 'LEX_NOT'
333 )
334 {
335 // reduce!
336 array_pop ($stack);
337 array_pop ($stack);
338 array_push
339 (
340 $stack,
341 array
342 (
343 'type' => 'SYNT_EXPR',
344 'load' => array
345 (
346 'type' => 'SYNT_NOTEXPR',
347 'arg' => $stacktop
348 )
349 )
350 );
351 continue;
352 }
353 if
354 (
355 $stacktop['type'] == 'LEX_RBRACE' and
356 $stacksecondtop['type'] == 'SYNT_EXPR' and
357 $stackthirdtop['type'] == 'LEX_LBRACE'
358 )
359 {
360 // reduce!
361 array_pop ($stack);
362 array_pop ($stack);
363 array_pop ($stack);
364 array_push
365 (
366 $stack,
367 $stacksecondtop
368 );
369 continue;
370 }
371 if
372 (
373 $stacktop['type'] == 'SYNT_EXPR' and
374 $stacksecondtop['type'] == 'LEX_BOOLOP' and
375 $stackthirdtop['type'] == 'SYNT_EXPR'
376 )
377 {
378 // reduce!
379 array_pop ($stack);
380 array_pop ($stack);
381 array_pop ($stack);
382 array_push
383 (
384 $stack,
385 array
386 (
387 'type' => 'SYNT_EXPR',
388 'load' => array
389 (
390 'type' => 'SYNT_BOOLOP',
391 'subtype' => $stacksecondtop['load'],
392 'left' => $stackthirdtop,
393 'right' => $stacktop
394 )
395 )
396 );
397 continue;
398 }
399 if
400 (
401 $stacktop['type'] == 'SYNT_EXPR' and
402 $stacksecondtop['type'] == 'LEX_DECISION'
403 )
404 {
405 // reduce!
406 array_pop ($stack);
407 array_pop ($stack);
408 array_push
409 (
410 $stack,
411 array
412 (
413 'type' => 'SYNT_GRANT',
414 'decision' => $stacksecondtop['load'],
415 'condition' => $stacktop
416 )
417 );
418 continue;
419 }
420 if
421 (
422 $stacktop['type'] == 'SYNT_EXPR' and
423 $stacksecondtop['type'] == 'SYNT_DEFINE'
424 )
425 {
426 // reduce!
427 array_pop ($stack);
428 array_pop ($stack);
429 array_push
430 (
431 $stack,
432 array
433 (
434 'type' => 'SYNT_DEFINITION',
435 'term' => $stacksecondtop,
436 'definition' => $stacktop
437 )
438 );
439 continue;
440 }
441 if
442 (
443 $stacktop['type'] == 'SYNT_GRANT' or
444 $stacktop['type'] == 'SYNT_DEFINITION'
445 )
446 {
447 // reduce!
448 array_pop ($stack);
449 array_push
450 (
451 $stack,
452 array
453 (
454 'type' => 'SYNT_CODESENTENCE',
455 'load' => $stacktop
456 )
457 );
458 continue;
459 }
460 if
461 (
462 $stacktop['type'] == 'SYNT_CODESENTENCE' and
463 $stacksecondtop['type'] == 'SYNT_CODETEXT'
464 )
465 {
466 // reduce!
467 array_pop ($stack);
468 array_pop ($stack);
469 $stacksecondtop['load'][] = $stacktop;
470 array_push
471 (
472 $stack,
473 $stacksecondtop
474 );
475 continue;
476 }
477 if
478 (
479 $stacktop['type'] == 'SYNT_CODESENTENCE'
480 )
481 {
482 // reduce!
483 array_pop ($stack);
484 $stacksecondtop['log'][] = $stacktop;
485 array_push
486 (
487 $stack,
488 array
489 (
490 'type' => 'SYNT_CODETEXT',
491 'load' => array ($stacktop)
492 )
493 );
494 continue;
495 }
496 }
497 // The fact we execute here means, that no reduction or early shift
498 // has been done. The only way to enter another iteration is to "shift"
499 // more, if possible. If the tape is empty, we are facing the
500 // "accept"/"reject" dilemma. The only possible way to "accept" is to
501 // have sole starting nonterminal on the stack (SYNT_CODETEXT).
502 if ($done < $todo)
503 {
504 array_push ($stack, $lexems[$done++]);
505 continue;
506 }
507 // The moment of truth.
508 if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT')
509 return $stack[0];
510 return NULL;
511 }
512 }
513
514 ?>