Commit | Line | Data |
---|---|---|
f63072f5 | 1 | <?php |
a1b88eca DO |
2 | /* |
3 | * This file implement lexical scanner and syntax analyzer for the RackCode | |
4 | * access configuration language. | |
5 | * | |
6 | * The language consists of the following lexems: | |
7 | * | |
8 | * LEX_LBRACE | |
9 | * LEX_RBRACE | |
10 | * LEX_DECISION | |
11 | * LEX_DEFINE | |
12 | * LEX_BOOLCONST | |
13 | * LEX_NOT | |
14 | * LEX_TAG | |
15 | * LEX_PREDICATE | |
16 | * LEX_BOOLOP | |
17 | * | |
18 | * Comments last from the first # to the end of the line and are filtered out | |
19 | * automatically. | |
20 | */ | |
f63072f5 DO |
21 | |
22 | // Complain about martian char. | |
23 | function abortLex1 ($state, $text, $pos) | |
24 | { | |
25 | echo "Error! Could not parse the following text (current state is '${state}'): "; | |
26 | echo substr ($text, 0, $pos); | |
27 | echo '<font color = red>->' . $text{$pos} . '<-</font>'; | |
28 | echo substr ($text, $pos + 1); | |
29 | die; | |
30 | } | |
31 | ||
32 | // Complain about martian keyword. | |
33 | function abortLex2 ($state, $word) | |
34 | { | |
a1b88eca | 35 | echo "Error! Could not parse word (current state is '${state}'): '${word}'."; |
f63072f5 DO |
36 | die; |
37 | } | |
38 | ||
a1b88eca DO |
39 | // Complain about wrong FSM state. |
40 | function abortLex3 ($state) | |
41 | { | |
42 | echo "Error! Lexical scanner final state is still '${state}' after scanning the last char."; | |
43 | die; | |
44 | } | |
45 | ||
46 | function abortSynt ($lexname) | |
47 | { | |
48 | echo "Error! Unknown lexeme '${lexname}'."; | |
49 | die; | |
50 | } | |
51 | ||
52 | // Produce a list of lexems from the given text. Possible lexems are: | |
53 | function getLexemsFromRackCode ($text) | |
f63072f5 DO |
54 | { |
55 | $ret = array(); | |
56 | $textlen = strlen ($text); | |
57 | $state = "ESOTSM"; | |
58 | for ($i = 0; $i < $textlen; $i++) : | |
59 | $char = $text{$i}; | |
60 | $newstate = $state; | |
61 | switch ($state) : | |
62 | case 'ESOTSM': | |
63 | switch (TRUE) | |
64 | { | |
f63072f5 | 65 | case ($char == '('): |
a1b88eca | 66 | $ret[] = array ('type' => 'LEX_LBRACE'); |
f63072f5 DO |
67 | break; |
68 | case ($char == ')'): | |
a1b88eca | 69 | $ret[] = array ('type' => 'LEX_RBRACE'); |
f63072f5 DO |
70 | break; |
71 | case ($char == '#'): | |
72 | $newstate = 'skipping comment'; | |
73 | break; | |
74 | case (preg_match ('/^[a-zA-Z]$/', $char)): | |
75 | $newstate = 'reading word'; | |
76 | $buffer = $char; | |
77 | break; | |
78 | case (preg_match ('/^[ \t\n]$/', $char)): | |
79 | // nom-nom... | |
80 | break; | |
81 | case ($char == '{'): | |
82 | $newstate = 'reading tag 1'; | |
83 | break; | |
84 | case ($char == '['): | |
85 | $newstate = 'reading predicate 1'; | |
86 | break; | |
87 | default: | |
88 | abortLex1 ($state, $text, $i); | |
89 | } | |
90 | break; | |
91 | case 'reading word': | |
92 | switch (TRUE) | |
93 | { | |
94 | case (preg_match ('/^[a-zA-Z]$/', $char)): | |
95 | $buffer .= $char; | |
96 | break; | |
97 | case (preg_match ('/^[ \t\n]$/', $char)): | |
98 | // got a word, sort it out | |
99 | switch ($buffer) | |
100 | { | |
101 | case 'allow': | |
f63072f5 | 102 | case 'deny': |
a1b88eca | 103 | $ret[] = array ('type' => 'LEX_DECISION', 'load' => $buffer); |
f63072f5 DO |
104 | break; |
105 | case 'define': | |
a1b88eca | 106 | $ret[] = array ('type' => 'LEX_DEFINE'); |
f63072f5 DO |
107 | break; |
108 | case 'and': | |
f63072f5 | 109 | case 'or': |
a1b88eca DO |
110 | $ret[] = array ('type' => 'LEX_BOOLOP', 'load' => $buffer); |
111 | break; | |
112 | case 'not': | |
113 | $ret[] = array ('type' => 'LEX_NOT'); | |
114 | break; | |
115 | case 'false': | |
116 | case 'true': | |
117 | $ret[] = array ('type' => 'LEX_BOOLCONST', 'load' => $buffer); | |
f63072f5 DO |
118 | break; |
119 | default: | |
120 | abortLex2 ($state, $buffer); | |
121 | } | |
122 | $newstate = 'ESOTSM'; | |
123 | break; | |
124 | default: | |
125 | abortLex1 ($state, $text, $i); | |
126 | } | |
127 | break; | |
128 | case 'reading tag 1': | |
129 | switch (TRUE) | |
130 | { | |
131 | case (preg_match ('/^[ \t\n]$/', $char)): | |
132 | // nom-nom... | |
133 | break; | |
134 | case (preg_match ('/^[a-zA-Z\$]$/', $char)): | |
135 | $buffer = $char; | |
136 | $newstate = 'reading tag 2'; | |
137 | break; | |
138 | default: | |
139 | abortLex1 ($state, $text, $i); | |
140 | } | |
141 | break; | |
142 | case 'reading tag 2': | |
143 | switch (TRUE) | |
144 | { | |
145 | case ($char == '}'): | |
e5bd52e5 DO |
146 | $buffer = rtrim ($buffer); |
147 | if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1))) | |
148 | abortLex1 ($state, $text, $i); | |
149 | $ret[] = array ('type' => 'LEX_TAG', 'load' => $buffer); | |
f63072f5 DO |
150 | $newstate = 'ESOTSM'; |
151 | break; | |
152 | case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)): | |
153 | $buffer .= $char; | |
154 | break; | |
155 | default: | |
156 | abortLex1 ($state, $text, $i); | |
157 | } | |
158 | break; | |
159 | case 'reading predicate 1': | |
160 | switch (TRUE) | |
161 | { | |
162 | case (preg_match ('/^[ \t\n]$/', $char)): | |
163 | // nom-nom... | |
164 | break; | |
165 | case (preg_match ('/^[a-zA-Z]$/', $char)): | |
166 | $buffer = $char; | |
167 | $newstate = 'reading predicate 2'; | |
168 | break; | |
169 | default: | |
170 | abortLex1 ($state, $text, $i); | |
171 | } | |
172 | break; | |
173 | case 'reading predicate 2': | |
174 | switch (TRUE) | |
175 | { | |
176 | case ($char == ']'): | |
e5bd52e5 DO |
177 | $buffer = rtrim ($buffer); |
178 | if (!preg_match ('/^[a-zA-Z0-9]$/', substr ($buffer, -1))) | |
179 | abortLex1 ($state, $text, $i); | |
180 | $ret[] = array ('type' => 'LEX_PREDICATE', 'load' => $buffer); | |
f63072f5 DO |
181 | $newstate = 'ESOTSM'; |
182 | break; | |
183 | case (preg_match ('/^[a-zA-Z0-9 _-]$/', $char)): | |
184 | $buffer .= $char; | |
185 | break; | |
186 | default: | |
187 | abortLex1 ($state, $text, $i); | |
188 | } | |
189 | break; | |
190 | case 'skipping comment': | |
191 | switch ($text{$i}) | |
192 | { | |
193 | case "\n": | |
194 | $newstate = 'ESOTSM'; | |
195 | default: // eat char, nom-nom... | |
196 | break; | |
197 | } | |
198 | break; | |
199 | default: | |
200 | die (__FUNCTION__ . "(): internal error, state == ${state}"); | |
201 | endswitch; | |
202 | $state = $newstate; | |
203 | endfor; | |
a1b88eca DO |
204 | if ($state != 'ESOTSM') |
205 | abortLex3 ($state); | |
f63072f5 DO |
206 | return $ret; |
207 | } | |
208 | ||
a1b88eca DO |
209 | function syntReduce_BOOLCONST (&$stack) |
210 | { | |
211 | if (count ($stack) < 1) | |
212 | return FALSE; | |
213 | $top = array_pop ($stack); | |
214 | if ($top['type'] == 'LEX_BOOLCONST') | |
215 | { | |
216 | $s = array | |
217 | ( | |
218 | 'type' => 'SYNT_EXPRESSION', | |
219 | 'kids' => array ($top) | |
220 | ); | |
221 | array_push ($stack, $s); | |
222 | return TRUE; | |
223 | } | |
224 | // No luck, push it back. | |
225 | array_push ($stack, $top); | |
226 | return FALSE; | |
227 | } | |
228 | ||
229 | // Parse the given lexems stream into a list of RackCode sentences. Each such | |
1381b655 DO |
230 | // sentence is a syntax tree, suitable for tag sequence evaluation. It will |
231 | // contain all of the input lexems framed into a parse tree built from the | |
232 | // following nodes: | |
e5bd52e5 DO |
233 | // SYNT_NOT (1 argument, holding SYNT_EXPR) |
234 | // SYNT_BOOLOP (2 arguments, each holding SYNT_EXPR) | |
1381b655 | 235 | // SYNT_EXPR (1 argument, different types) |
e5bd52e5 DO |
236 | // SYNT_DEFINE (keyword with 1 term) |
237 | // SYNT_DEFINITION (2 arguments: term and definition) | |
1381b655 DO |
238 | // SYNT_GRANT (2 arguments: decision and condition) |
239 | // SYNT_CODESENTENCE (either a grant or a definition) | |
240 | // SYNT_CODETEXT (sequence of sentences) | |
a1b88eca DO |
241 | function getSentencesFromLexems ($lexems) |
242 | { | |
a1b88eca DO |
243 | $stack = array(); // subject to array_push() and array_pop() |
244 | $done = 0; // $lexems[$done] is the next item in the tape | |
245 | $todo = count ($lexems); | |
246 | ||
1381b655 DO |
247 | // Perform shift-reduce processing. The "accept" actions occurs with an |
248 | // empty input tape and the stack holding only one symbol (the start | |
249 | // symbol, SYNT_CODETEXT). | |
250 | while (TRUE) | |
a1b88eca DO |
251 | { |
252 | $stacktop = $stacksecondtop = $stackthirdtop = array ('type' => 'null'); | |
253 | $stacksize = count ($stack); | |
254 | if ($stacksize >= 1) | |
255 | { | |
256 | $stacktop = array_pop ($stack); | |
257 | // It is possible to run into a S/R conflict, when having a syntaxically | |
258 | // correct sentence base on the stack and some "and {something}" items | |
259 | // on the input tape, hence let's detect this specific case and insist | |
260 | // on "shift" action to make EXPR parsing hungry one. | |
1381b655 DO |
261 | if |
262 | ( | |
263 | $stacktop['type'] == 'SYNT_EXPR' and | |
264 | ($done < $todo) and | |
265 | $lexems[$done]['type'] == 'LEX_BOOLOP' | |
266 | ) | |
a1b88eca DO |
267 | { |
268 | // shift! | |
269 | array_push ($stack, $stacktop); | |
270 | array_push ($stack, $lexems[$done++]); | |
271 | continue; | |
272 | } | |
273 | if ($stacksize >= 2) | |
274 | { | |
275 | $stacksecondtop = array_pop ($stack); | |
276 | if ($stacksize >= 3) | |
277 | { | |
278 | $stackthirdtop = array_pop ($stack); | |
279 | array_push ($stack, $stackthirdtop); | |
280 | } | |
281 | array_push ($stack, $stacksecondtop); | |
282 | } | |
283 | array_push ($stack, $stacktop); | |
1381b655 DO |
284 | // First detect definition start to save the predicate from being reduced into |
285 | // expression. | |
286 | if | |
287 | ( | |
288 | $stacktop['type'] == 'LEX_PREDICATE' and | |
289 | $stacksecondtop['type'] == 'LEX_DEFINE' | |
290 | ) | |
291 | { | |
292 | array_pop ($stack); | |
293 | array_pop ($stack); | |
294 | array_push | |
295 | ( | |
296 | $stack, | |
297 | array | |
298 | ( | |
299 | 'type' => 'SYNT_DEFINE', | |
300 | 'load' => $stacktop['load'] | |
301 | ) | |
302 | ); | |
303 | continue; | |
304 | } | |
a1b88eca DO |
305 | // Try "replace" action only on a non-empty stack. |
306 | // If a handle is found for reversing a production rule, do it and start a new | |
307 | // cycle instead of advancing further on rule list. This will preserve rule priority | |
308 | // in the grammar and keep us from an extra shift action. | |
309 | if | |
310 | ( | |
311 | $stacktop['type'] == 'LEX_BOOLCONST' or | |
312 | $stacktop['type'] == 'LEX_TAG' or | |
313 | $stacktop['type'] == 'LEX_PREDICATE' | |
314 | ) | |
315 | { | |
1381b655 | 316 | // reduce! |
a1b88eca DO |
317 | array_pop ($stack); |
318 | array_push | |
319 | ( | |
320 | $stack, | |
321 | array | |
322 | ( | |
323 | 'type' => 'SYNT_EXPR', | |
324 | 'load' => $stacktop | |
325 | ) | |
326 | ); | |
327 | continue; | |
328 | } | |
329 | if | |
330 | ( | |
331 | $stacktop['type'] == 'SYNT_EXPR' and | |
332 | $stacksecondtop['type'] == 'LEX_NOT' | |
333 | ) | |
334 | { | |
1381b655 | 335 | // reduce! |
a1b88eca DO |
336 | array_pop ($stack); |
337 | array_pop ($stack); | |
338 | array_push | |
339 | ( | |
340 | $stack, | |
341 | array | |
342 | ( | |
343 | 'type' => 'SYNT_EXPR', | |
344 | 'load' => array | |
345 | ( | |
346 | 'type' => 'SYNT_NOTEXPR', | |
347 | 'arg' => $stacktop | |
348 | ) | |
349 | ) | |
350 | ); | |
351 | continue; | |
352 | } | |
353 | if | |
354 | ( | |
355 | $stacktop['type'] == 'LEX_RBRACE' and | |
356 | $stacksecondtop['type'] == 'SYNT_EXPR' and | |
357 | $stackthirdtop['type'] == 'LEX_LBRACE' | |
358 | ) | |
359 | { | |
1381b655 | 360 | // reduce! |
a1b88eca DO |
361 | array_pop ($stack); |
362 | array_pop ($stack); | |
363 | array_pop ($stack); | |
364 | array_push | |
365 | ( | |
366 | $stack, | |
367 | $stacksecondtop | |
368 | ); | |
369 | continue; | |
370 | } | |
371 | if | |
372 | ( | |
373 | $stacktop['type'] == 'SYNT_EXPR' and | |
374 | $stacksecondtop['type'] == 'LEX_BOOLOP' and | |
375 | $stackthirdtop['type'] == 'SYNT_EXPR' | |
376 | ) | |
377 | { | |
1381b655 | 378 | // reduce! |
a1b88eca DO |
379 | array_pop ($stack); |
380 | array_pop ($stack); | |
381 | array_pop ($stack); | |
382 | array_push | |
383 | ( | |
384 | $stack, | |
385 | array | |
386 | ( | |
387 | 'type' => 'SYNT_EXPR', | |
388 | 'load' => array | |
389 | ( | |
390 | 'type' => 'SYNT_BOOLOP', | |
391 | 'subtype' => $stacksecondtop['load'], | |
392 | 'left' => $stackthirdtop, | |
393 | 'right' => $stacktop | |
394 | ) | |
395 | ) | |
396 | ); | |
397 | continue; | |
398 | } | |
1381b655 DO |
399 | if |
400 | ( | |
401 | $stacktop['type'] == 'SYNT_EXPR' and | |
402 | $stacksecondtop['type'] == 'LEX_DECISION' | |
403 | ) | |
404 | { | |
405 | // reduce! | |
406 | array_pop ($stack); | |
407 | array_pop ($stack); | |
408 | array_push | |
409 | ( | |
410 | $stack, | |
411 | array | |
412 | ( | |
413 | 'type' => 'SYNT_GRANT', | |
414 | 'decision' => $stacksecondtop['load'], | |
415 | 'condition' => $stacktop | |
416 | ) | |
417 | ); | |
418 | continue; | |
419 | } | |
420 | if | |
421 | ( | |
422 | $stacktop['type'] == 'SYNT_EXPR' and | |
423 | $stacksecondtop['type'] == 'SYNT_DEFINE' | |
424 | ) | |
425 | { | |
426 | // reduce! | |
427 | array_pop ($stack); | |
428 | array_pop ($stack); | |
429 | array_push | |
430 | ( | |
431 | $stack, | |
432 | array | |
433 | ( | |
434 | 'type' => 'SYNT_DEFINITION', | |
435 | 'term' => $stacksecondtop, | |
436 | 'definition' => $stacktop | |
437 | ) | |
438 | ); | |
439 | continue; | |
440 | } | |
441 | if | |
442 | ( | |
443 | $stacktop['type'] == 'SYNT_GRANT' or | |
444 | $stacktop['type'] == 'SYNT_DEFINITION' | |
445 | ) | |
446 | { | |
447 | // reduce! | |
448 | array_pop ($stack); | |
449 | array_push | |
450 | ( | |
451 | $stack, | |
452 | array | |
453 | ( | |
454 | 'type' => 'SYNT_CODESENTENCE', | |
455 | 'load' => $stacktop | |
456 | ) | |
457 | ); | |
458 | continue; | |
459 | } | |
460 | if | |
461 | ( | |
462 | $stacktop['type'] == 'SYNT_CODESENTENCE' and | |
463 | $stacksecondtop['type'] == 'SYNT_CODETEXT' | |
464 | ) | |
465 | { | |
466 | // reduce! | |
467 | array_pop ($stack); | |
468 | array_pop ($stack); | |
469 | $stacksecondtop['load'][] = $stacktop; | |
470 | array_push | |
471 | ( | |
472 | $stack, | |
473 | $stacksecondtop | |
474 | ); | |
475 | continue; | |
476 | } | |
477 | if | |
478 | ( | |
479 | $stacktop['type'] == 'SYNT_CODESENTENCE' | |
480 | ) | |
481 | { | |
482 | // reduce! | |
483 | array_pop ($stack); | |
484 | $stacksecondtop['log'][] = $stacktop; | |
485 | array_push | |
486 | ( | |
487 | $stack, | |
488 | array | |
489 | ( | |
490 | 'type' => 'SYNT_CODETEXT', | |
491 | 'load' => array ($stacktop) | |
492 | ) | |
493 | ); | |
494 | continue; | |
495 | } | |
496 | } | |
497 | // The fact we execute here means, that no reduction or early shift | |
498 | // has been done. The only way to enter another iteration is to "shift" | |
499 | // more, if possible. If the tape is empty, we are facing the | |
500 | // "accept"/"reject" dilemma. The only possible way to "accept" is to | |
501 | // have sole starting nonterminal on the stack (SYNT_CODETEXT). | |
502 | if ($done < $todo) | |
503 | { | |
504 | array_push ($stack, $lexems[$done++]); | |
505 | continue; | |
a1b88eca | 506 | } |
1381b655 DO |
507 | // The moment of truth. |
508 | if (count ($stack) == 1 and $stack[0]['type'] == 'SYNT_CODETEXT') | |
509 | return $stack[0]; | |
510 | return NULL; | |
a1b88eca | 511 | } |
a1b88eca DO |
512 | } |
513 | ||
f63072f5 | 514 | ?> |