Hoa central
Parser.php
Go to the documentation of this file.
1 <?php
2 
37 namespace Hoa\Compiler\Llk;
38 
39 use Hoa\Compiler;
40 
49 class Parser
50 {
56  protected $_skip = null;
57 
64  protected $_tokens = null;
65 
71  protected $_rules = null;
72 
78  protected $_currentState = 0;
79 
85  protected $_errorState = 0;
86 
92  protected $_tokenSequence = [];
93 
99  protected $_trace = [];
100 
106  protected $_todo = null;
107 
113  protected $_tree = null;
114 
120  protected $_depth = -1;
121 
122 
123 
131  public function __construct(Array $tokens = [], Array $rules = [])
132  {
133  $this->_tokens = $tokens;
134  $this->_rules = $rules;
135 
136  return;
137  }
138 
148  public function parse($text, $rule = null, $tree = true)
149  {
150  $lexer = new Lexer();
151  $this->_tokenSequence = $lexer->lexMe($text, $this->_tokens);
152  $this->_currentState = 0;
153  $this->_errorState = 0;
154  $this->_trace = [];
155  $this->_todo = [];
156 
157  if (false === array_key_exists($rule, $this->_rules)) {
158  $rule = $this->getRootRule();
159  }
160 
161  $closeRule = new Rule\Ekzit($rule, 0);
162  $openRule = new Rule\Entry($rule, 0, [$closeRule]);
163  $this->_todo = [$closeRule, $openRule];
164 
165  do {
166  $out = $this->unfold();
167 
168  if (null !== $out &&
169  'EOF' === $this->getCurrentToken()) {
170  break;
171  }
172 
173  if (false === $this->backtrack()) {
174  $token = $this->_tokenSequence[$this->_errorState];
175  $offset = $token['offset'];
176  $line = 1;
177  $column = 1;
178 
179  if (!empty($text)) {
180  if (0 === $offset) {
181  $leftnl = 0;
182  } else {
183  $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0;
184  }
185 
186  $rightnl = strpos($text, "\n", $offset);
187  $line = substr_count($text, "\n", 0, $leftnl + 1) + 1;
188  $column = $offset - $leftnl + (0 === $leftnl);
189 
190  if (false !== $rightnl) {
191  $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n");
192  }
193  }
194 
196  'Unexpected token "%s" (%s) at line %d and column %d:' .
197  "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
198  0,
199  [
200  $token['value'],
201  $token['token'],
202  $line,
203  $column,
204  $text
205  ],
206  $line,
207  $column
208  );
209  }
210  } while (true);
211 
212  if (false === $tree) {
213  return true;
214  }
215 
216  $tree = $this->_buildTree();
217 
218  if (!($tree instanceof TreeNode)) {
219  throw new Compiler\Exception(
220  'Parsing error: cannot build AST, the trace is corrupted.',
221  1
222  );
223  }
224 
225  return $this->_tree = $tree;
226  }
227 
233  protected function unfold()
234  {
235  while (0 < count($this->_todo)) {
236  $rule = array_pop($this->_todo);
237 
238  if ($rule instanceof Rule\Ekzit) {
239  $rule->setDepth($this->_depth);
240  $this->_trace[] = $rule;
241 
242  if (false === $rule->isTransitional()) {
243  --$this->_depth;
244  }
245  } else {
246  $ruleName = $rule->getRule();
247  $next = $rule->getData();
248  $zeRule = $this->_rules[$ruleName];
249  $out = $this->_parse($zeRule, $next);
250 
251  if (false === $out) {
252  if (false === $this->backtrack()) {
253  return null;
254  }
255  }
256  }
257  }
258 
259  return true;
260  }
261 
269  protected function _parse(Rule $zeRule, $next)
270  {
271  if ($zeRule instanceof Rule\Token) {
272  $name = $this->getCurrentToken();
273 
274  if ($zeRule->getTokenName() !== $name) {
275  return false;
276  }
277 
278  $value = $this->getCurrentToken('value');
279 
280  if (0 <= $unification = $zeRule->getUnificationIndex()) {
281  for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
282  $trace = $this->_trace[$i];
283 
284  if ($trace instanceof Rule\Entry) {
285  if (false === $trace->isTransitional()) {
286  if ($trace->getDepth() <= $this->_depth) {
287  break;
288  }
289 
290  --$skip;
291  }
292  } elseif ($trace instanceof Rule\Ekzit &&
293  false === $trace->isTransitional()) {
294  $skip += $trace->getDepth() > $this->_depth;
295  }
296 
297  if (0 < $skip) {
298  continue;
299  }
300 
301  if ($trace instanceof Rule\Token &&
302  $unification === $trace->getUnificationIndex() &&
303  $value !== $trace->getValue()) {
304  return false;
305  }
306  }
307  }
308 
309  $namespace = $this->getCurrentToken('namespace');
310  $zzeRule = clone $zeRule;
311  $zzeRule->setValue($value);
312  $zzeRule->setNamespace($namespace);
313 
314  if (isset($this->_tokens[$namespace][$name])) {
315  $zzeRule->setRepresentation($this->_tokens[$namespace][$name]);
316  } else {
317  foreach ($this->_tokens[$namespace] as $_name => $regex) {
318  if (false === $pos = strpos($_name, ':')) {
319  continue;
320  }
321 
322  $_name = substr($_name, 0, $pos);
323 
324  if ($_name === $name) {
325  break;
326  }
327  }
328 
329  $zzeRule->setRepresentation($regex);
330  }
331 
332  array_pop($this->_todo);
333  $this->_trace[] = $zzeRule;
334  $this->_errorState = ++$this->_currentState;
335 
336  return true;
337  } elseif ($zeRule instanceof Rule\Concatenation) {
338  if (false === $zeRule->isTransitional()) {
339  ++$this->_depth;
340  }
341 
342  $this->_trace[] = new Rule\Entry(
343  $zeRule->getName(),
344  0,
345  null,
347  );
348  $content = $zeRule->getContent();
349 
350  for ($i = count($content) - 1; $i >= 0; --$i) {
351  $nextRule = $content[$i];
352  $this->_todo[] = new Rule\Ekzit($nextRule, 0);
353  $this->_todo[] = new Rule\Entry($nextRule, 0);
354  }
355 
356  return true;
357  } elseif ($zeRule instanceof Rule\Choice) {
358  $content = $zeRule->getContent();
359 
360  if ($next >= count($content)) {
361  return false;
362  }
363 
364  if (false === $zeRule->isTransitional()) {
365  ++$this->_depth;
366  }
367 
368  $this->_trace[] = new Rule\Entry(
369  $zeRule->getName(),
370  $next,
371  $this->_todo,
373  );
374  $nextRule = $content[$next];
375  $this->_todo[] = new Rule\Ekzit($nextRule, 0);
376  $this->_todo[] = new Rule\Entry($nextRule, 0);
377 
378  return true;
379  } elseif ($zeRule instanceof Rule\Repetition) {
380  $nextRule = $zeRule->getContent();
381 
382  if (0 === $next) {
383  $name = $zeRule->getName();
384  $min = $zeRule->getMin();
385 
386  if (false === $zeRule->isTransitional()) {
387  ++$this->_depth;
388  }
389 
390  $this->_trace[] = new Rule\Entry(
391  $name,
392  $min,
393  null,
394  $this->_depth
395  );
396  array_pop($this->_todo);
397  $this->_todo[] = new Rule\Ekzit(
398  $name,
399  $min,
400  $this->_todo
401  );
402 
403  for ($i = 0; $i < $min; ++$i) {
404  $this->_todo[] = new Rule\Ekzit($nextRule, 0);
405  $this->_todo[] = new Rule\Entry($nextRule, 0);
406  }
407 
408  return true;
409  } else {
410  $max = $zeRule->getMax();
411 
412  if (-1 != $max && $next > $max) {
413  return false;
414  }
415 
416  $this->_todo[] = new Rule\Ekzit(
417  $zeRule->getName(),
418  $next,
420  );
421  $this->_todo[] = new Rule\Ekzit($nextRule, 0);
422  $this->_todo[] = new Rule\Entry($nextRule, 0);
423 
424  return true;
425  }
426  }
427 
428  return false;
429  }
430 
436  protected function backtrack()
437  {
438  $found = false;
439 
440  do {
441  $last = array_pop($this->_trace);
442 
443  if ($last instanceof Rule\Entry) {
444  $zeRule = $this->_rules[$last->getRule()];
445  $found = $zeRule instanceof Rule\Choice;
446  } elseif ($last instanceof Rule\Ekzit) {
447  $zeRule = $this->_rules[$last->getRule()];
448  $found = $zeRule instanceof Rule\Repetition;
449  } elseif ($last instanceof Rule\Token) {
451  }
452  } while (0 < count($this->_trace) && false === $found);
453 
454  if (false === $found) {
455  return false;
456  }
457 
458  $rule = $last->getRule();
459  $next = $last->getData() + 1;
460  $this->_depth = $last->getDepth();
461  $this->_todo = $last->getTodo();
462  $this->_todo[] = new Rule\Entry($rule, $next);
463 
464  return true;
465  }
466 
475  protected function _buildTree($i = 0, &$children = [])
476  {
477  $max = count($this->_trace);
478 
479  while ($i < $max) {
480  $trace = $this->_trace[$i];
481 
482  if ($trace instanceof Rule\Entry) {
483  $ruleName = $trace->getRule();
484  $rule = $this->_rules[$ruleName];
485  $isRule = false === $trace->isTransitional();
486  $nextTrace = $this->_trace[$i + 1];
487  $id = $rule->getNodeId();
488 
489  // Optimization: Skip empty trace sequence.
490  if ($nextTrace instanceof Rule\Ekzit &&
491  $ruleName == $nextTrace->getRule()) {
492  $i += 2;
493 
494  continue;
495  }
496 
497  if (true === $isRule) {
498  $children[] = $ruleName;
499  }
500 
501  if (null !== $id) {
502  $children[] = [
503  'id' => $id,
504  'options' => $rule->getNodeOptions()
505  ];
506  }
507 
508  $i = $this->_buildTree($i + 1, $children);
509 
510  if (false === $isRule) {
511  continue;
512  }
513 
514  $handle = [];
515  $cId = null;
516  $cOptions = [];
517 
518  do {
519  $pop = array_pop($children);
520 
521  if (true === is_object($pop)) {
522  $handle[] = $pop;
523  } elseif (true === is_array($pop) && null === $cId) {
524  $cId = $pop['id'];
525  $cOptions = $pop['options'];
526  } elseif ($ruleName == $pop) {
527  break;
528  }
529  } while (null !== $pop);
530 
531  if (null === $cId) {
532  $cId = $rule->getDefaultId();
533  $cOptions = $rule->getDefaultOptions();
534  }
535 
536  if (null === $cId) {
537  for ($j = count($handle) - 1; $j >= 0; --$j) {
538  $children[] = $handle[$j];
539  }
540 
541  continue;
542  }
543 
544  if (true === in_array('M', $cOptions) &&
545  true === $this->mergeTree($children, $handle, $cId)) {
546  continue;
547  }
548 
549  if (true === in_array('m', $cOptions) &&
550  true === $this->mergeTree($children, $handle, $cId, true)) {
551  continue;
552  }
553 
554  $cTree = new TreeNode($id ?: $cId);
555 
556  foreach ($handle as $child) {
557  $child->setParent($cTree);
558  $cTree->prependChild($child);
559  }
560 
561  $children[] = $cTree;
562  } elseif ($trace instanceof Rule\Ekzit) {
563  return $i + 1;
564  } else {
565  if (false === $trace->isKept()) {
566  ++$i;
567 
568  continue;
569  }
570 
571  $child = new TreeNode('token', [
572  'token' => $trace->getTokenName(),
573  'value' => $trace->getValue(),
574  'namespace' => $trace->getNamespace(),
575  ]);
576  $children[] = $child;
577  ++$i;
578  }
579  }
580 
581  return $children[0];
582  }
583 
594  protected function mergeTree(
595  &$children,
596  &$handle,
597  $cId,
598  $recursive = false
599  ) {
600  end($children);
601  $last = current($children);
602 
603  if (!is_object($last)) {
604  return false;
605  }
606 
607  if ($cId !== $last->getId()) {
608  return false;
609  }
610 
611  if (true === $recursive) {
612  foreach ($handle as $child) {
613  $this->mergeTreeRecursive($last, $child);
614  }
615 
616  return true;
617  }
618 
619  foreach ($handle as $child) {
620  $last->appendChild($child);
621  $child->setParent($last);
622  }
623 
624  return true;
625  }
626 
635  protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode)
636  {
637  $nNId = $newNode->getId();
638 
639  if ('token' === $nNId) {
640  $node->appendChild($newNode);
641  $newNode->setParent($node);
642 
643  return;
644  }
645 
646  $children = $node->getChildren();
647  end($children);
648  $last = current($children);
649 
650  if ($last->getId() !== $nNId) {
651  $node->appendChild($newNode);
652  $newNode->setParent($node);
653 
654  return;
655  }
656 
657  foreach ($newNode->getChildren() as $child) {
658  $this->mergeTreeRecursive($last, $child);
659  }
660 
661  return;
662  }
663 
670  public function getCurrentToken($kind = 'token')
671  {
672  return $this->_tokenSequence[$this->_currentState][$kind];
673  }
674 
680  public function getTree()
681  {
682  return $this->_tree;
683  }
684 
690  public function getTrace()
691  {
692  return $this->_trace;
693  }
694 
700  public function getTokens()
701  {
702  return $this->_tokens;
703  }
704 
710  public function getTokenSequence()
711  {
712  return $this->_tokenSequence;
713  }
714 
720  public function getRule($name)
721  {
722  if (!isset($this->_rules[$name])) {
723  return null;
724  }
725 
726  return $this->_rules[$name];
727  }
728 
734  public function getRules()
735  {
736  return $this->_rules;
737  }
738 
744  public function getRootRule()
745  {
746  foreach ($this->_rules as $rule => $_) {
747  if (!is_int($rule)) {
748  break;
749  }
750  }
751 
752  return $rule;
753  }
754 }
getCurrentToken($kind= 'token')
Definition: Parser.php:670
_buildTree($i=0, &$children=[])
Definition: Parser.php:475
mergeTreeRecursive(TreeNode $node, TreeNode $newNode)
Definition: Parser.php:635
_parse(Rule $zeRule, $next)
Definition: Parser.php:269
setParent(TreeNode $parent)
Definition: TreeNode.php:278
__construct(Array $tokens=[], Array $rules=[])
Definition: Parser.php:131
parse($text, $rule=null, $tree=true)
Definition: Parser.php:148
$content
Definition: Hoa.php:119
appendChild(TreeNode $child)
Definition: TreeNode.php:211
mergeTree(&$children, &$handle, $cId, $recursive=false)
Definition: Parser.php:594