Hoa central
Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
Hoa\Compiler\Llk\Lexer Class Reference

Public Member Functions

 lexMe ($text, Array $tokens)
 

Protected Member Functions

 nextToken ($offset)
 
 matchLexeme ($lexeme, $regex, $offset)
 

Protected Attributes

 $_lexerState = null
 
 $_text = null
 
 $_tokens = []
 
 $_nsStack = null
 

Detailed Description

Class \Hoa\Compiler\Llk\Lexer.

PP lexer.

Definition at line 49 of file Lexer.php.

Member Function Documentation

Hoa\Compiler\Llk\Lexer::lexMe (   $text,
Array  $tokens 
)

Text tokenizer: splits the given text into an ordered array of tokens.

Parameters
string $text — Text to tokenize.
array $tokens — Token definitions used for matching, grouped by namespace (lexeme name ⇒ regular expression).
Returns
array
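Exceptions
Hoa\Compiler\Exception\UnrecognizedToken — when no lexeme of the current namespace matches at the current offset.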

Definition at line 90 of file Lexer.php.

91  {
92  $this->_text = $text;
93  $this->_tokens = $tokens;
94  $this->_nsStack = null;
95  $offset = 0;
96  $maxOffset = strlen($this->_text);
97  $tokenized = [];
98  $this->_lexerState = 'default';
99  $stack = false;
100 
101  foreach ($this->_tokens as &$tokens) {
102  $_tokens = [];
103 
104  foreach ($tokens as $fullLexeme => $regex) {
105  if (false === strpos($fullLexeme, ':')) {
106  $_tokens[$fullLexeme] = [$regex, null];
107 
108  continue;
109  }
110 
111  list($lexeme, $namespace) = explode(':', $fullLexeme, 2);
112 
113  $stack |= ('__shift__' === substr($namespace, 0, 9));
114 
115  unset($tokens[$fullLexeme]);
116  $_tokens[$lexeme] = [$regex, $namespace];
117  }
118 
119  $tokens = $_tokens;
120  }
121 
122  if (true == $stack) {
123  $this->_nsStack = new \SplStack();
124  }
125 
126  while ($offset < $maxOffset) {
127  $nextToken = $this->nextToken($offset);
128 
129  if (null === $nextToken) {
130  throw new Compiler\Exception\UnrecognizedToken(
131  'Unrecognized token "%s" at line 1 and column %d:' .
132  "\n" . '%s' . "\n" .
133  str_repeat(' ', mb_strlen(substr($text, 0, $offset))) . '↑',
134  0,
135  [
136  mb_substr(substr($text, $offset), 0, 1),
137  $offset + 1,
138  $text
139  ],
140  1,
141  $offset
142  );
143  }
144 
145  if (true === $nextToken['keep']) {
146  $nextToken['offset'] = $offset;
147  $tokenized[] = $nextToken;
148  }
149 
150  $offset += strlen($nextToken['value']);
151  }
152 
153  $tokenized[] = [
154  'token' => 'EOF',
155  'value' => 'EOF',
156  'length' => 0,
157  'namespace' => 'default',
158  'keep' => true,
159  'offset' => $offset
160  ];
161 
162  return $tokenized;
163  }
nextToken($offset)
Definition: Lexer.php:172

Here is the call graph for this function:

Hoa\Compiler\Llk\Lexer::matchLexeme (   $lexeme,
  $regex,
  $offset 
)
protected

Check whether a given lexeme matches the text at the given offset.

Parameters
string $lexeme — Name of the lexeme.
string $regex — Regular expression describing the lexeme.
int $offset — Offset in the text at which the match is attempted.
Returns
array, or null when the lexeme does not match at the given offset
Exceptions
Hoa\Compiler\Exception\Lexer — when the lexeme matches an empty value.

Definition at line 255 of file Lexer.php.

256  {
257  $_regex = str_replace('#', '\#', $regex);
258  $preg = preg_match(
259  '#\G(?|' . $_regex . ')#u',
260  $this->_text,
261  $matches,
262  0,
263  $offset
264  );
265 
266  if (0 === $preg) {
267  return null;
268  }
269 
270  if ('' === $matches[0]) {
271  throw new Compiler\Exception\Lexer(
272  'A lexeme must not match an empty value, which is the ' .
273  'case of "%s" (%s).',
274  3,
275  [$lexeme, $regex]
276  );
277  }
278 
279  return [
280  'token' => $lexeme,
281  'value' => $matches[0],
282  'length' => mb_strlen($matches[0])
283  ];
284  }

Here is the caller graph for this function:

Hoa\Compiler\Llk\Lexer::nextToken (   $offset)
protected

Compute the next token recognized at the given offset of the text, in the current lexer namespace.

Parameters
int $offset — Offset.
Returns
array, or null when no lexeme of the current namespace matches
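Exceptions
Hoa\Compiler\Exception\Lexer — when a namespace shift exceeds the stack depth, or the target namespace does not exist.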

Definition at line 172 of file Lexer.php.

173  {
174  $tokenArray = &$this->_tokens[$this->_lexerState];
175 
176  foreach ($tokenArray as $lexeme => $bucket) {
177  list($regex, $nextState) = $bucket;
178 
179  if (null === $nextState) {
180  $nextState = $this->_lexerState;
181  }
182 
183  $out = $this->matchLexeme($lexeme, $regex, $offset);
184 
185  if (null !== $out) {
186  $out['namespace'] = $this->_lexerState;
187  $out['keep'] = 'skip' !== $lexeme;
188 
189  if ($nextState !== $this->_lexerState) {
190  $shift = false;
191 
192  if (null !== $this->_nsStack &&
193  0 !== preg_match('#^__shift__(?:\s*\*\s*(\d+))?$#', $nextState, $matches)) {
194  $i = isset($matches[1]) ? intval($matches[1]) : 1;
195 
196  if ($i > ($c = count($this->_nsStack))) {
197  throw new Compiler\Exception\Lexer(
198  'Cannot shift namespace %d-times, from token ' .
199  '%s in namespace %s, because the stack ' .
200  'contains only %d namespaces.',
201  1,
202  [
203  $i,
204  $lexeme,
205  $this->_lexerState,
206  $c
207  ]
208  );
209  }
210 
211  while (1 <= $i--) {
212  $previousNamespace = $this->_nsStack->pop();
213  }
214 
215  $nextState = $previousNamespace;
216  $shift = true;
217  }
218 
219  if (!isset($this->_tokens[$nextState])) {
220  throw new Compiler\Exception\Lexer(
221  'Namespace %s does not exist, called by token %s ' .
222  'in namespace %s.',
223  2,
224  [
225  $nextState,
226  $lexeme,
227  $this->_lexerState
228  ]
229  );
230  }
231 
232  if (null !== $this->_nsStack && false === $shift) {
233  $this->_nsStack[] = $this->_lexerState;
234  }
235 
236  $this->_lexerState = $nextState;
237  }
238 
239  return $out;
240  }
241  }
242 
243  return null;
244  }
matchLexeme($lexeme, $regex, $offset)
Definition: Lexer.php:255

Here is the call graph for this function:

Here is the caller graph for this function:

Member Data Documentation

Hoa\Compiler\Llk\Lexer::$_lexerState = null
protected

Definition at line 56 of file Lexer.php.

Hoa\Compiler\Llk\Lexer::$_nsStack = null
protected

Definition at line 77 of file Lexer.php.

Hoa\Compiler\Llk\Lexer::$_text = null
protected

Definition at line 63 of file Lexer.php.

Hoa\Compiler\Llk\Lexer::$_tokens = []
protected

Definition at line 70 of file Lexer.php.


The documentation for this class was generated from the following file: