Lexer.php (16689B)
<?php

/*
 * This file is part of Twig.
 *
 * (c) 2009 Fabien Potencier
 * (c) 2009 Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Lexes a template string.
 *
 * The lexer is a small state machine: tokenize() repeatedly dispatches to
 * one of the lex*() methods depending on the current state (raw data, block
 * tag, variable tag, double-quoted string or string interpolation) until the
 * cursor reaches the end of the template code.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Twig_Lexer implements Twig_LexerInterface
{
    protected $tokens;
    protected $code;
    protected $cursor;
    protected $lineno;
    protected $end;
    protected $state;
    protected $states;
    protected $brackets;
    protected $env;
    protected $filename;
    protected $options;
    protected $regexes;
    protected $position;
    protected $positions;
    protected $currentVarBlockLine;

    const STATE_DATA = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR = 2;
    const STATE_STRING = 3;
    const STATE_INTERPOLATION = 4;

    const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
    const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    const REGEX_DQ_STRING_DELIM = '/"/A';
    const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
    const PUNCTUATION = '()[]{}?:.,|';

    /**
     * Stores the environment, merges the given options over the default
     * delimiters and pre-builds the regular expressions used while lexing.
     *
     * @param Twig_Environment $env     Environment providing the unary/binary operators
     * @param array            $options Overrides for the tag_comment, tag_block, tag_variable,
     *                                  whitespace_trim and interpolation delimiters
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $this->options = array_merge(array(
            'tag_comment' => array('{#', '#}'),
            'tag_block' => array('{%', '%}'),
            'tag_variable' => array('{{', '}}'),
            'whitespace_trim' => '-',
            'interpolation' => array('#{', '}'),
        ), $options);

        $this->regexes = array(
            'lex_var' => '/\s*'.preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_variable'][1], '/').'/A',
            'lex_block' => '/\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')\n?/A',
            // "%s" is a placeholder replaced by the tag name in lexRawData()
            'lex_raw_data' => '/('.preg_quote($this->options['tag_block'][0].$this->options['whitespace_trim'], '/').'|'.preg_quote($this->options['tag_block'][0], '/').')\s*(?:end%s)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/s',
            'operator' => $this->getOperatorRegex(),
            'lex_comment' => '/(?:'.preg_quote($this->options['whitespace_trim'], '/').preg_quote($this->options['tag_comment'][1], '/').'\s*|'.preg_quote($this->options['tag_comment'][1], '/').')\n?/s',
            'lex_block_raw' => '/\s*(raw|verbatim)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/As',
            'lex_block_line' => '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As',
            'lex_tokens_start' => '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s',
            'interpolation_start' => '/'.preg_quote($this->options['interpolation'][0], '/').'\s*/A',
            'interpolation_end' => '/\s*'.preg_quote($this->options['interpolation'][1], '/').'/A',
        );
    }

    /**
     * {@inheritdoc}
     *
     * Resets the lexer state, normalizes line endings to "\n" and runs the
     * state machine over the code. The mbstring internal encoding, when it
     * had to be switched, is restored on both the success and the error path.
     *
     * @throws Twig_Error_Syntax When a bracket or string is left unclosed, or a lexing method rejects the input
     */
    public function tokenize($code, $filename = null)
    {
        $mbEncoding = null;
        // Bit 2 of mbstring.func_overload is tested here; presumably this is
        // done so the string functions used below keep working on bytes.
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }

        try {
            $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
            $this->filename = $filename;
            $this->cursor = 0;
            $this->lineno = 1;
            $this->end = strlen($this->code);
            $this->tokens = array();
            $this->state = self::STATE_DATA;
            $this->states = array();
            $this->brackets = array();
            $this->position = -1;

            // find all token starts in one go
            preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
            $this->positions = $matches;

            while ($this->cursor < $this->end) {
                // dispatch to the lexing functions depending
                // on the current state
                switch ($this->state) {
                    case self::STATE_DATA:
                        $this->lexData();
                        break;

                    case self::STATE_BLOCK:
                        $this->lexBlock();
                        break;

                    case self::STATE_VAR:
                        $this->lexVar();
                        break;

                    case self::STATE_STRING:
                        $this->lexString();
                        break;

                    case self::STATE_INTERPOLATION:
                        $this->lexInterpolation();
                        break;
                }
            }

            $this->pushToken(Twig_Token::EOF_TYPE);

            if (!empty($this->brackets)) {
                list($expect, $lineno) = array_pop($this->brackets);
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
            }
        } catch (Exception $e) {
            // Do not leave the process-wide encoding switched to ASCII when
            // lexing fails; restore it and let the original error propagate.
            if ($mbEncoding) {
                mb_internal_encoding($mbEncoding);
            }

            throw $e;
        }

        if ($mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return new Twig_TokenStream($this->tokens, $this->filename);
    }

    /**
     * Lexes raw template text up to the next tag start and switches to the
     * state matching that tag (comment, block or variable).
     */
    protected function lexData()
    {
        // if no matches are left we return the rest of the template as simple text token
        if ($this->position == count($this->positions[0]) - 1) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // Find the first token after the current cursor
        $position = $this->positions[0][++$this->position];
        while ($position[1] < $this->cursor) {
            if ($this->position == count($this->positions[0]) - 1) {
                // every remaining tag start lies behind the cursor; the next
                // call takes the "no matches left" branch above
                return;
            }
            $position = $this->positions[0][++$this->position];
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

        // Compare the captured marker instead of only testing isset(): depending
        // on the PHP version an unmatched optional group may be reported as an
        // empty-string entry rather than being absent, which would otherwise
        // trim the text in front of every tag.
        $trimMarker = isset($this->positions[2][$this->position][0]) ? $this->positions[2][$this->position][0] : null;
        if ($trimMarker === $this->options['whitespace_trim']) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        $this->moveCursor($textContent.$position[0]);

        switch ($this->positions[1][$this->position][0]) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                // raw data?
                if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData($match[1]);
                // {% line \d+ %}
                } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->pushState(self::STATE_BLOCK);
                    $this->currentVarBlockLine = $this->lineno;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->pushState(self::STATE_VAR);
                $this->currentVarBlockLine = $this->lineno;
                break;
        }
    }

    /**
     * Lexes the inside of a block tag: either its end delimiter (only when no
     * bracket is open) or the next expression token.
     */
    protected function lexBlock()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes the inside of a variable tag: either its end delimiter (only when
     * no bracket is open) or the next expression token.
     */
    protected function lexVar()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor (after skipping leading
     * whitespace): operator, name, number, punctuation, complete string or
     * the opening quote of an interpolated string.
     *
     * @throws Twig_Error_Syntax On end of input, mismatched brackets or an unlexable character
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $this->state === self::STATE_BLOCK ? 'block' : 'variable'), $this->currentVarBlockLine, $this->filename);
            }
        }

        // operators
        if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            $number = (float) $match[0];  // floats
            if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
                $number = (int) $match[0]; // integers lower than the maximum
            }
            $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            // opening bracket
            if (false !== strpos('([{', $this->code[$this->cursor])) {
                $this->brackets[] = array($this->code[$this->cursor], $this->lineno);
            }
            // closing bracket
            elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->filename);
                }

                list($expect, $lineno) = array_pop($this->brackets);
                if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // opening double quoted string
        elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array('"', $this->lineno);
            $this->pushState(self::STATE_STRING);
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }

    /**
     * Lexes the body of a raw/verbatim block as a single text token, up to
     * and including the matching end tag.
     *
     * @param string $tag The tag name captured by the lex_block_raw regex ("raw" or "verbatim")
     *
     * @throws Twig_Error_Syntax When the end tag cannot be found
     */
    protected function lexRawData($tag)
    {
        if ('raw' === $tag) {
            @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
        }

        if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->filename);
        }

        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->moveCursor($text.$match[0][0]);

        // the end tag was opened with the whitespace-trim marker
        if (false !== strpos($match[1][0], $this->options['whitespace_trim'])) {
            $text = rtrim($text);
        }

        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
    }

    /**
     * Skips a comment: moves the cursor past the comment end delimiter
     * without emitting any token.
     *
     * @throws Twig_Error_Syntax When the comment is never closed
     */
    protected function lexComment()
    {
        if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unclosed comment.', $this->lineno, $this->filename);
        }

        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Lexes the inside of a double-quoted string: an interpolation start, a
     * literal string part, or the closing quote.
     *
     * @throws Twig_Error_Syntax When the character at the cursor cannot be lexed
     */
    protected function lexString()
    {
        if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
            $this->moveCursor($match[0]);
            $this->pushState(self::STATE_INTERPOLATION);
        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && strlen($match[0]) > 0) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            list($expect, $lineno) = array_pop($this->brackets);
            if ($this->code[$this->cursor] != '"') {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
            }

            $this->popState();
            ++$this->cursor;
        } else {
            // unlexable: nothing above consumed input (e.g. a lone backslash
            // as the very last character), so fail instead of looping forever
            // in tokenize() with a cursor that never advances
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }

    /**
     * Lexes the inside of a string interpolation: either its end delimiter
     * (only when the innermost open bracket is the interpolation start) or
     * the next expression token.
     */
    protected function lexInterpolation()
    {
        $bracket = end($this->brackets);
        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
            array_pop($this->brackets);
            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Appends a token carrying the current line number to the token list.
     *
     * @param int    $type  One of the Twig_Token::*_TYPE constants
     * @param string $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor by the length of the consumed text and the line
     * counter by the number of newlines it contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $this->cursor += strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds the anchored regex matching "=" and every unary/binary operator
     * of the environment, longest operators first.
     *
     * @return string The operator regex
     */
    protected function getOperatorRegex()
    {
        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // sort by length, longest first, so the alternation prefers the
        // longest operator
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = array();
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                $r = preg_quote($operator, '/').'(?=[\s()])';
            } else {
                $r = preg_quote($operator, '/');
            }

            // an operator with a space can be any amount of whitespaces
            $r = preg_replace('/\s+/', '\s+', $r);

            $regex[] = $r;
        }

        return '/'.implode('|', $regex).'/A';
    }

    /**
     * Saves the current state on the state stack and enters a new one.
     *
     * @param int $state One of the STATE_* constants
     */
    protected function pushState($state)
    {
        $this->states[] = $this->state;
        $this->state = $state;
    }

    /**
     * Returns to the most recently saved state.
     *
     * @throws Exception When the state stack is empty
     */
    protected function popState()
    {
        if (0 === count($this->states)) {
            throw new Exception('Cannot pop state without a previous state');
        }

        $this->state = array_pop($this->states);
    }
}