shop.balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

Lexer.php (16689B)


      1 <?php
      2 
      3 /*
      4  * This file is part of Twig.
      5  *
      6  * (c) 2009 Fabien Potencier
      7  * (c) 2009 Armin Ronacher
      8  *
      9  * For the full copyright and license information, please view the LICENSE
     10  * file that was distributed with this source code.
     11  */
     12 
     13 /**
     14  * Lexes a template string.
     15  *
     16  * @author Fabien Potencier <fabien@symfony.com>
     17  */
     18 class Twig_Lexer implements Twig_LexerInterface
     19 {
     20     protected $tokens;
     21     protected $code;
     22     protected $cursor;
     23     protected $lineno;
     24     protected $end;
     25     protected $state;
     26     protected $states;
     27     protected $brackets;
     28     protected $env;
     29     protected $filename;
     30     protected $options;
     31     protected $regexes;
     32     protected $position;
     33     protected $positions;
     34     protected $currentVarBlockLine;
     35 
     36     const STATE_DATA = 0;
     37     const STATE_BLOCK = 1;
     38     const STATE_VAR = 2;
     39     const STATE_STRING = 3;
     40     const STATE_INTERPOLATION = 4;
     41 
     42     const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
     43     const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
     44     const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
     45     const REGEX_DQ_STRING_DELIM = '/"/A';
     46     const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
     47     const PUNCTUATION = '()[]{}?:.,|';
     48 
     49     public function __construct(Twig_Environment $env, array $options = array())
     50     {
     51         $this->env = $env;
     52 
     53         $this->options = array_merge(array(
     54             'tag_comment' => array('{#', '#}'),
     55             'tag_block' => array('{%', '%}'),
     56             'tag_variable' => array('{{', '}}'),
     57             'whitespace_trim' => '-',
     58             'interpolation' => array('#{', '}'),
     59         ), $options);
     60 
     61         $this->regexes = array(
     62             'lex_var' => '/\s*'.preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_variable'][1], '/').'/A',
     63             'lex_block' => '/\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')\n?/A',
     64             'lex_raw_data' => '/('.preg_quote($this->options['tag_block'][0].$this->options['whitespace_trim'], '/').'|'.preg_quote($this->options['tag_block'][0], '/').')\s*(?:end%s)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/s',
     65             'operator' => $this->getOperatorRegex(),
     66             'lex_comment' => '/(?:'.preg_quote($this->options['whitespace_trim'], '/').preg_quote($this->options['tag_comment'][1], '/').'\s*|'.preg_quote($this->options['tag_comment'][1], '/').')\n?/s',
     67             'lex_block_raw' => '/\s*(raw|verbatim)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/As',
     68             'lex_block_line' => '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As',
     69             'lex_tokens_start' => '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s',
     70             'interpolation_start' => '/'.preg_quote($this->options['interpolation'][0], '/').'\s*/A',
     71             'interpolation_end' => '/\s*'.preg_quote($this->options['interpolation'][1], '/').'/A',
     72         );
     73     }
     74 
     75     /**
     76      * {@inheritdoc}
     77      */
     78     public function tokenize($code, $filename = null)
     79     {
     80         if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
     81             $mbEncoding = mb_internal_encoding();
     82             mb_internal_encoding('ASCII');
     83         } else {
     84             $mbEncoding = null;
     85         }
     86 
     87         $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
     88         $this->filename = $filename;
     89         $this->cursor = 0;
     90         $this->lineno = 1;
     91         $this->end = strlen($this->code);
     92         $this->tokens = array();
     93         $this->state = self::STATE_DATA;
     94         $this->states = array();
     95         $this->brackets = array();
     96         $this->position = -1;
     97 
     98         // find all token starts in one go
     99         preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
    100         $this->positions = $matches;
    101 
    102         while ($this->cursor < $this->end) {
    103             // dispatch to the lexing functions depending
    104             // on the current state
    105             switch ($this->state) {
    106                 case self::STATE_DATA:
    107                     $this->lexData();
    108                     break;
    109 
    110                 case self::STATE_BLOCK:
    111                     $this->lexBlock();
    112                     break;
    113 
    114                 case self::STATE_VAR:
    115                     $this->lexVar();
    116                     break;
    117 
    118                 case self::STATE_STRING:
    119                     $this->lexString();
    120                     break;
    121 
    122                 case self::STATE_INTERPOLATION:
    123                     $this->lexInterpolation();
    124                     break;
    125             }
    126         }
    127 
    128         $this->pushToken(Twig_Token::EOF_TYPE);
    129 
    130         if (!empty($this->brackets)) {
    131             list($expect, $lineno) = array_pop($this->brackets);
    132             throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
    133         }
    134 
    135         if ($mbEncoding) {
    136             mb_internal_encoding($mbEncoding);
    137         }
    138 
    139         return new Twig_TokenStream($this->tokens, $this->filename);
    140     }
    141 
    142     protected function lexData()
    143     {
    144         // if no matches are left we return the rest of the template as simple text token
    145         if ($this->position == count($this->positions[0]) - 1) {
    146             $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
    147             $this->cursor = $this->end;
    148 
    149             return;
    150         }
    151 
    152         // Find the first token after the current cursor
    153         $position = $this->positions[0][++$this->position];
    154         while ($position[1] < $this->cursor) {
    155             if ($this->position == count($this->positions[0]) - 1) {
    156                 return;
    157             }
    158             $position = $this->positions[0][++$this->position];
    159         }
    160 
    161         // push the template text first
    162         $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
    163         if (isset($this->positions[2][$this->position][0])) {
    164             $text = rtrim($text);
    165         }
    166         $this->pushToken(Twig_Token::TEXT_TYPE, $text);
    167         $this->moveCursor($textContent.$position[0]);
    168 
    169         switch ($this->positions[1][$this->position][0]) {
    170             case $this->options['tag_comment'][0]:
    171                 $this->lexComment();
    172                 break;
    173 
    174             case $this->options['tag_block'][0]:
    175                 // raw data?
    176                 if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, null, $this->cursor)) {
    177                     $this->moveCursor($match[0]);
    178                     $this->lexRawData($match[1]);
    179                 // {% line \d+ %}
    180                 } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, null, $this->cursor)) {
    181                     $this->moveCursor($match[0]);
    182                     $this->lineno = (int) $match[1];
    183                 } else {
    184                     $this->pushToken(Twig_Token::BLOCK_START_TYPE);
    185                     $this->pushState(self::STATE_BLOCK);
    186                     $this->currentVarBlockLine = $this->lineno;
    187                 }
    188                 break;
    189 
    190             case $this->options['tag_variable'][0]:
    191                 $this->pushToken(Twig_Token::VAR_START_TYPE);
    192                 $this->pushState(self::STATE_VAR);
    193                 $this->currentVarBlockLine = $this->lineno;
    194                 break;
    195         }
    196     }
    197 
    198     protected function lexBlock()
    199     {
    200         if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, null, $this->cursor)) {
    201             $this->pushToken(Twig_Token::BLOCK_END_TYPE);
    202             $this->moveCursor($match[0]);
    203             $this->popState();
    204         } else {
    205             $this->lexExpression();
    206         }
    207     }
    208 
    209     protected function lexVar()
    210     {
    211         if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, null, $this->cursor)) {
    212             $this->pushToken(Twig_Token::VAR_END_TYPE);
    213             $this->moveCursor($match[0]);
    214             $this->popState();
    215         } else {
    216             $this->lexExpression();
    217         }
    218     }
    219 
    220     protected function lexExpression()
    221     {
    222         // whitespace
    223         if (preg_match('/\s+/A', $this->code, $match, null, $this->cursor)) {
    224             $this->moveCursor($match[0]);
    225 
    226             if ($this->cursor >= $this->end) {
    227                 throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $this->state === self::STATE_BLOCK ? 'block' : 'variable'), $this->currentVarBlockLine, $this->filename);
    228             }
    229         }
    230 
    231         // operators
    232         if (preg_match($this->regexes['operator'], $this->code, $match, null, $this->cursor)) {
    233             $this->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
    234             $this->moveCursor($match[0]);
    235         }
    236         // names
    237         elseif (preg_match(self::REGEX_NAME, $this->code, $match, null, $this->cursor)) {
    238             $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
    239             $this->moveCursor($match[0]);
    240         }
    241         // numbers
    242         elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, null, $this->cursor)) {
    243             $number = (float) $match[0];  // floats
    244             if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
    245                 $number = (int) $match[0]; // integers lower than the maximum
    246             }
    247             $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
    248             $this->moveCursor($match[0]);
    249         }
    250         // punctuation
    251         elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
    252             // opening bracket
    253             if (false !== strpos('([{', $this->code[$this->cursor])) {
    254                 $this->brackets[] = array($this->code[$this->cursor], $this->lineno);
    255             }
    256             // closing bracket
    257             elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
    258                 if (empty($this->brackets)) {
    259                     throw new Twig_Error_Syntax(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->filename);
    260                 }
    261 
    262                 list($expect, $lineno) = array_pop($this->brackets);
    263                 if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
    264                     throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
    265                 }
    266             }
    267 
    268             $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
    269             ++$this->cursor;
    270         }
    271         // strings
    272         elseif (preg_match(self::REGEX_STRING, $this->code, $match, null, $this->cursor)) {
    273             $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
    274             $this->moveCursor($match[0]);
    275         }
    276         // opening double quoted string
    277         elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, null, $this->cursor)) {
    278             $this->brackets[] = array('"', $this->lineno);
    279             $this->pushState(self::STATE_STRING);
    280             $this->moveCursor($match[0]);
    281         }
    282         // unlexable
    283         else {
    284             throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->filename);
    285         }
    286     }
    287 
    288     protected function lexRawData($tag)
    289     {
    290         if ('raw' === $tag) {
    291             @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
    292         }
    293 
    294         if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
    295             throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->filename);
    296         }
    297 
    298         $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
    299         $this->moveCursor($text.$match[0][0]);
    300 
    301         if (false !== strpos($match[1][0], $this->options['whitespace_trim'])) {
    302             $text = rtrim($text);
    303         }
    304 
    305         $this->pushToken(Twig_Token::TEXT_TYPE, $text);
    306     }
    307 
    308     protected function lexComment()
    309     {
    310         if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
    311             throw new Twig_Error_Syntax('Unclosed comment.', $this->lineno, $this->filename);
    312         }
    313 
    314         $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    315     }
    316 
    317     protected function lexString()
    318     {
    319         if (preg_match($this->regexes['interpolation_start'], $this->code, $match, null, $this->cursor)) {
    320             $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
    321             $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
    322             $this->moveCursor($match[0]);
    323             $this->pushState(self::STATE_INTERPOLATION);
    324         } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, null, $this->cursor) && strlen($match[0]) > 0) {
    325             $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
    326             $this->moveCursor($match[0]);
    327         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, null, $this->cursor)) {
    328             list($expect, $lineno) = array_pop($this->brackets);
    329             if ($this->code[$this->cursor] != '"') {
    330                 throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->filename);
    331             }
    332 
    333             $this->popState();
    334             ++$this->cursor;
    335         }
    336     }
    337 
    338     protected function lexInterpolation()
    339     {
    340         $bracket = end($this->brackets);
    341         if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, null, $this->cursor)) {
    342             array_pop($this->brackets);
    343             $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
    344             $this->moveCursor($match[0]);
    345             $this->popState();
    346         } else {
    347             $this->lexExpression();
    348         }
    349     }
    350 
    351     protected function pushToken($type, $value = '')
    352     {
    353         // do not push empty text tokens
    354         if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
    355             return;
    356         }
    357 
    358         $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    359     }
    360 
    361     protected function moveCursor($text)
    362     {
    363         $this->cursor += strlen($text);
    364         $this->lineno += substr_count($text, "\n");
    365     }
    366 
    367     protected function getOperatorRegex()
    368     {
    369         $operators = array_merge(
    370             array('='),
    371             array_keys($this->env->getUnaryOperators()),
    372             array_keys($this->env->getBinaryOperators())
    373         );
    374 
    375         $operators = array_combine($operators, array_map('strlen', $operators));
    376         arsort($operators);
    377 
    378         $regex = array();
    379         foreach ($operators as $operator => $length) {
    380             // an operator that ends with a character must be followed by
    381             // a whitespace or a parenthesis
    382             if (ctype_alpha($operator[$length - 1])) {
    383                 $r = preg_quote($operator, '/').'(?=[\s()])';
    384             } else {
    385                 $r = preg_quote($operator, '/');
    386             }
    387 
    388             // an operator with a space can be any amount of whitespaces
    389             $r = preg_replace('/\s+/', '\s+', $r);
    390 
    391             $regex[] = $r;
    392         }
    393 
    394         return '/'.implode('|', $regex).'/A';
    395     }
    396 
    397     protected function pushState($state)
    398     {
    399         $this->states[] = $this->state;
    400         $this->state = $state;
    401     }
    402 
    403     protected function popState()
    404     {
    405         if (0 === count($this->states)) {
    406             throw new Exception('Cannot pop state without a previous state');
    407         }
    408 
    409         $this->state = array_pop($this->states);
    410     }
    411 }