vendor/twig/twig/src/Lexer.php line 475

  1. <?php
  2. /*
  3.  * This file is part of Twig.
  4.  *
  5.  * (c) Fabien Potencier
  6.  * (c) Armin Ronacher
  7.  *
  8.  * For the full copyright and license information, please view the LICENSE
  9.  * file that was distributed with this source code.
  10.  */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14.  * @author Fabien Potencier <fabien@symfony.com>
  15.  */
  16. class Lexer
  17. {
  18.     private $isInitialized false;
  19.     private $tokens;
  20.     private $code;
  21.     private $cursor;
  22.     private $lineno;
  23.     private $end;
  24.     private $state;
  25.     private $states;
  26.     private $brackets;
  27.     private $env;
  28.     private $source;
  29.     private $options;
  30.     private $regexes;
  31.     private $position;
  32.     private $positions;
  33.     private $currentVarBlockLine;
  34.     public const STATE_DATA 0;
  35.     public const STATE_BLOCK 1;
  36.     public const STATE_VAR 2;
  37.     public const STATE_STRING 3;
  38.     public const STATE_INTERPOLATION 4;
  39.     public const REGEX_NAME '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  40.     public const REGEX_NUMBER '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  41.     public const REGEX_STRING '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  42.     public const REGEX_DQ_STRING_DELIM '/"/A';
  43.     public const REGEX_DQ_STRING_PART '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  44.     public const PUNCTUATION '()[]{}?:.,|';
  45.     public function __construct(Environment $env, array $options = [])
  46.     {
  47.         $this->env $env;
  48.         $this->options array_merge([
  49.             'tag_comment' => ['{#''#}'],
  50.             'tag_block' => ['{%''%}'],
  51.             'tag_variable' => ['{{''}}'],
  52.             'whitespace_trim' => '-',
  53.             'whitespace_line_trim' => '~',
  54.             'whitespace_line_chars' => ' \t\0\x0B',
  55.             'interpolation' => ['#{''}'],
  56.         ], $options);
  57.     }
  58.     private function initialize()
  59.     {
  60.         if ($this->isInitialized) {
  61.             return;
  62.         }
  63.         // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  64.         $this->regexes = [
  65.             // }}
  66.             'lex_var' => '{
  67.                 \s*
  68.                 (?:'.
  69.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'// -}}\s*
  70.                     '|'.
  71.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~}}[ \t\0\x0B]*
  72.                     '|'.
  73.                     preg_quote($this->options['tag_variable'][1], '#'). // }}
  74.                 ')
  75.             }Ax',
  76.             // %}
  77.             'lex_block' => '{
  78.                 \s*
  79.                 (?:'.
  80.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'// -%}\s*\n?
  81.                     '|'.
  82.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  83.                     '|'.
  84.                     preg_quote($this->options['tag_block'][1], '#').'\n?'// %}\n?
  85.                 ')
  86.             }Ax',
  87.             // {% endverbatim %}
  88.             'lex_raw_data' => '{'.
  89.                 preg_quote($this->options['tag_block'][0], '#'). // {%
  90.                 '('.
  91.                     $this->options['whitespace_trim']. // -
  92.                     '|'.
  93.                     $this->options['whitespace_line_trim']. // ~
  94.                 ')?\s*endverbatim\s*'.
  95.                 '(?:'.
  96.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}
  97.                     '|'.
  98.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  99.                     '|'.
  100.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  101.                 ')
  102.             }sx',
  103.             'operator' => $this->getOperatorRegex(),
  104.             // #}
  105.             'lex_comment' => '{
  106.                 (?:'.
  107.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'// -#}\s*\n?
  108.                     '|'.
  109.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~#}[ \t\0\x0B]*
  110.                     '|'.
  111.                     preg_quote($this->options['tag_comment'][1], '#').'\n?'// #}\n?
  112.                 ')
  113.             }sx',
  114.             // verbatim %}
  115.             'lex_block_raw' => '{
  116.                 \s*verbatim\s*
  117.                 (?:'.
  118.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}\s*
  119.                     '|'.
  120.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  121.                     '|'.
  122.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  123.                 ')
  124.             }Asx',
  125.             'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
  126.             // {{ or {% or {#
  127.             'lex_tokens_start' => '{
  128.                 ('.
  129.                     preg_quote($this->options['tag_variable'][0], '#'). // {{
  130.                     '|'.
  131.                     preg_quote($this->options['tag_block'][0], '#'). // {%
  132.                     '|'.
  133.                     preg_quote($this->options['tag_comment'][0], '#'). // {#
  134.                 ')('.
  135.                     preg_quote($this->options['whitespace_trim'], '#'). // -
  136.                     '|'.
  137.                     preg_quote($this->options['whitespace_line_trim'], '#'). // ~
  138.                 ')?
  139.             }sx',
  140.             'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
  141.             'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
  142.         ];
  143.         $this->isInitialized true;
  144.     }
  145.     public function tokenize(Source $source): TokenStream
  146.     {
  147.         $this->initialize();
  148.         $this->source $source;
  149.         $this->code str_replace(["\r\n""\r"], "\n"$source->getCode());
  150.         $this->cursor 0;
  151.         $this->lineno 1;
  152.         $this->end \strlen($this->code);
  153.         $this->tokens = [];
  154.         $this->state self::STATE_DATA;
  155.         $this->states = [];
  156.         $this->brackets = [];
  157.         $this->position = -1;
  158.         // find all token starts in one go
  159.         preg_match_all($this->regexes['lex_tokens_start'], $this->code$matches\PREG_OFFSET_CAPTURE);
  160.         $this->positions $matches;
  161.         while ($this->cursor $this->end) {
  162.             // dispatch to the lexing functions depending
  163.             // on the current state
  164.             switch ($this->state) {
  165.                 case self::STATE_DATA:
  166.                     $this->lexData();
  167.                     break;
  168.                 case self::STATE_BLOCK:
  169.                     $this->lexBlock();
  170.                     break;
  171.                 case self::STATE_VAR:
  172.                     $this->lexVar();
  173.                     break;
  174.                 case self::STATE_STRING:
  175.                     $this->lexString();
  176.                     break;
  177.                 case self::STATE_INTERPOLATION:
  178.                     $this->lexInterpolation();
  179.                     break;
  180.             }
  181.         }
  182.         $this->pushToken(/* Token::EOF_TYPE */ -1);
  183.         if (!empty($this->brackets)) {
  184.             [$expect$lineno] = array_pop($this->brackets);
  185.             throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  186.         }
  187.         return new TokenStream($this->tokens$this->source);
  188.     }
  189.     private function lexData(): void
  190.     {
  191.         // if no matches are left we return the rest of the template as simple text token
  192.         if ($this->position == \count($this->positions[0]) - 1) {
  193.             $this->pushToken(/* Token::TEXT_TYPE */ 0substr($this->code$this->cursor));
  194.             $this->cursor $this->end;
  195.             return;
  196.         }
  197.         // Find the first token after the current cursor
  198.         $position $this->positions[0][++$this->position];
  199.         while ($position[1] < $this->cursor) {
  200.             if ($this->position == \count($this->positions[0]) - 1) {
  201.                 return;
  202.             }
  203.             $position $this->positions[0][++$this->position];
  204.         }
  205.         // push the template text first
  206.         $text $textContent substr($this->code$this->cursor$position[1] - $this->cursor);
  207.         // trim?
  208.         if (isset($this->positions[2][$this->position][0])) {
  209.             if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
  210.                 // whitespace_trim detected ({%-, {{- or {#-)
  211.                 $text rtrim($text);
  212.             } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
  213.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  214.                 // don't trim \r and \n
  215.                 $text rtrim($text" \t\0\x0B");
  216.             }
  217.         }
  218.         $this->pushToken(/* Token::TEXT_TYPE */ 0$text);
  219.         $this->moveCursor($textContent.$position[0]);
  220.         switch ($this->positions[1][$this->position][0]) {
  221.             case $this->options['tag_comment'][0]:
  222.                 $this->lexComment();
  223.                 break;
  224.             case $this->options['tag_block'][0]:
  225.                 // raw data?
  226.                 if (preg_match($this->regexes['lex_block_raw'], $this->code$match0$this->cursor)) {
  227.                     $this->moveCursor($match[0]);
  228.                     $this->lexRawData();
  229.                 // {% line \d+ %}
  230.                 } elseif (preg_match($this->regexes['lex_block_line'], $this->code$match0$this->cursor)) {
  231.                     $this->moveCursor($match[0]);
  232.                     $this->lineno = (int) $match[1];
  233.                 } else {
  234.                     $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
  235.                     $this->pushState(self::STATE_BLOCK);
  236.                     $this->currentVarBlockLine $this->lineno;
  237.                 }
  238.                 break;
  239.             case $this->options['tag_variable'][0]:
  240.                 $this->pushToken(/* Token::VAR_START_TYPE */ 2);
  241.                 $this->pushState(self::STATE_VAR);
  242.                 $this->currentVarBlockLine $this->lineno;
  243.                 break;
  244.         }
  245.     }
  246.     private function lexBlock(): void
  247.     {
  248.         if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code$match0$this->cursor)) {
  249.             $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
  250.             $this->moveCursor($match[0]);
  251.             $this->popState();
  252.         } else {
  253.             $this->lexExpression();
  254.         }
  255.     }
  256.     private function lexVar(): void
  257.     {
  258.         if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code$match0$this->cursor)) {
  259.             $this->pushToken(/* Token::VAR_END_TYPE */ 4);
  260.             $this->moveCursor($match[0]);
  261.             $this->popState();
  262.         } else {
  263.             $this->lexExpression();
  264.         }
  265.     }
  266.     private function lexExpression(): void
  267.     {
  268.         // whitespace
  269.         if (preg_match('/\s+/A'$this->code$match0$this->cursor)) {
  270.             $this->moveCursor($match[0]);
  271.             if ($this->cursor >= $this->end) {
  272.                 throw new SyntaxError(sprintf('Unclosed "%s".'self::STATE_BLOCK === $this->state 'block' 'variable'), $this->currentVarBlockLine$this->source);
  273.             }
  274.         }
  275.         // spread operator
  276.         if ('.' === $this->code[$this->cursor] && ($this->cursor $this->end) && '.' === $this->code[$this->cursor 1] && '.' === $this->code[$this->cursor 2]) {
  277.             $this->pushToken(Token::SPREAD_TYPE'...');
  278.             $this->moveCursor('...');
  279.         }
  280.         // arrow function
  281.         elseif ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor 1]) {
  282.             $this->pushToken(Token::ARROW_TYPE'=>');
  283.             $this->moveCursor('=>');
  284.         }
  285.         // operators
  286.         elseif (preg_match($this->regexes['operator'], $this->code$match0$this->cursor)) {
  287.             $this->pushToken(/* Token::OPERATOR_TYPE */ 8preg_replace('/\s+/'' '$match[0]));
  288.             $this->moveCursor($match[0]);
  289.         }
  290.         // names
  291.         elseif (preg_match(self::REGEX_NAME$this->code$match0$this->cursor)) {
  292.             $this->pushToken(/* Token::NAME_TYPE */ 5$match[0]);
  293.             $this->moveCursor($match[0]);
  294.         }
  295.         // numbers
  296.         elseif (preg_match(self::REGEX_NUMBER$this->code$match0$this->cursor)) {
  297.             $number = (float) $match[0];  // floats
  298.             if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
  299.                 $number = (int) $match[0]; // integers lower than the maximum
  300.             }
  301.             $this->pushToken(/* Token::NUMBER_TYPE */ 6$number);
  302.             $this->moveCursor($match[0]);
  303.         }
  304.         // punctuation
  305.         elseif (str_contains(self::PUNCTUATION$this->code[$this->cursor])) {
  306.             // opening bracket
  307.             if (str_contains('([{'$this->code[$this->cursor])) {
  308.                 $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
  309.             }
  310.             // closing bracket
  311.             elseif (str_contains(')]}'$this->code[$this->cursor])) {
  312.                 if (empty($this->brackets)) {
  313.                     throw new SyntaxError(sprintf('Unexpected "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  314.                 }
  315.                 [$expect$lineno] = array_pop($this->brackets);
  316.                 if ($this->code[$this->cursor] != strtr($expect'([{'')]}')) {
  317.                     throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  318.                 }
  319.             }
  320.             $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9$this->code[$this->cursor]);
  321.             ++$this->cursor;
  322.         }
  323.         // strings
  324.         elseif (preg_match(self::REGEX_STRING$this->code$match0$this->cursor)) {
  325.             $this->pushToken(/* Token::STRING_TYPE */ 7stripcslashes(substr($match[0], 1, -1)));
  326.             $this->moveCursor($match[0]);
  327.         }
  328.         // opening double quoted string
  329.         elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  330.             $this->brackets[] = ['"'$this->lineno];
  331.             $this->pushState(self::STATE_STRING);
  332.             $this->moveCursor($match[0]);
  333.         }
  334.         // unlexable
  335.         else {
  336.             throw new SyntaxError(sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  337.         }
  338.     }
  339.     private function lexRawData(): void
  340.     {
  341.         if (!preg_match($this->regexes['lex_raw_data'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  342.             throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.'$this->lineno$this->source);
  343.         }
  344.         $text substr($this->code$this->cursor$match[0][1] - $this->cursor);
  345.         $this->moveCursor($text.$match[0][0]);
  346.         // trim?
  347.         if (isset($match[1][0])) {
  348.             if ($this->options['whitespace_trim'] === $match[1][0]) {
  349.                 // whitespace_trim detected ({%-, {{- or {#-)
  350.                 $text rtrim($text);
  351.             } else {
  352.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  353.                 // don't trim \r and \n
  354.                 $text rtrim($text" \t\0\x0B");
  355.             }
  356.         }
  357.         $this->pushToken(/* Token::TEXT_TYPE */ 0$text);
  358.     }
  359.     private function lexComment(): void
  360.     {
  361.         if (!preg_match($this->regexes['lex_comment'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  362.             throw new SyntaxError('Unclosed comment.'$this->lineno$this->source);
  363.         }
  364.         $this->moveCursor(substr($this->code$this->cursor$match[0][1] - $this->cursor).$match[0][0]);
  365.     }
  366.     private function lexString(): void
  367.     {
  368.         if (preg_match($this->regexes['interpolation_start'], $this->code$match0$this->cursor)) {
  369.             $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
  370.             $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
  371.             $this->moveCursor($match[0]);
  372.             $this->pushState(self::STATE_INTERPOLATION);
  373.         } elseif (preg_match(self::REGEX_DQ_STRING_PART$this->code$match0$this->cursor) && '' !== $match[0]) {
  374.             $this->pushToken(/* Token::STRING_TYPE */ 7stripcslashes($match[0]));
  375.             $this->moveCursor($match[0]);
  376.         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  377.             [$expect$lineno] = array_pop($this->brackets);
  378.             if ('"' != $this->code[$this->cursor]) {
  379.                 throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  380.             }
  381.             $this->popState();
  382.             ++$this->cursor;
  383.         } else {
  384.             // unlexable
  385.             throw new SyntaxError(sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  386.         }
  387.     }
  388.     private function lexInterpolation(): void
  389.     {
  390.         $bracket end($this->brackets);
  391.         if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code$match0$this->cursor)) {
  392.             array_pop($this->brackets);
  393.             $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
  394.             $this->moveCursor($match[0]);
  395.             $this->popState();
  396.         } else {
  397.             $this->lexExpression();
  398.         }
  399.     }
  400.     private function pushToken($type$value ''): void
  401.     {
  402.         // do not push empty text tokens
  403.         if (/* Token::TEXT_TYPE */ === $type && '' === $value) {
  404.             return;
  405.         }
  406.         $this->tokens[] = new Token($type$value$this->lineno);
  407.     }
  408.     private function moveCursor($text): void
  409.     {
  410.         $this->cursor += \strlen($text);
  411.         $this->lineno += substr_count($text"\n");
  412.     }
  413.     private function getOperatorRegex(): string
  414.     {
  415.         $operators array_merge(
  416.             ['='],
  417.             array_keys($this->env->getUnaryOperators()),
  418.             array_keys($this->env->getBinaryOperators())
  419.         );
  420.         $operators array_combine($operatorsarray_map('strlen'$operators));
  421.         arsort($operators);
  422.         $regex = [];
  423.         foreach ($operators as $operator => $length) {
  424.             // an operator that ends with a character must be followed by
  425.             // a whitespace, a parenthesis, an opening map [ or sequence {
  426.             $r preg_quote($operator'/');
  427.             if (ctype_alpha($operator[$length 1])) {
  428.                 $r .= '(?=[\s()\[{])';
  429.             }
  430.             // an operator that begins with a character must not have a dot or pipe before
  431.             if (ctype_alpha($operator[0])) {
  432.                 $r '(?<![\.\|])'.$r;
  433.             }
  434.             // an operator with a space can be any amount of whitespaces
  435.             $r preg_replace('/\s+/''\s+'$r);
  436.             $regex[] = $r;
  437.         }
  438.         return '/'.implode('|'$regex).'/A';
  439.     }
  440.     private function pushState($state): void
  441.     {
  442.         $this->states[] = $this->state;
  443.         $this->state $state;
  444.     }
  445.     private function popState(): void
  446.     {
  447.         if (=== \count($this->states)) {
  448.             throw new \LogicException('Cannot pop state without a previous state.');
  449.         }
  450.         $this->state array_pop($this->states);
  451.     }
  452. }