Lexer.php 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. <?php
  2. class PHPParser_Lexer
  3. {
  4. protected $code;
  5. protected $tokens;
  6. protected $pos;
  7. protected $line;
  8. protected $tokenMap;
  9. protected $dropTokens;
  10. /**
  11. * Creates a Lexer.
  12. */
  13. public function __construct() {
  14. // map from internal tokens to PHPParser tokens
  15. $this->tokenMap = $this->createTokenMap();
  16. // map of tokens to drop while lexing (the map is only used for isset lookup,
  17. // that's why the value is simply set to 1; the value is never actually used.)
  18. $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_OPEN_TAG), 1);
  19. }
  20. /**
  21. * Initializes the lexer for lexing the provided source code.
  22. *
  23. * @param string $code The source code to lex
  24. *
  25. * @throws PHPParser_Error on lexing errors (unterminated comment or unexpected character)
  26. */
  27. public function startLexing($code) {
  28. $this->resetErrors();
  29. $this->tokens = @token_get_all($code);
  30. $this->handleErrors();
  31. $this->code = $code; // keep the code around for __halt_compiler() handling
  32. $this->pos = -1;
  33. $this->line = 1;
  34. }
  35. protected function resetErrors() {
  36. // clear error_get_last() by forcing an undefined variable error
  37. @$undefinedVariable;
  38. }
  39. protected function handleErrors() {
  40. $error = error_get_last();
  41. if (preg_match(
  42. '~^Unterminated comment starting line ([0-9]+)$~',
  43. $error['message'], $matches
  44. )) {
  45. throw new PHPParser_Error('Unterminated comment', $matches[1]);
  46. }
  47. if (preg_match(
  48. '~^Unexpected character in input: \'(.)\' \(ASCII=([0-9]+)\)~s',
  49. $error['message'], $matches
  50. )) {
  51. throw new PHPParser_Error(sprintf(
  52. 'Unexpected character "%s" (ASCII %d)',
  53. $matches[1], $matches[2]
  54. ));
  55. }
  56. // PHP cuts error message after null byte, so need special case
  57. if (preg_match('~^Unexpected character in input: \'$~', $error['message'])) {
  58. throw new PHPParser_Error('Unexpected null byte');
  59. }
  60. }
  61. /**
  62. * Fetches the next token.
  63. *
  64. * @param mixed $value Variable to store token content in
  65. * @param mixed $startAttributes Variable to store start attributes in
  66. * @param mixed $endAttributes Variable to store end attributes in
  67. *
  68. * @return int Token id
  69. */
  70. public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
  71. $startAttributes = array();
  72. $endAttributes = array();
  73. while (isset($this->tokens[++$this->pos])) {
  74. $token = $this->tokens[$this->pos];
  75. if (is_string($token)) {
  76. $startAttributes['startLine'] = $this->line;
  77. $endAttributes['endLine'] = $this->line;
  78. // bug in token_get_all
  79. if ('b"' === $token) {
  80. $value = 'b"';
  81. return ord('"');
  82. } else {
  83. $value = $token;
  84. return ord($token);
  85. }
  86. } else {
  87. $this->line += substr_count($token[1], "\n");
  88. if (T_COMMENT === $token[0]) {
  89. $startAttributes['comments'][] = new PHPParser_Comment($token[1], $token[2]);
  90. } elseif (T_DOC_COMMENT === $token[0]) {
  91. $startAttributes['comments'][] = new PHPParser_Comment_Doc($token[1], $token[2]);
  92. } elseif (!isset($this->dropTokens[$token[0]])) {
  93. $value = $token[1];
  94. $startAttributes['startLine'] = $token[2];
  95. $endAttributes['endLine'] = $this->line;
  96. return $this->tokenMap[$token[0]];
  97. }
  98. }
  99. }
  100. $startAttributes['startLine'] = $this->line;
  101. // 0 is the EOF token
  102. return 0;
  103. }
  104. /**
  105. * Handles __halt_compiler() by returning the text after it.
  106. *
  107. * @return string Remaining text
  108. */
  109. public function handleHaltCompiler() {
  110. // get the length of the text before the T_HALT_COMPILER token
  111. $textBefore = '';
  112. for ($i = 0; $i <= $this->pos; ++$i) {
  113. if (is_string($this->tokens[$i])) {
  114. $textBefore .= $this->tokens[$i];
  115. } else {
  116. $textBefore .= $this->tokens[$i][1];
  117. }
  118. }
  119. // text after T_HALT_COMPILER, still including ();
  120. $textAfter = substr($this->code, strlen($textBefore));
  121. // ensure that it is followed by ();
  122. // this simplifies the situation, by not allowing any comments
  123. // in between of the tokens.
  124. if (!preg_match('~\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $textAfter, $matches)) {
  125. throw new PHPParser_Error('__halt_compiler must be followed by "();"');
  126. }
  127. // prevent the lexer from returning any further tokens
  128. $this->pos = count($this->tokens);
  129. // return with (); removed
  130. return (string) substr($textAfter, strlen($matches[0])); // (string) converts false to ''
  131. }
  132. /**
  133. * Creates the token map.
  134. *
  135. * The token map maps the PHP internal token identifiers
  136. * to the identifiers used by the Parser. Additionally it
  137. * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
  138. *
  139. * @return array The token map
  140. */
  141. protected function createTokenMap() {
  142. $tokenMap = array();
  143. // 256 is the minimum possible token number, as everything below
  144. // it is an ASCII value
  145. for ($i = 256; $i < 1000; ++$i) {
  146. // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
  147. if (T_DOUBLE_COLON === $i) {
  148. $tokenMap[$i] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
  149. // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
  150. } elseif(T_OPEN_TAG_WITH_ECHO === $i) {
  151. $tokenMap[$i] = PHPParser_Parser::T_ECHO;
  152. // T_CLOSE_TAG is equivalent to ';'
  153. } elseif(T_CLOSE_TAG === $i) {
  154. $tokenMap[$i] = ord(';');
  155. // and the others can be mapped directly
  156. } elseif ('UNKNOWN' !== ($name = token_name($i))
  157. && defined($name = 'PHPParser_Parser::' . $name)
  158. ) {
  159. $tokenMap[$i] = constant($name);
  160. }
  161. }
  162. return $tokenMap;
  163. }
  164. }