Emulative.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. <?php
  2. /**
  3. * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
  4. */
  class PHPParser_Lexer_Emulative extends PHPParser_Lexer
  {
      /**
       * Keywords that the running PHP version does not know natively, mapped
       * (in lowercase) to the parser token that is returned for them instead
       * of T_STRING.
       *
       * @var array
       */
      protected $newKeywords;

      /**
       * Whether the token returned last was an object operator (->).
       *
       * @var bool
       */
      protected $inObjectAccess;

      /**
       * Collects the keywords that have to be emulated on the running PHP version.
       */
      public function __construct() {
          parent::__construct();

          // Ordered from the newest to the oldest version: the loop below relies
          // on this order to stop at the first version the running PHP satisfies.
          $newKeywordsPerVersion = array(
              '5.5.0-dev' => array(
                  'finally'       => PHPParser_Parser::T_FINALLY,
                  'yield'         => PHPParser_Parser::T_YIELD,
              ),
              '5.4.0-dev' => array(
                  'callable'      => PHPParser_Parser::T_CALLABLE,
                  'insteadof'     => PHPParser_Parser::T_INSTEADOF,
                  'trait'         => PHPParser_Parser::T_TRAIT,
                  '__trait__'     => PHPParser_Parser::T_TRAIT_C,
              ),
              '5.3.0-dev' => array(
                  '__dir__'       => PHPParser_Parser::T_DIR,
                  'goto'          => PHPParser_Parser::T_GOTO,
                  'namespace'     => PHPParser_Parser::T_NAMESPACE,
                  '__namespace__' => PHPParser_Parser::T_NS_C,
              ),
          );

          $this->newKeywords = array();
          foreach ($newKeywordsPerVersion as $version => $newKeywords) {
              if (version_compare(PHP_VERSION, $version, '>=')) {
                  // this and all older keyword sets are known to the running PHP
                  break;
              }

              $this->newKeywords += $newKeywords;
          }
      }

      /**
       * Starts lexing the given code, emulating newer syntax where necessary.
       *
       * @param string $code PHP source code to lex
       */
      public function startLexing($code) {
          $this->inObjectAccess = false;

          // from PHP 5.4 on the code is handed to the parent lexer unchanged
          if (version_compare(PHP_VERSION, '5.4.0RC1', '>=')) {
              parent::startLexing($code);
              return;
          }

          parent::startLexing($this->preprocessCode($code));
          $this->postprocessTokens();
      }

      /**
       * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
       *
       * ~LABEL~ is never valid PHP code, that's why we can (to some degree)
       * safely use it here. Later, when postprocessing the tokens, these
       * sequences will either be replaced by real tokens or restored to their
       * original content (e.g. if they occurred inside a string, i.e. a place
       * where they don't have a special meaning).
       *
       * @param string $code Original source code
       *
       * @return string Source code with the emulated features encoded
       */
      protected function preprocessCode($code) {
          // binary notation (0b010101101001...); the prefix is matched
          // case-insensitively because PHP accepts both 0b and 0B
          $code = preg_replace('(\b0b[01]+\b)i', '~__EMU__BINARY__$0__~', $code);

          if (version_compare(PHP_VERSION, '5.3.0', '<')) {
              // namespace separator (backslash not followed by some special characters,
              // which are not valid after a NS separator, but would cause problems with
              // escape sequence parsing if one would replace the backslash there)
              $code = preg_replace('(\\\\(?!["\'`${\\\\]))', '~__EMU__NS__~', $code);

              // nowdoc (<<<'ABC'\ncontent\nABC;)
              $code = preg_replace_callback(
                  '((*BSR_ANYCRLF) # set \R to (?>\r\n|\r|\n)
                    (b?<<<[\t ]*\'([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)\'\R) # opening token
                    ((?:(?!\2;?\R).*\R)*) # content
                    (\2) # closing token
                    (?=;?\R) # must be followed by newline (with optional semicolon)
                   )x',
                  array($this, 'encodeNowdocCallback'),
                  $code
              );
          }

          return $code;
      }

      /**
       * Callback encoding a nowdoc matched by preprocessCode().
       *
       * As nowdocs can have arbitrary content but LABELs can only contain a
       * certain range of characters, the nowdoc parts are encoded as hex and
       * separated by 'x' characters. So the result of the encoding looks like this:
       * ~__EMU__NOWDOC__{HEX(START_TOKEN)}x{HEX(CONTENT)}x{HEX(END_TOKEN)}__~
       *
       * @param array $matches Regex match: [1] opening token, [3] content, [4] closing token
       *
       * @return string Encoded sequence
       */
      public function encodeNowdocCallback(array $matches) {
          return '~__EMU__NOWDOC__'
              . bin2hex($matches[1]) . 'x' . bin2hex($matches[3]) . 'x' . bin2hex($matches[4])
              . '__~';
      }

      /**
       * Decodes the {HEX}x{HEX}x{HEX} payload written by encodeNowdocCallback().
       *
       * @param string $payload Payload of a NOWDOC sequence
       *
       * @return array|null array($start, $content, $end), or null if the payload
       *                    does not have the expected form
       */
      private function decodeNowdocPayload($payload) {
          $hex = '((?:[0-9a-f]{2})*)';
          if (!preg_match('(\A' . $hex . 'x' . $hex . 'x' . $hex . '\z)', $payload, $parts)) {
              return null;
          }

          // pack('H*', ...) is used because hex2bin() is only available as of PHP 5.4
          return array(pack('H*', $parts[1]), pack('H*', $parts[2]), pack('H*', $parts[3]));
      }

      /**
       * Replaces the ~__EMU__...~ sequences with real tokens or their original
       * value.
       */
      protected function postprocessTokens() {
          // we need to manually iterate and manage a count because we'll change
          // the tokens array on the way
          for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
              // first check that the following tokens are of the form ~LABEL~,
              // then match the __EMU__... sequence.
              if ('~' === $this->tokens[$i]
                  && isset($this->tokens[$i + 2])
                  && '~' === $this->tokens[$i + 2]
                  && T_STRING === $this->tokens[$i + 1][0]
                  && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
              ) {
                  $line = $this->tokens[$i + 1][2];

                  if ('BINARY' === $matches[1] && isset($matches[2])) {
                      // the binary number can either be an integer or a double, so return a LNUMBER
                      // or DNUMBER respectively
                      $replace = array(
                          array(is_int(bindec($matches[2])) ? T_LNUMBER : T_DNUMBER, $matches[2], $line)
                      );
                  } elseif ('NS' === $matches[1]) {
                      // a \ single char token is returned here and replaced by a
                      // PHPParser_Parser::T_NS_SEPARATOR token in ->getNextToken(). This hacks around
                      // the limitations arising from T_NS_SEPARATOR not being defined before PHP 5.3
                      $replace = array('\\');
                  } elseif ('NOWDOC' === $matches[1]
                      && isset($matches[2])
                      && null !== ($nowdoc = $this->decodeNowdocPayload($matches[2]))
                  ) {
                      list($start, $content, $end) = $nowdoc;

                      $replace = array();
                      $replace[] = array(T_START_HEREDOC, $start, $line);
                      if ('' !== $content) {
                          // the content token is visited by the next iteration, which
                          // restores any sequences that were encoded inside the nowdoc
                          $replace[] = array(T_ENCAPSED_AND_WHITESPACE, $content, -1);
                      }
                      $replace[] = array(T_END_HEREDOC, $end, -1);
                  } else {
                      // just ignore all other (unknown or malformed) __EMU__ sequences
                      continue;
                  }

                  array_splice($this->tokens, $i, 3, $replace);
                  $c -= 3 - count($replace);
              // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
              // in their content with the original character sequence. strpos() returns
              // false (not 0) when nothing is found, so compare against false.
              } elseif (is_array($this->tokens[$i])
                  && false !== strpos($this->tokens[$i][1], '~__EMU__')
              ) {
                  $this->tokens[$i][1] = preg_replace_callback(
                      '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                      array($this, 'restoreContentCallback'),
                      $this->tokens[$i][1]
                  );
              }
          }
      }

      /**
       * This method is a callback for restoring EMU sequences in
       * multichar tokens (like strings) to their original value.
       *
       * @param array $matches Regex match: [0] whole sequence, [1] name, [2] data (optional)
       *
       * @return string Original text; unknown or malformed sequences are returned unchanged
       */
      public function restoreContentCallback(array $matches) {
          if ('NS' === $matches[1]) {
              return '\\';
          }

          if (isset($matches[2])) {
              if ('BINARY' === $matches[1]) {
                  return $matches[2];
              }

              if ('NOWDOC' === $matches[1]) {
                  $nowdoc = $this->decodeNowdocPayload($matches[2]);
                  if (null !== $nowdoc) {
                      return implode('', $nowdoc);
                  }
              }
          }

          return $matches[0];
      }

      /**
       * Fetches the next token from the parent lexer and applies the keyword
       * and namespace separator emulation to it.
       *
       * @param mixed $value           Set by the parent lexer (passed through by reference)
       * @param mixed $startAttributes Set by the parent lexer (passed through by reference)
       * @param mixed $endAttributes   Set by the parent lexer (passed through by reference)
       *
       * @return int Token id
       */
      public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
          $token = parent::getNextToken($value, $startAttributes, $endAttributes);

          // replace new keywords by their respective tokens. This is not done
          // if we currently are in an object access (e.g. in $obj->namespace
          // "namespace" stays a T_STRING token and isn't converted to T_NAMESPACE)
          if (PHPParser_Parser::T_STRING === $token && !$this->inObjectAccess) {
              $keyword = strtolower($value);
              if (isset($this->newKeywords[$keyword])) {
                  return $this->newKeywords[$keyword];
              }
          // backslashes are replaced by T_NS_SEPARATOR tokens
          } elseif (92 === $token) { // ord('\\')
              return PHPParser_Parser::T_NS_SEPARATOR;
          // keep track of whether we currently are in an object access (after ->)
          } elseif (PHPParser_Parser::T_OBJECT_OPERATOR === $token) {
              $this->inObjectAccess = true;
          } else {
              $this->inObjectAccess = false;
          }

          return $token;
      }
  }