rebuildParser.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. <?php
  2. $grammarFile = __DIR__ . '/zend_language_parser.phpy';
  3. $skeletonFile = __DIR__ . '/kmyacc.php.parser';
  4. $tmpGrammarFile = __DIR__ . '/tmp_parser.phpy';
  5. $tmpResultFile = __DIR__ . '/tmp_parser.php';
  6. $parserResultFile = __DIR__ . '/../lib/PHPParser/Parser.php';
  7. $debugParserResultFile = __DIR__ . '/../lib/PHPParser/Parser/Debug.php';
  8. // check for kmyacc.exe binary in this directory, otherwise fall back to global name
  9. $kmyacc = __DIR__ . '/kmyacc.exe';
  10. if (!file_exists($kmyacc)) {
  11. $kmyacc = 'kmyacc';
  12. }
  13. $options = array_flip($argv);
  14. $optionDebug = isset($options['--debug']);
  15. $optionKeepTmpGrammar = isset($options['--keep-tmp-grammar']);
  16. ///////////////////////////////
  17. /// Utility regex constants ///
  18. ///////////////////////////////
  19. const LIB = '(?(DEFINE)
  20. (?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
  21. (?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
  22. (?<string>(?&singleQuotedString)|(?&doubleQuotedString))
  23. (?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
  24. (?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
  25. )';
  26. const PARAMS = '\[(?<params>[^[\]]*+(?:\[(?&params)\][^[\]]*+)*+)\]';
  27. const ARGS = '\((?<args>[^()]*+(?:\((?&args)\)[^()]*+)*+)\)';
  28. ///////////////////
  29. /// Main script ///
  30. ///////////////////
  31. echo 'Building temporary preproprocessed grammar file.', "\n";
  32. $grammarCode = file_get_contents($grammarFile);
  33. $grammarCode = resolveConstants($grammarCode);
  34. $grammarCode = resolveNodes($grammarCode);
  35. $grammarCode = resolveMacros($grammarCode);
  36. $grammarCode = resolveArrays($grammarCode);
  37. file_put_contents($tmpGrammarFile, $grammarCode);
  38. echo "Building parser.\n";
  39. $output = trim(shell_exec("$kmyacc -l -m $skeletonFile -p PHPParser_Parser $tmpGrammarFile 2>&1"));
  40. echo "Output: \"$output\"\n";
  41. moveFileWithDirCheck($tmpResultFile, $parserResultFile);
  42. if ($optionDebug) {
  43. echo "Building debug parser.\n";
  44. $output = trim(shell_exec("$kmyacc -t -v -l -m $skeletonFile -p PHPParser_Parser $tmpGrammarFile 2>&1"));
  45. echo "Output: \"$output\"\n";
  46. moveFileWithDirCheck($tmpResultFile, $debugParserResultFile);
  47. }
  48. if (!$optionKeepTmpGrammar) {
  49. unlink($tmpGrammarFile);
  50. }
  51. ///////////////////////////////
  52. /// Preprocessing functions ///
  53. ///////////////////////////////
  54. function resolveConstants($code) {
  55. return preg_replace('~[A-Z][a-zA-Z_]++::~', 'PHPParser_Node_$0', $code);
  56. }
  57. function resolveNodes($code) {
  58. return preg_replace_callback(
  59. '~(?<name>[A-Z][a-zA-Z_]++)\s*' . PARAMS . '~',
  60. function($matches) {
  61. // recurse
  62. $matches['params'] = resolveNodes($matches['params']);
  63. $params = magicSplit(
  64. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  65. $matches['params']
  66. );
  67. $paramCode = '';
  68. foreach ($params as $param) {
  69. $paramCode .= $param . ', ';
  70. }
  71. return 'new PHPParser_Node_' . $matches['name'] . '(' . $paramCode . '$attributes)';
  72. },
  73. $code
  74. );
  75. }
  76. function resolveMacros($code) {
  77. return preg_replace_callback(
  78. '~\b(?<!::|->)(?!array\()(?<name>[a-z][A-Za-z]++)' . ARGS . '~',
  79. function($matches) {
  80. // recurse
  81. $matches['args'] = resolveMacros($matches['args']);
  82. $name = $matches['name'];
  83. $args = magicSplit(
  84. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  85. $matches['args']
  86. );
  87. if ('error' == $name) {
  88. assertArgs(1, $args, $name);
  89. return 'throw new PHPParser_Error(' . $args[0] . ')';
  90. }
  91. if ('init' == $name) {
  92. return '$$ = array(' . implode(', ', $args) . ')';
  93. }
  94. if ('push' == $name) {
  95. assertArgs(2, $args, $name);
  96. return $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0];
  97. }
  98. if ('pushNormalizing' == $name) {
  99. assertArgs(2, $args, $name);
  100. return 'if (is_array(' . $args[1] . ')) { $$ = array_merge(' . $args[0] . ', ' . $args[1] . '); } else { ' . $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0] . '; }';
  101. }
  102. if ('toArray' == $name) {
  103. assertArgs(1, $args, $name);
  104. return 'is_array(' . $args[0] . ') ? ' . $args[0] . ' : array(' . $args[0] . ')';
  105. }
  106. if ('parseVar' == $name) {
  107. assertArgs(1, $args, $name);
  108. return 'substr(' . $args[0] . ', 1)';
  109. }
  110. if ('parseEncapsed' == $name) {
  111. assertArgs(2, $args, $name);
  112. return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, ' . $args[1] . '); } }';
  113. }
  114. if ('parseEncapsedDoc' == $name) {
  115. assertArgs(1, $args, $name);
  116. return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, null); } } $s = preg_replace(\'~(\r\n|\n|\r)$~\', \'\', $s); if (\'\' === $s) array_pop(' . $args[0] . ');';
  117. }
  118. throw new Exception(sprintf('Unknown macro "%s"', $name));
  119. },
  120. $code
  121. );
  122. }
  123. function assertArgs($num, $args, $name) {
  124. if ($num != count($args)) {
  125. die('Wrong argument count for ' . $name . '().');
  126. }
  127. }
  128. function resolveArrays($code) {
  129. return preg_replace_callback(
  130. '~' . PARAMS . '~',
  131. function ($matches) {
  132. $elements = magicSplit(
  133. '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
  134. $matches['params']
  135. );
  136. // don't convert [] to array, it might have different meaning
  137. if (empty($elements)) {
  138. return $matches[0];
  139. }
  140. $elementCodes = array();
  141. foreach ($elements as $element) {
  142. // convert only arrays where all elements have keys
  143. if (false === strpos($element, ':')) {
  144. return $matches[0];
  145. }
  146. list($key, $value) = explode(':', $element, 2);
  147. $elementCodes[] = "'" . $key . "' =>" . $value;
  148. }
  149. return 'array(' . implode(', ', $elementCodes) . ')';
  150. },
  151. $code
  152. );
  153. }
  154. function moveFileWithDirCheck($fromPath, $toPath) {
  155. $dir = dirname($toPath);
  156. if (!is_dir($dir)) {
  157. mkdir($dir, 0777, true);
  158. }
  159. rename($fromPath, $toPath);
  160. }
  161. //////////////////////////////
  162. /// Regex helper functions ///
  163. //////////////////////////////
  164. function regex($regex) {
  165. return '~' . LIB . '(?:' . str_replace('~', '\~', $regex) . ')~';
  166. }
  167. function magicSplit($regex, $string) {
  168. $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
  169. foreach ($pieces as &$piece) {
  170. $piece = trim($piece);
  171. }
  172. return array_filter($pieces);
  173. }