analyze.php 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. <?php
  // Grammar file to analyze. The leading "./" makes the path relative to the
  // current working directory.
  const GRAMMAR_FILE = './zend_language_parser.phpy';

  // Library of named PCRE subpatterns that regex() prepends to every pattern.
  // The (?(DEFINE)...) group only declares the subpatterns; they match nothing
  // until they are invoked with (?&name):
  //   singleQuotedString / doubleQuotedString - quoted literal with backslash escapes
  //   string  - either kind of quoted literal
  //   comment - a /* ... */ comment
  //   code    - a balanced { ... } block that may contain strings, comments,
  //             nested blocks and lone slashes
  // The literal spans several lines; the continuation lines belong to the string
  // value and therefore must stay unindented.
  const LIB = '(?(DEFINE)
(?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
(?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
(?<string>(?&singleQuotedString)|(?&doubleQuotedString))
(?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
(?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
)';

  // One rule block of the grammar: "name: alternatives ;". "name" captures the
  // lower-case rule name, "rules" captures everything up to the terminating ";"
  // that lies outside of strings, comments and code blocks.
  const RULE_BLOCK = '(?<name>[a-z_]++):(?<rules>[^\'"/{};]*+(?:(?:(?&string)|(?&comment)|(?&code)|/|})[^\'"/{};]*+)*+);';

  // Terminals (symbols starting with an upper-case letter) whose value a rule
  // action is expected to reference through $N. array_flip() turns the list into
  // a name => index map so that membership can be tested with isset().
  $usedTerminals = array_flip(array(
      'T_VARIABLE',
      'T_STRING',
      'T_INLINE_HTML',
      'T_ENCAPSED_AND_WHITESPACE',
      'T_LNUMBER',
      'T_DNUMBER',
      'T_CONSTANT_ENCAPSED_STRING',
      'T_STRING_VARNAME',
      'T_NUM_STRING',
  ));

  // Nonterminals (symbols starting with a lower-case letter) whose value a rule
  // action is expected NOT to reference. Same name => index layout as above.
  $unusedNonterminals = array_flip(array(
      'case_separator',
      'optional_comma',
  ));
  /**
   * Turns a pattern fragment into a complete "~"-delimited PCRE pattern.
   *
   * The shared LIB definitions are placed in front of the fragment so that it
   * can call (?&string), (?&comment) and (?&code). The fragment itself is
   * wrapped in a non-capturing group, and every "~" inside it is prefixed with
   * a backslash so that it cannot terminate the pattern early.
   *
   * @param string $regex Pattern fragment without delimiters.
   *
   * @return string Pattern ready to be passed to the preg_* functions.
   */
  function regex($regex) {
      $escapedFragment = str_replace('~', '\~', $regex);

      return sprintf('~%s(?:%s)~', LIB, $escapedFragment);
  }
  /**
   * Splits $string on $regex, ignoring delimiters that occur inside strings,
   * comments or code blocks.
   *
   * Strings, comments and code blocks are matched first and then rejected with
   * (*SKIP)(*FAIL), which makes the regex engine jump over them as a whole, so
   * a delimiter inside them can never be used as a split point.
   *
   * Every piece is trimmed and pieces that are empty after trimming are
   * dropped. The original indexes are kept (the result may have gaps), so
   * callers that rely on positions keep working. Only truly empty pieces are
   * removed; a piece such as "0" is preserved.
   *
   * @param string $regex  Delimiter pattern fragment (without delimiters).
   * @param string $string Text to split.
   *
   * @return array Non-empty trimmed pieces, keyed by their original position.
   *
   * @throws RuntimeException If preg_split() fails (e.g. a PCRE limit is hit).
   */
  function magicSplit($regex, $string) {
      $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
      if (false === $pieces) {
          throw new RuntimeException(
              'magicSplit: preg_split() failed with PCRE error code ' . preg_last_error()
          );
      }

      $result = array();
      foreach ($pieces as $index => $piece) {
          $piece = trim($piece);
          // Compare against '' explicitly: a plain truthiness test would also
          // discard the valid piece "0".
          if ('' !== $piece) {
              $result[$index] = $piece;
          }
      }

      return $result;
  }
  echo '<pre>';

  ////////////////////
  // Load the grammar and cut out the rules section
  ////////////////////

  $grammar = file_get_contents(GRAMMAR_FILE);
  if (false === $grammar) {
      die('Could not read grammar file ' . GRAMMAR_FILE);
  }

  // A grammar file has the layout "definitions %% rules [%% ...]". magicSplit()
  // keeps the original piece positions, so index 0 is always the definitions
  // section and index 1 the rules section, even if one of them is empty.
  $sections = magicSplit('%%', $grammar);
  if (!isset($sections[1])) {
      die('Could not find the rules section of the grammar!');
  }
  $defs = isset($sections[0]) ? $sections[0] : '';
  $ruleBlocks = $sections[1];

  // Removing every recognized rule block must leave nothing but whitespace.
  // preg_replace() returns null on a PCRE error; that must not be mistaken for
  // "everything was recognized".
  $unrecognized = preg_replace(regex(RULE_BLOCK), '', $ruleBlocks);
  if (null === $unrecognized) {
      die('Matching the rule blocks failed with PCRE error code ' . preg_last_error());
  }
  if ('' !== trim($unrecognized)) {
      die('Not all rule blocks were properly recognized!');
  }

  preg_match_all(regex(RULE_BLOCK), $ruleBlocks, $ruleBlocksMatches, PREG_SET_ORDER);

  foreach ($ruleBlocksMatches as $match) {
      $ruleBlockName = $match['name'];
      $rules = magicSplit('\|', $match['rules']);

      foreach ($rules as &$rule) {
          $parts = magicSplit('\s+', $rule);

          // Collect the positions N that are referenced as $N inside any action
          // code block of this rule.
          $usedParts = array();
          foreach ($parts as $part) {
              if ('{' !== $part[0]) {
                  continue;
              }

              preg_match_all('~\$([0-9]+)~', $part, $backReferencesMatches, PREG_SET_ORDER);
              foreach ($backReferencesMatches as $backReferenceMatch) {
                  // Normalize to int so that "$01" and "$1" address the same position.
                  $usedParts[(int) $backReferenceMatch[1]] = true;
              }
          }

          // Rewrite every part into its HTML form. $i is the position of the
          // part as seen by $N; comments do not count as positions.
          $i = 1;
          foreach ($parts as &$part) {
              // The grammar text goes into HTML output, so <, > and & have to be
              // escaped. The checks below always look at the raw part.
              $escaped = htmlspecialchars($part, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
              $first = $part[0];

              if ('/' === $first) {
                  $part = $escaped;
                  continue;
              }

              $isTerminal = ctype_upper($first);
              $isNonterminal = ctype_lower($first);

              if (isset($usedParts[$i])) {
                  // Referenced part: red if its value is not supposed to be
                  // referenced (literal, code block, terminal outside
                  // $usedTerminals, nonterminal in $unusedNonterminals),
                  // emphasized otherwise.
                  $suspicious = '\'' === $first
                      || '{' === $first
                      || ($isTerminal && !isset($usedTerminals[$part]))
                      || ($isNonterminal && isset($unusedNonterminals[$part]));

                  if ($suspicious) {
                      $part = '<span style="background-color: red; color: white;">' . $escaped . '</span>';
                  } else {
                      $part = '<strong><em>' . $escaped . '</em></strong>';
                  }
              } elseif (($isTerminal && isset($usedTerminals[$part]))
                  || ($isNonterminal && !isset($unusedNonterminals[$part]))
              ) {
                  // Unreferenced part whose value was expected to be referenced.
                  $part = '<span style="background-color: blue; color: white;">' . $escaped . '</span>';
              } else {
                  $part = $escaped;
              }

              ++$i;
          }
          // Break the reference so later writes to $part cannot modify $parts.
          unset($part);

          $rule = implode(' ', $parts);
      }
      // Same for $rule, which still aliases the last element of $rules.
      unset($rule);

      echo $ruleBlockName, ':', "\n", ' ', implode("\n" . ' | ', $rules), "\n", ';', "\n\n";
  }