Link.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\DomCrawler;
  11. /**
  12. * Link represents an HTML link (an HTML a tag).
  13. *
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. *
  16. * @api
  17. */
  18. class Link
  19. {
  20. /**
  21. * @var \DOMNode A \DOMNode instance
  22. */
  23. protected $node;
  24. /**
  25. * @var string The method to use for the link
  26. */
  27. protected $method;
  28. /**
  29. * @var string The URI of the page where the link is embedded (or the base href)
  30. */
  31. protected $currentUri;
  32. /**
  33. * Constructor.
  34. *
  35. * @param \DOMNode $node A \DOMNode instance
  36. * @param string $currentUri The URI of the page where the link is embedded (or the base href)
  37. * @param string $method The method to use for the link (get by default)
  38. *
  39. * @throws \InvalidArgumentException if the node is not a link
  40. *
  41. * @api
  42. */
  43. public function __construct(\DOMNode $node, $currentUri, $method = 'GET')
  44. {
  45. if (!in_array(strtolower(substr($currentUri, 0, 4)), array('http', 'file'))) {
  46. throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri));
  47. }
  48. $this->setNode($node);
  49. $this->method = $method ? strtoupper($method) : null;
  50. $this->currentUri = $currentUri;
  51. }
  52. /**
  53. * Gets the node associated with this link.
  54. *
  55. * @return \DOMNode A \DOMNode instance
  56. */
  57. public function getNode()
  58. {
  59. return $this->node;
  60. }
  61. /**
  62. * Gets the method associated with this link.
  63. *
  64. * @return string The method
  65. *
  66. * @api
  67. */
  68. public function getMethod()
  69. {
  70. return $this->method;
  71. }
  72. /**
  73. * Gets the URI associated with this link.
  74. *
  75. * @return string The URI
  76. *
  77. * @api
  78. */
  79. public function getUri()
  80. {
  81. $uri = trim($this->getRawUri());
  82. // absolute URL?
  83. if (null !== parse_url($uri, PHP_URL_SCHEME)) {
  84. return $uri;
  85. }
  86. // empty URI
  87. if (!$uri) {
  88. return $this->currentUri;
  89. }
  90. // only an anchor
  91. if ('#' === $uri[0]) {
  92. $baseUri = $this->currentUri;
  93. if (false !== $pos = strpos($baseUri, '#')) {
  94. $baseUri = substr($baseUri, 0, $pos);
  95. }
  96. return $baseUri.$uri;
  97. }
  98. // only a query string
  99. if ('?' === $uri[0]) {
  100. $baseUri = $this->currentUri;
  101. // remove the query string from the current uri
  102. if (false !== $pos = strpos($baseUri, '?')) {
  103. $baseUri = substr($baseUri, 0, $pos);
  104. }
  105. return $baseUri.$uri;
  106. }
  107. // absolute URL with relative schema
  108. if (0 === strpos($uri, '//')) {
  109. return preg_replace('#^([^/]*)//.*$#', '$1', $this->currentUri).$uri;
  110. }
  111. $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $this->currentUri);
  112. // absolute path
  113. if ('/' === $uri[0]) {
  114. return $baseUri.$uri;
  115. }
  116. // relative path
  117. $path = parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH);
  118. $path = $this->canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri);
  119. return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
  120. }
  121. /**
  122. * Returns raw uri data.
  123. *
  124. * @return string
  125. */
  126. protected function getRawUri()
  127. {
  128. return $this->node->getAttribute('href');
  129. }
  130. /**
  131. * Returns the canonicalized URI path (see RFC 3986, section 5.2.4)
  132. *
  133. * @param string $path URI path
  134. *
  135. * @return string
  136. */
  137. protected function canonicalizePath($path)
  138. {
  139. if ('' === $path || '/' === $path) {
  140. return $path;
  141. }
  142. if ('.' === substr($path, -1)) {
  143. $path = $path.'/';
  144. }
  145. $output = array();
  146. foreach (explode('/', $path) as $segment) {
  147. if ('..' === $segment) {
  148. array_pop($output);
  149. } elseif ('.' !== $segment) {
  150. array_push($output, $segment);
  151. }
  152. }
  153. return implode('/', $output);
  154. }
  155. /**
  156. * Sets current \DOMNode instance.
  157. *
  158. * @param \DOMNode $node A \DOMNode instance
  159. *
  160. * @throws \LogicException If given node is not an anchor
  161. */
  162. protected function setNode(\DOMNode $node)
  163. {
  164. if ('a' != $node->nodeName) {
  165. throw new \LogicException(sprintf('Unable to click on a "%s" tag.', $node->nodeName));
  166. }
  167. $this->node = $node;
  168. }
  169. }