ru-se.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

Locator.php (13085B)


      1 <?php
      2 /**
      3  * SimplePie
      4  *
      5  * A PHP-Based RSS and Atom Feed Framework.
      6  * Takes the hard work out of managing a complete RSS/Atom solution.
      7  *
      8  * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without modification, are
     12  * permitted provided that the following conditions are met:
     13  *
     14  * 	* Redistributions of source code must retain the above copyright notice, this list of
     15  * 	  conditions and the following disclaimer.
     16  *
     17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
     18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
     19  * 	  provided with the distribution.
     20  *
     21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
     22  * 	  to endorse or promote products derived from this software without specific prior
     23  * 	  written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
     26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
     28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  *
     35  * @package SimplePie
     36  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
     37  * @author Ryan Parman
     38  * @author Sam Sneddon
     39  * @author Ryan McCue
     40  * @link http://simplepie.org/ SimplePie
     41  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
     42  */
     43 
     44 /**
     45  * Used for feed auto-discovery
     46  *
     47  *
     48  * This class can be overloaded with {@see SimplePie::set_locator_class()}
     49  *
     50  * @package SimplePie
     51  */
     52 class SimplePie_Locator
     53 {
     54 	var $useragent;
     55 	var $timeout;
     56 	var $file;
     57 	var $local = array();
     58 	var $elsewhere = array();
     59 	var $cached_entities = array();
     60 	var $http_base;
     61 	var $base;
     62 	var $base_location = 0;
     63 	var $checked_feeds = 0;
     64 	var $max_checked_feeds = 10;
     65 	var $force_fsockopen = false;
     66 	var $curl_options = array();
     67 	protected $registry;
     68 
     69 	public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = array())
     70 	{
     71 		$this->file = $file;
     72 		$this->useragent = $useragent;
     73 		$this->timeout = $timeout;
     74 		$this->max_checked_feeds = $max_checked_feeds;
     75 		$this->force_fsockopen = $force_fsockopen;
     76 		$this->curl_options = $curl_options;
     77 
     78 		if (class_exists('DOMDocument'))
     79 		{
     80 			$this->dom = new DOMDocument();
     81 
     82 			set_error_handler(array('SimplePie_Misc', 'silence_errors'));
     83 			$this->dom->loadHTML($this->file->body);
     84 			restore_error_handler();
     85 		}
     86 		else
     87 		{
     88 			$this->dom = null;
     89 		}
     90 	}
     91 
     92 	public function set_registry(SimplePie_Registry $registry)
     93 	{
     94 		$this->registry = $registry;
     95 	}
     96 
     97 	public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
     98 	{
     99 		if ($this->is_feed($this->file))
    100 		{
    101 			return $this->file;
    102 		}
    103 
    104 		if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
    105 		{
    106 			$sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
    107 			if ($sniffer->get_type() !== 'text/html')
    108 			{
    109 				return null;
    110 			}
    111 		}
    112 
    113 		if ($type & ~SIMPLEPIE_LOCATOR_NONE)
    114 		{
    115 			$this->get_base();
    116 		}
    117 
    118 		if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
    119 		{
    120 			return $working[0];
    121 		}
    122 
    123 		if ($type & (SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY) && $this->get_links())
    124 		{
    125 			if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
    126 			{
    127 				return $working[0];
    128 			}
    129 
    130 			if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
    131 			{
    132 				return $working[0];
    133 			}
    134 
    135 			if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
    136 			{
    137 				return $working[0];
    138 			}
    139 
    140 			if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
    141 			{
    142 				return $working[0];
    143 			}
    144 		}
    145 		return null;
    146 	}
    147 
    148 	public function is_feed($file, $check_html = false)
    149 	{
    150 		if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
    151 		{
    152 			$sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
    153 			$sniffed = $sniffer->get_type();
    154 			$mime_types = array('application/rss+xml', 'application/rdf+xml',
    155 			                    'text/rdf', 'application/atom+xml', 'text/xml',
    156 			                    'application/xml', 'application/x-rss+xml');
    157 			if ($check_html)
    158 			{
    159 				$mime_types[] = 'text/html';
    160 			}
    161 
    162 			return in_array($sniffed, $mime_types);
    163 		}
    164 		elseif ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
    165 		{
    166 			return true;
    167 		}
    168 		else
    169 		{
    170 			return false;
    171 		}
    172 	}
    173 
    174 	public function get_base()
    175 	{
    176 		if ($this->dom === null)
    177 		{
    178 			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
    179 		}
    180 		$this->http_base = $this->file->url;
    181 		$this->base = $this->http_base;
    182 		$elements = $this->dom->getElementsByTagName('base');
    183 		foreach ($elements as $element)
    184 		{
    185 			if ($element->hasAttribute('href'))
    186 			{
    187 				$base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
    188 				if ($base === false)
    189 				{
    190 					continue;
    191 				}
    192 				$this->base = $base;
    193 				$this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
    194 				break;
    195 			}
    196 		}
    197 	}
    198 
    199 	public function autodiscovery()
    200 	{
    201 		$done = array();
    202 		$feeds = array();
    203 		$feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
    204 		$feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
    205 		$feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
    206 
    207 		if (!empty($feeds))
    208 		{
    209 			return array_values($feeds);
    210 		}
    211 
    212 		return null;
    213 	}
    214 
    215 	protected function search_elements_by_tag($name, &$done, $feeds)
    216 	{
    217 		if ($this->dom === null)
    218 		{
    219 			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
    220 		}
    221 
    222 		$links = $this->dom->getElementsByTagName($name);
    223 		foreach ($links as $link)
    224 		{
    225 			if ($this->checked_feeds === $this->max_checked_feeds)
    226 			{
    227 				break;
    228 			}
    229 			if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
    230 			{
    231 				$rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel')))));
    232 				$line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
    233 
    234 				if ($this->base_location < $line)
    235 				{
    236 					$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
    237 				}
    238 				else
    239 				{
    240 					$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
    241 				}
    242 				if ($href === false)
    243 				{
    244 					continue;
    245 				}
    246 
    247 				if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('text/html', 'application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
    248 				{
    249 					$this->checked_feeds++;
    250 					$headers = array(
    251 						'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
    252 					);
    253 					$feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
    254 					if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed, true))
    255 					{
    256 						$feeds[$href] = $feed;
    257 					}
    258 				}
    259 				$done[] = $href;
    260 			}
    261 		}
    262 
    263 		return $feeds;
    264 	}
    265 
    266 	public function get_links()
    267 	{
    268 		if ($this->dom === null)
    269 		{
    270 			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
    271 		}
    272 
    273 		$links = $this->dom->getElementsByTagName('a');
    274 		foreach ($links as $link)
    275 		{
    276 			if ($link->hasAttribute('href'))
    277 			{
    278 				$href = trim($link->getAttribute('href'));
    279 				$parsed = $this->registry->call('Misc', 'parse_url', array($href));
    280 				if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
    281 				{
    282 					if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
    283 					{
    284 						$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
    285 					}
    286 					else
    287 					{
    288 						$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
    289 					}
    290 					if ($href === false)
    291 					{
    292 						continue;
    293 					}
    294 
    295 					$current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
    296 
    297 					if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
    298 					{
    299 						$this->local[] = $href;
    300 					}
    301 					else
    302 					{
    303 						$this->elsewhere[] = $href;
    304 					}
    305 				}
    306 			}
    307 		}
    308 		$this->local = array_unique($this->local);
    309 		$this->elsewhere = array_unique($this->elsewhere);
    310 		if (!empty($this->local) || !empty($this->elsewhere))
    311 		{
    312 			return true;
    313 		}
    314 		return null;
    315 	}
    316 
    317 	public function get_rel_link($rel)
    318 	{
    319 		if ($this->dom === null)
    320 		{
    321 			throw new SimplePie_Exception('DOMDocument not found, unable to use '.
    322 			                              'locator');
    323 		}
    324 		if (!class_exists('DOMXpath'))
    325 		{
    326 			throw new SimplePie_Exception('DOMXpath not found, unable to use '.
    327 			                              'get_rel_link');
    328 		}
    329 
    330 		$xpath = new DOMXpath($this->dom);
    331 		$query = '//a[@rel and @href] | //link[@rel and @href]';
    332 		foreach ($xpath->query($query) as $link)
    333 		{
    334 			$href = trim($link->getAttribute('href'));
    335 			$parsed = $this->registry->call('Misc', 'parse_url', array($href));
    336 			if ($parsed['scheme'] === '' ||
    337 			    preg_match('/^https?$/i', $parsed['scheme']))
    338 			{
    339 				if (method_exists($link, 'getLineNo') &&
    340 				    $this->base_location < $link->getLineNo())
    341 				{
    342 					$href =
    343 						$this->registry->call('Misc', 'absolutize_url',
    344 						                      array(trim($link->getAttribute('href')),
    345 						                            $this->base));
    346 				}
    347 				else
    348 				{
    349 					$href =
    350 						$this->registry->call('Misc', 'absolutize_url',
    351 						                      array(trim($link->getAttribute('href')),
    352 						                            $this->http_base));
    353 				}
    354 				if ($href === false)
    355 				{
    356 					return null;
    357 				}
    358 				$rel_values = explode(' ', strtolower($link->getAttribute('rel')));
    359 				if (in_array($rel, $rel_values))
    360 				{
    361 					return $href;
    362 				}
    363 			}
    364 		}
    365 		return null;
    366 	}
    367 
    368 	public function extension(&$array)
    369 	{
    370 		foreach ($array as $key => $value)
    371 		{
    372 			if ($this->checked_feeds === $this->max_checked_feeds)
    373 			{
    374 				break;
    375 			}
    376 			if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
    377 			{
    378 				$this->checked_feeds++;
    379 
    380 				$headers = array(
    381 					'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
    382 				);
    383 				$feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
    384 				if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
    385 				{
    386 					return array($feed);
    387 				}
    388 				else
    389 				{
    390 					unset($array[$key]);
    391 				}
    392 			}
    393 		}
    394 		return null;
    395 	}
    396 
    397 	public function body(&$array)
    398 	{
    399 		foreach ($array as $key => $value)
    400 		{
    401 			if ($this->checked_feeds === $this->max_checked_feeds)
    402 			{
    403 				break;
    404 			}
    405 			if (preg_match('/(feed|rss|rdf|atom|xml)/i', $value))
    406 			{
    407 				$this->checked_feeds++;
    408 				$headers = array(
    409 					'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
    410 				);
    411 				$feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent, $this->force_fsockopen, $this->curl_options));
    412 				if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
    413 				{
    414 					return array($feed);
    415 				}
    416 				else
    417 				{
    418 					unset($array[$key]);
    419 				}
    420 			}
    421 		}
    422 		return null;
    423 	}
    424 }