balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

Parser.php (28562B)


      1 <?php
      2 /**
      3  * SimplePie
      4  *
      5  * A PHP-Based RSS and Atom Feed Framework.
      6  * Takes the hard work out of managing a complete RSS/Atom solution.
      7  *
      8  * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without modification, are
     12  * permitted provided that the following conditions are met:
     13  *
     14  * 	* Redistributions of source code must retain the above copyright notice, this list of
     15  * 	  conditions and the following disclaimer.
     16  *
     17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
     18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
     19  * 	  provided with the distribution.
     20  *
     21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
     22  * 	  to endorse or promote products derived from this software without specific prior
     23  * 	  written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
     26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
     28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  *
     35  * @package SimplePie
     36  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
     37  * @author Ryan Parman
     38  * @author Sam Sneddon
     39  * @author Ryan McCue
     40  * @link http://simplepie.org/ SimplePie
     41  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
     42  */
     43 
     44 /**
     45  * Parses XML into something sane
     46  *
     47  *
     48  * This class can be overloaded with {@see SimplePie::set_parser_class()}
     49  *
     50  * @package SimplePie
     51  * @subpackage Parsing
     52  */
     53 class SimplePie_Parser
     54 {
     55 	var $error_code;
     56 	var $error_string;
     57 	var $current_line;
     58 	var $current_column;
     59 	var $current_byte;
     60 	var $separator = ' ';
     61 	var $namespace = array('');
     62 	var $element = array('');
     63 	var $xml_base = array('');
     64 	var $xml_base_explicit = array(false);
     65 	var $xml_lang = array('');
     66 	var $data = array();
     67 	var $datas = array(array());
     68 	var $current_xhtml_construct = -1;
     69 	var $encoding;
     70 	protected $registry;
     71 
	/**
	 * Store the registry this parser uses to create helper objects
	 * (parse() asks it for an XML_Declaration_Parser) and to call helper
	 * methods (tag_open() calls Misc::absolutize_url through it).
	 *
	 * @param SimplePie_Registry $registry
	 * @return void
	 */
	public function set_registry(SimplePie_Registry $registry)
	{
		$this->registry = $registry;
	}
     76 
     77 	public function parse(&$data, $encoding, $url = '')
     78 	{
     79 		if (class_exists('DOMXpath') && function_exists('Mf2\parse')) {
     80 			$doc = new DOMDocument();
     81 			@$doc->loadHTML($data);
     82 			$xpath = new DOMXpath($doc);
     83 			// Check for both h-feed and h-entry, as both a feed with no entries
     84 			// and a list of entries without an h-feed wrapper are both valid.
     85 			$query = '//*[contains(concat(" ", @class, " "), " h-feed ") or '.
     86 				'contains(concat(" ", @class, " "), " h-entry ")]';
     87 			$result = $xpath->query($query);
     88 			if ($result->length !== 0) {
     89 				return $this->parse_microformats($data, $url);
     90 			}
     91 		}
     92 
     93 		// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
     94 		if (strtoupper($encoding) === 'US-ASCII')
     95 		{
     96 			$this->encoding = 'UTF-8';
     97 		}
     98 		else
     99 		{
    100 			$this->encoding = $encoding;
    101 		}
    102 
    103 		// Strip BOM:
    104 		// UTF-32 Big Endian BOM
    105 		if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
    106 		{
    107 			$data = substr($data, 4);
    108 		}
    109 		// UTF-32 Little Endian BOM
    110 		elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
    111 		{
    112 			$data = substr($data, 4);
    113 		}
    114 		// UTF-16 Big Endian BOM
    115 		elseif (substr($data, 0, 2) === "\xFE\xFF")
    116 		{
    117 			$data = substr($data, 2);
    118 		}
    119 		// UTF-16 Little Endian BOM
    120 		elseif (substr($data, 0, 2) === "\xFF\xFE")
    121 		{
    122 			$data = substr($data, 2);
    123 		}
    124 		// UTF-8 BOM
    125 		elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
    126 		{
    127 			$data = substr($data, 3);
    128 		}
    129 
    130 		if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
    131 		{
    132 			$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
    133 			if ($declaration->parse())
    134 			{
    135 				$data = substr($data, $pos + 2);
    136 				$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' ."\n". $this->declare_html_entities() . $data;
    137 			}
    138 			else
    139 			{
    140 				$this->error_string = 'SimplePie bug! Please report this!';
    141 				return false;
    142 			}
    143 		}
    144 
    145 		$return = true;
    146 
    147 		static $xml_is_sane = null;
    148 		if ($xml_is_sane === null)
    149 		{
    150 			$parser_check = xml_parser_create();
    151 			xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
    152 			xml_parser_free($parser_check);
    153 			$xml_is_sane = isset($values[0]['value']);
    154 		}
    155 
    156 		// Create the parser
    157 		if ($xml_is_sane)
    158 		{
    159 			$xml = xml_parser_create_ns($this->encoding, $this->separator);
    160 			xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
    161 			xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
    162 			xml_set_object($xml, $this);
    163 			xml_set_character_data_handler($xml, 'cdata');
    164 			xml_set_element_handler($xml, 'tag_open', 'tag_close');
    165 
    166 			// Parse!
    167 			if (!xml_parse($xml, $data, true))
    168 			{
    169 				$this->error_code = xml_get_error_code($xml);
    170 				$this->error_string = xml_error_string($this->error_code);
    171 				$return = false;
    172 			}
    173 			$this->current_line = xml_get_current_line_number($xml);
    174 			$this->current_column = xml_get_current_column_number($xml);
    175 			$this->current_byte = xml_get_current_byte_index($xml);
    176 			xml_parser_free($xml);
    177 			return $return;
    178 		}
    179 
    180 		libxml_clear_errors();
    181 		$xml = new XMLReader();
    182 		$xml->xml($data);
    183 		while (@$xml->read())
    184 		{
    185 			switch ($xml->nodeType)
    186 			{
    187 
    188 				case constant('XMLReader::END_ELEMENT'):
    189 					if ($xml->namespaceURI !== '')
    190 					{
    191 						$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
    192 					}
    193 					else
    194 					{
    195 						$tagName = $xml->localName;
    196 					}
    197 					$this->tag_close(null, $tagName);
    198 					break;
    199 				case constant('XMLReader::ELEMENT'):
    200 					$empty = $xml->isEmptyElement;
    201 					if ($xml->namespaceURI !== '')
    202 					{
    203 						$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
    204 					}
    205 					else
    206 					{
    207 						$tagName = $xml->localName;
    208 					}
    209 					$attributes = array();
    210 					while ($xml->moveToNextAttribute())
    211 					{
    212 						if ($xml->namespaceURI !== '')
    213 						{
    214 							$attrName = $xml->namespaceURI . $this->separator . $xml->localName;
    215 						}
    216 						else
    217 						{
    218 							$attrName = $xml->localName;
    219 						}
    220 						$attributes[$attrName] = $xml->value;
    221 					}
    222 					$this->tag_open(null, $tagName, $attributes);
    223 					if ($empty)
    224 					{
    225 						$this->tag_close(null, $tagName);
    226 					}
    227 					break;
    228 				case constant('XMLReader::TEXT'):
    229 
    230 				case constant('XMLReader::CDATA'):
    231 					$this->cdata(null, $xml->value);
    232 					break;
    233 			}
    234 		}
    235 		if ($error = libxml_get_last_error())
    236 		{
    237 			$this->error_code = $error->code;
    238 			$this->error_string = $error->message;
    239 			$this->current_line = $error->line;
    240 			$this->current_column = $error->column;
    241 			return false;
    242 		}
    243 
    244 		return true;
    245 	}
    246 
	/**
	 * Return the error code recorded by the last failed parse().
	 *
	 * parse() fills it from xml_get_error_code() or, on the XMLReader path,
	 * from the last libxml error. It is unset until a parse has failed.
	 *
	 * @return int|null
	 */
	public function get_error_code()
	{
		return $this->error_code;
	}
    251 
	/**
	 * Return the error message recorded by the last failed parse().
	 *
	 * @return string|null Unset (null) until a parse has failed.
	 */
	public function get_error_string()
	{
		return $this->error_string;
	}
    256 
	/**
	 * Return the line number recorded by parse(): the parser position after
	 * an xml_parse() run, or the line of the libxml error on the XMLReader path.
	 *
	 * @return int|null
	 */
	public function get_current_line()
	{
		return $this->current_line;
	}
    261 
	/**
	 * Return the column number recorded by parse(): the parser position after
	 * an xml_parse() run, or the column of the libxml error on the XMLReader path.
	 *
	 * @return int|null
	 */
	public function get_current_column()
	{
		return $this->current_column;
	}
    266 
	/**
	 * Return the byte index recorded by parse().
	 *
	 * Only the xml_parse() path sets this value; it stays unset when the
	 * XMLReader fallback is used.
	 *
	 * @return int|null
	 */
	public function get_current_byte()
	{
		return $this->current_byte;
	}
    271 
	/**
	 * Return the parsed document tree.
	 *
	 * Element nodes are stored under 'child' => namespace => element name,
	 * each as a list of arrays carrying 'data', 'attribs', 'xml_base',
	 * 'xml_base_explicit' and 'xml_lang' (see tag_open()).
	 *
	 * @return array
	 */
	public function get_data()
	{
		return $this->data;
	}
    276 
    277 	public function tag_open($parser, $tag, $attributes)
    278 	{
    279 		list($this->namespace[], $this->element[]) = $this->split_ns($tag);
    280 
    281 		$attribs = array();
    282 		foreach ($attributes as $name => $value)
    283 		{
    284 			list($attrib_namespace, $attribute) = $this->split_ns($name);
    285 			$attribs[$attrib_namespace][$attribute] = $value;
    286 		}
    287 
    288 		if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
    289 		{
    290 			$base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
    291 			if ($base !== false)
    292 			{
    293 				$this->xml_base[] = $base;
    294 				$this->xml_base_explicit[] = true;
    295 			}
    296 		}
    297 		else
    298 		{
    299 			$this->xml_base[] = end($this->xml_base);
    300 			$this->xml_base_explicit[] = end($this->xml_base_explicit);
    301 		}
    302 
    303 		if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
    304 		{
    305 			$this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
    306 		}
    307 		else
    308 		{
    309 			$this->xml_lang[] = end($this->xml_lang);
    310 		}
    311 
    312 		if ($this->current_xhtml_construct >= 0)
    313 		{
    314 			$this->current_xhtml_construct++;
    315 			if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
    316 			{
    317 				$this->data['data'] .= '<' . end($this->element);
    318 				if (isset($attribs['']))
    319 				{
    320 					foreach ($attribs[''] as $name => $value)
    321 					{
    322 						$this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
    323 					}
    324 				}
    325 				$this->data['data'] .= '>';
    326 			}
    327 		}
    328 		else
    329 		{
    330 			$this->datas[] =& $this->data;
    331 			$this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
    332 			$this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
    333 			if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
    334 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
    335 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
    336 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
    337 			|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
    338 			{
    339 				$this->current_xhtml_construct = 0;
    340 			}
    341 		}
    342 	}
    343 
    344 	public function cdata($parser, $cdata)
    345 	{
    346 		if ($this->current_xhtml_construct >= 0)
    347 		{
    348 			$this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
    349 		}
    350 		else
    351 		{
    352 			$this->data['data'] .= $cdata;
    353 		}
    354 	}
    355 
    356 	public function tag_close($parser, $tag)
    357 	{
    358 		if ($this->current_xhtml_construct >= 0)
    359 		{
    360 			$this->current_xhtml_construct--;
    361 			if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
    362 			{
    363 				$this->data['data'] .= '</' . end($this->element) . '>';
    364 			}
    365 		}
    366 		if ($this->current_xhtml_construct === -1)
    367 		{
    368 			$this->data =& $this->datas[count($this->datas) - 1];
    369 			array_pop($this->datas);
    370 		}
    371 
    372 		array_pop($this->element);
    373 		array_pop($this->namespace);
    374 		array_pop($this->xml_base);
    375 		array_pop($this->xml_base_explicit);
    376 		array_pop($this->xml_lang);
    377 	}
    378 
    379 	public function split_ns($string)
    380 	{
    381 		static $cache = array();
    382 		if (!isset($cache[$string]))
    383 		{
    384 			if ($pos = strpos($string, $this->separator))
    385 			{
    386 				static $separator_length;
    387 				if (!$separator_length)
    388 				{
    389 					$separator_length = strlen($this->separator);
    390 				}
    391 				$namespace = substr($string, 0, $pos);
    392 				$local_name = substr($string, $pos + $separator_length);
    393 				if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
    394 				{
    395 					$namespace = SIMPLEPIE_NAMESPACE_ITUNES;
    396 				}
    397 
    398 				// Normalize the Media RSS namespaces
    399 				if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
    400 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
    401 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
    402 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
    403 					$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
    404 				{
    405 					$namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
    406 				}
    407 				$cache[$string] = array($namespace, $local_name);
    408 			}
    409 			else
    410 			{
    411 				$cache[$string] = array('', $string);
    412 			}
    413 		}
    414 		return $cache[$string];
    415 	}
    416 
    417 	private function parse_hcard($data, $category = false) {
    418 		$name = '';
    419 		$link = '';
    420 		// Check if h-card is set and pass that information on in the link.
    421 		if (isset($data['type']) && in_array('h-card', $data['type'])) {
    422 			if (isset($data['properties']['name'][0])) {
    423 				$name = $data['properties']['name'][0];
    424 			}
    425 			if (isset($data['properties']['url'][0])) {
    426 				$link = $data['properties']['url'][0];
    427 				if ($name === '') {
    428 					$name = $link;
    429 				}
    430 				else {
    431 					// can't have commas in categories.
    432 					$name = str_replace(',', '', $name);
    433 				}
    434 				$person_tag = $category ? '<span class="person-tag"></span>' : '';
    435 				return '<a class="h-card" href="'.$link.'">'.$person_tag.$name.'</a>';
    436 			}
    437 		}
    438 		return isset($data['value']) ? $data['value'] : '';
    439 	}
    440 
    441 	private function parse_microformats(&$data, $url) {
    442 		$feed_title = '';
    443 		$feed_author = NULL;
    444 		$author_cache = array();
    445 		$items = array();
    446 		$entries = array();
    447 		$mf = Mf2\parse($data, $url);
    448 		// First look for an h-feed.
    449 		$h_feed = array();
    450 		foreach ($mf['items'] as $mf_item) {
    451 			if (in_array('h-feed', $mf_item['type'])) {
    452 				$h_feed = $mf_item;
    453 				break;
    454 			}
    455 			// Also look for h-feed or h-entry in the children of each top level item.
    456 			if (!isset($mf_item['children'][0]['type'])) continue;
    457 			if (in_array('h-feed', $mf_item['children'][0]['type'])) {
    458 				$h_feed = $mf_item['children'][0];
    459 				// In this case the parent of the h-feed may be an h-card, so use it as
    460 				// the feed_author.
    461 				if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item;
    462 				break;
    463 			}
    464 			else if (in_array('h-entry', $mf_item['children'][0]['type'])) {
    465 				$entries = $mf_item['children'];
    466 				// In this case the parent of the h-entry list may be an h-card, so use
    467 				// it as the feed_author.
    468 				if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item;
    469 				break;
    470 			}
    471 		}
    472 		if (isset($h_feed['children'])) {
    473 			$entries = $h_feed['children'];
    474 			// Also set the feed title and store author from the h-feed if available.
    475 			if (isset($mf['items'][0]['properties']['name'][0])) {
    476 				$feed_title = $mf['items'][0]['properties']['name'][0];
    477 			}
    478 			if (isset($mf['items'][0]['properties']['author'][0])) {
    479 				$feed_author = $mf['items'][0]['properties']['author'][0];
    480 			}
    481 		}
    482 		else if (count($entries) === 0) {
    483 			$entries = $mf['items'];
    484 		}
    485 		for ($i = 0; $i < count($entries); $i++) {
    486 			$entry = $entries[$i];
    487 			if (in_array('h-entry', $entry['type'])) {
    488 				$item = array();
    489 				$title = '';
    490 				$description = '';
    491 				if (isset($entry['properties']['url'][0])) {
    492 					$link = $entry['properties']['url'][0];
    493 					if (isset($link['value'])) $link = $link['value'];
    494 					$item['link'] = array(array('data' => $link));
    495 				}
    496 				if (isset($entry['properties']['uid'][0])) {
    497 					$guid = $entry['properties']['uid'][0];
    498 					if (isset($guid['value'])) $guid = $guid['value'];
    499 					$item['guid'] = array(array('data' => $guid));
    500 				}
    501 				if (isset($entry['properties']['name'][0])) {
    502 					$title = $entry['properties']['name'][0];
    503 					if (isset($title['value'])) $title = $title['value'];
    504 					$item['title'] = array(array('data' => $title));
    505 				}
    506 				if (isset($entry['properties']['author'][0]) || isset($feed_author)) {
    507 					// author is a special case, it can be plain text or an h-card array.
    508 					// If it's plain text it can also be a url that should be followed to
    509 					// get the actual h-card.
    510 					$author = isset($entry['properties']['author'][0]) ?
    511 						$entry['properties']['author'][0] : $feed_author;
    512 					if (!is_string($author)) {
    513 						$author = $this->parse_hcard($author);
    514 					}
    515 					else if (strpos($author, 'http') === 0) {
    516 						if (isset($author_cache[$author])) {
    517 							$author = $author_cache[$author];
    518 						}
    519 						else {
    520 							$mf = Mf2\fetch($author);
    521 							foreach ($mf['items'] as $hcard) {
    522 								// Only interested in an h-card by itself in this case.
    523 								if (!in_array('h-card', $hcard['type'])) {
    524 									continue;
    525 								}
    526 								// It must have a url property matching what we fetched.
    527 								if (!isset($hcard['properties']['url']) ||
    528 										!(in_array($author, $hcard['properties']['url']))) {
    529 									continue;
    530 								}
    531 								// Save parse_hcard the trouble of finding the correct url.
    532 								$hcard['properties']['url'][0] = $author;
    533 								// Cache this h-card for the next h-entry to check.
    534 								$author_cache[$author] = $this->parse_hcard($hcard);
    535 								$author = $author_cache[$author];
    536 								break;
    537 							}
    538 						}
    539 					}
    540 					$item['author'] = array(array('data' => $author));
    541 				}
    542 				if (isset($entry['properties']['photo'][0])) {
    543 					// If a photo is also in content, don't need to add it again here.
    544 					$content = '';
    545 					if (isset($entry['properties']['content'][0]['html'])) {
    546 						$content = $entry['properties']['content'][0]['html'];
    547 					}
    548 					$photo_list = array();
    549 					for ($j = 0; $j < count($entry['properties']['photo']); $j++) {
    550 						$photo = $entry['properties']['photo'][$j];
    551 						if (!empty($photo) && strpos($content, $photo) === false) {
    552 							$photo_list[] = $photo;
    553 						}
    554 					}
    555 					// When there's more than one photo show the first and use a lightbox.
    556 					// Need a permanent, unique name for the image set, but don't have
    557 					// anything unique except for the content itself, so use that.
    558 					$count = count($photo_list);
    559 					if ($count > 1) {
    560 						$image_set_id = preg_replace('/[[:^alnum:]]/', '', $photo_list[0]);
    561 						$description = '<p>';
    562 						for ($j = 0; $j < $count; $j++) {
    563 							$hidden = $j === 0 ? '' : 'class="hidden" ';
    564 							$description .= '<a href="'.$photo_list[$j].'" '.$hidden.
    565 								'data-lightbox="image-set-'.$image_set_id.'">'.
    566 								'<img src="'.$photo_list[$j].'"></a>';
    567 						}
    568 						$description .= '<br><b>'.$count.' photos</b></p>';
    569 					}
    570 					else if ($count == 1) {
    571 						$description = '<p><img src="'.$photo_list[0].'"></p>';
    572 					}
    573 				}
    574 				if (isset($entry['properties']['content'][0]['html'])) {
    575 					// e-content['value'] is the same as p-name when they are on the same
    576 					// element. Use this to replace title with a strip_tags version so
    577 					// that alt text from images is not included in the title.
    578 					if ($entry['properties']['content'][0]['value'] === $title) {
    579 						$title = strip_tags($entry['properties']['content'][0]['html']);
    580 						$item['title'] = array(array('data' => $title));
    581 					}
    582 					$description .= $entry['properties']['content'][0]['html'];
    583 					if (isset($entry['properties']['in-reply-to'][0])) {
    584 						$in_reply_to = '';
    585 						if (is_string($entry['properties']['in-reply-to'][0])) {
    586 							$in_reply_to = $entry['properties']['in-reply-to'][0];
    587 						}
    588 						else if (isset($entry['properties']['in-reply-to'][0]['value'])) {
    589 							$in_reply_to = $entry['properties']['in-reply-to'][0]['value'];
    590 						}
    591 						if ($in_reply_to !== '') {
    592 							$description .= '<p><span class="in-reply-to"></span> '.
    593 								'<a href="'.$in_reply_to.'">'.$in_reply_to.'</a><p>';
    594 						}
    595 					}
    596 					$item['description'] = array(array('data' => $description));
    597 				}
    598 				if (isset($entry['properties']['category'])) {
    599 					$category_csv = '';
    600 					// Categories can also contain h-cards.
    601 					foreach ($entry['properties']['category'] as $category) {
    602 						if ($category_csv !== '') $category_csv .= ', ';
    603 						if (is_string($category)) {
    604 							// Can't have commas in categories.
    605 							$category_csv .= str_replace(',', '', $category);
    606 						}
    607 						else {
    608 							$category_csv .= $this->parse_hcard($category, true);
    609 						}
    610 					}
    611 					$item['category'] = array(array('data' => $category_csv));
    612 				}
    613 				if (isset($entry['properties']['published'][0])) {
    614 					$timestamp = strtotime($entry['properties']['published'][0]);
    615 					$pub_date = date('F j Y g:ia', $timestamp).' GMT';
    616 					$item['pubDate'] = array(array('data' => $pub_date));
    617 				}
    618 				// The title and description are set to the empty string to represent
    619 				// a deleted item (which also makes it an invalid rss item).
    620 				if (isset($entry['properties']['deleted'][0])) {
    621 					$item['title'] = array(array('data' => ''));
    622 					$item['description'] = array(array('data' => ''));
    623 				}
    624 				$items[] = array('child' => array('' => $item));
    625 			}
    626 		}
    627 		// Mimic RSS data format when storing microformats.
    628 		$link = array(array('data' => $url));
    629 		$image = '';
    630 		if (!is_string($feed_author) &&
    631 				isset($feed_author['properties']['photo'][0])) {
    632 			$image = array(array('child' => array('' => array('url' =>
    633 				array(array('data' => $feed_author['properties']['photo'][0]))))));
    634 		}
    635 		// Use the name given for the h-feed, or get the title from the html.
    636 		if ($feed_title !== '') {
    637 			$feed_title = array(array('data' => htmlspecialchars($feed_title)));
    638 		}
    639 		else if ($position = strpos($data, '<title>')) {
    640 			$start = $position < 200 ? 0 : $position - 200;
    641 			$check = substr($data, $start, 400);
    642 			$matches = array();
    643 			if (preg_match('/<title>(.+)<\/title>/', $check, $matches)) {
    644 				$feed_title = array(array('data' => htmlspecialchars($matches[1])));
    645 			}
    646 		}
    647 		$channel = array('channel' => array(array('child' => array('' =>
    648 			array('link' => $link, 'image' => $image, 'title' => $feed_title,
    649 			      'item' => $items)))));
    650 		$rss = array(array('attribs' => array('' => array('version' => '2.0')),
    651 		                   'child' => array('' => $channel)));
    652 		$this->data = array('child' => array('' => array('rss' => $rss)));
    653 		return true;
    654 	}
    655 
    656 	private function declare_html_entities() {
    657 		// This is required because the RSS specification says that entity-encoded
    658 		// html is allowed, but the xml specification says they must be declared.
    659 		return '<!DOCTYPE html [ <!ENTITY nbsp "&#x00A0;"> <!ENTITY iexcl "&#x00A1;"> <!ENTITY cent "&#x00A2;"> <!ENTITY pound "&#x00A3;"> <!ENTITY curren "&#x00A4;"> <!ENTITY yen "&#x00A5;"> <!ENTITY brvbar "&#x00A6;"> <!ENTITY sect "&#x00A7;"> <!ENTITY uml "&#x00A8;"> <!ENTITY copy "&#x00A9;"> <!ENTITY ordf "&#x00AA;"> <!ENTITY laquo "&#x00AB;"> <!ENTITY not "&#x00AC;"> <!ENTITY shy "&#x00AD;"> <!ENTITY reg "&#x00AE;"> <!ENTITY macr "&#x00AF;"> <!ENTITY deg "&#x00B0;"> <!ENTITY plusmn "&#x00B1;"> <!ENTITY sup2 "&#x00B2;"> <!ENTITY sup3 "&#x00B3;"> <!ENTITY acute "&#x00B4;"> <!ENTITY micro "&#x00B5;"> <!ENTITY para "&#x00B6;"> <!ENTITY middot "&#x00B7;"> <!ENTITY cedil "&#x00B8;"> <!ENTITY sup1 "&#x00B9;"> <!ENTITY ordm "&#x00BA;"> <!ENTITY raquo "&#x00BB;"> <!ENTITY frac14 "&#x00BC;"> <!ENTITY frac12 "&#x00BD;"> <!ENTITY frac34 "&#x00BE;"> <!ENTITY iquest "&#x00BF;"> <!ENTITY Agrave "&#x00C0;"> <!ENTITY Aacute "&#x00C1;"> <!ENTITY Acirc "&#x00C2;"> <!ENTITY Atilde "&#x00C3;"> <!ENTITY Auml "&#x00C4;"> <!ENTITY Aring "&#x00C5;"> <!ENTITY AElig "&#x00C6;"> <!ENTITY Ccedil "&#x00C7;"> <!ENTITY Egrave "&#x00C8;"> <!ENTITY Eacute "&#x00C9;"> <!ENTITY Ecirc "&#x00CA;"> <!ENTITY Euml "&#x00CB;"> <!ENTITY Igrave "&#x00CC;"> <!ENTITY Iacute "&#x00CD;"> <!ENTITY Icirc "&#x00CE;"> <!ENTITY Iuml "&#x00CF;"> <!ENTITY ETH "&#x00D0;"> <!ENTITY Ntilde "&#x00D1;"> <!ENTITY Ograve "&#x00D2;"> <!ENTITY Oacute "&#x00D3;"> <!ENTITY Ocirc "&#x00D4;"> <!ENTITY Otilde "&#x00D5;"> <!ENTITY Ouml "&#x00D6;"> <!ENTITY times "&#x00D7;"> <!ENTITY Oslash "&#x00D8;"> <!ENTITY Ugrave "&#x00D9;"> <!ENTITY Uacute "&#x00DA;"> <!ENTITY Ucirc "&#x00DB;"> <!ENTITY Uuml "&#x00DC;"> <!ENTITY Yacute "&#x00DD;"> <!ENTITY THORN "&#x00DE;"> <!ENTITY szlig "&#x00DF;"> <!ENTITY agrave "&#x00E0;"> <!ENTITY aacute "&#x00E1;"> <!ENTITY acirc "&#x00E2;"> <!ENTITY atilde "&#x00E3;"> <!ENTITY auml "&#x00E4;"> <!ENTITY aring "&#x00E5;"> <!ENTITY aelig "&#x00E6;"> <!ENTITY ccedil "&#x00E7;"> <!ENTITY egrave 
"&#x00E8;"> <!ENTITY eacute "&#x00E9;"> <!ENTITY ecirc "&#x00EA;"> <!ENTITY euml "&#x00EB;"> <!ENTITY igrave "&#x00EC;"> <!ENTITY iacute "&#x00ED;"> <!ENTITY icirc "&#x00EE;"> <!ENTITY iuml "&#x00EF;"> <!ENTITY eth "&#x00F0;"> <!ENTITY ntilde "&#x00F1;"> <!ENTITY ograve "&#x00F2;"> <!ENTITY oacute "&#x00F3;"> <!ENTITY ocirc "&#x00F4;"> <!ENTITY otilde "&#x00F5;"> <!ENTITY ouml "&#x00F6;"> <!ENTITY divide "&#x00F7;"> <!ENTITY oslash "&#x00F8;"> <!ENTITY ugrave "&#x00F9;"> <!ENTITY uacute "&#x00FA;"> <!ENTITY ucirc "&#x00FB;"> <!ENTITY uuml "&#x00FC;"> <!ENTITY yacute "&#x00FD;"> <!ENTITY thorn "&#x00FE;"> <!ENTITY yuml "&#x00FF;"> <!ENTITY OElig "&#x0152;"> <!ENTITY oelig "&#x0153;"> <!ENTITY Scaron "&#x0160;"> <!ENTITY scaron "&#x0161;"> <!ENTITY Yuml "&#x0178;"> <!ENTITY fnof "&#x0192;"> <!ENTITY circ "&#x02C6;"> <!ENTITY tilde "&#x02DC;"> <!ENTITY Alpha "&#x0391;"> <!ENTITY Beta "&#x0392;"> <!ENTITY Gamma "&#x0393;"> <!ENTITY Epsilon "&#x0395;"> <!ENTITY Zeta "&#x0396;"> <!ENTITY Eta "&#x0397;"> <!ENTITY Theta "&#x0398;"> <!ENTITY Iota "&#x0399;"> <!ENTITY Kappa "&#x039A;"> <!ENTITY Lambda "&#x039B;"> <!ENTITY Mu "&#x039C;"> <!ENTITY Nu "&#x039D;"> <!ENTITY Xi "&#x039E;"> <!ENTITY Omicron "&#x039F;"> <!ENTITY Pi "&#x03A0;"> <!ENTITY Rho "&#x03A1;"> <!ENTITY Sigma "&#x03A3;"> <!ENTITY Tau "&#x03A4;"> <!ENTITY Upsilon "&#x03A5;"> <!ENTITY Phi "&#x03A6;"> <!ENTITY Chi "&#x03A7;"> <!ENTITY Psi "&#x03A8;"> <!ENTITY Omega "&#x03A9;"> <!ENTITY alpha "&#x03B1;"> <!ENTITY beta "&#x03B2;"> <!ENTITY gamma "&#x03B3;"> <!ENTITY delta "&#x03B4;"> <!ENTITY epsilon "&#x03B5;"> <!ENTITY zeta "&#x03B6;"> <!ENTITY eta "&#x03B7;"> <!ENTITY theta "&#x03B8;"> <!ENTITY iota "&#x03B9;"> <!ENTITY kappa "&#x03BA;"> <!ENTITY lambda "&#x03BB;"> <!ENTITY mu "&#x03BC;"> <!ENTITY nu "&#x03BD;"> <!ENTITY xi "&#x03BE;"> <!ENTITY omicron "&#x03BF;"> <!ENTITY pi "&#x03C0;"> <!ENTITY rho "&#x03C1;"> <!ENTITY sigmaf "&#x03C2;"> <!ENTITY sigma "&#x03C3;"> <!ENTITY tau "&#x03C4;"> <!ENTITY upsilon 
"&#x03C5;"> <!ENTITY phi "&#x03C6;"> <!ENTITY chi "&#x03C7;"> <!ENTITY psi "&#x03C8;"> <!ENTITY omega "&#x03C9;"> <!ENTITY thetasym "&#x03D1;"> <!ENTITY upsih "&#x03D2;"> <!ENTITY piv "&#x03D6;"> <!ENTITY ensp "&#x2002;"> <!ENTITY emsp "&#x2003;"> <!ENTITY thinsp "&#x2009;"> <!ENTITY zwnj "&#x200C;"> <!ENTITY zwj "&#x200D;"> <!ENTITY lrm "&#x200E;"> <!ENTITY rlm "&#x200F;"> <!ENTITY ndash "&#x2013;"> <!ENTITY mdash "&#x2014;"> <!ENTITY lsquo "&#x2018;"> <!ENTITY rsquo "&#x2019;"> <!ENTITY sbquo "&#x201A;"> <!ENTITY ldquo "&#x201C;"> <!ENTITY rdquo "&#x201D;"> <!ENTITY bdquo "&#x201E;"> <!ENTITY dagger "&#x2020;"> <!ENTITY Dagger "&#x2021;"> <!ENTITY bull "&#x2022;"> <!ENTITY hellip "&#x2026;"> <!ENTITY permil "&#x2030;"> <!ENTITY prime "&#x2032;"> <!ENTITY Prime "&#x2033;"> <!ENTITY lsaquo "&#x2039;"> <!ENTITY rsaquo "&#x203A;"> <!ENTITY oline "&#x203E;"> <!ENTITY frasl "&#x2044;"> <!ENTITY euro "&#x20AC;"> <!ENTITY image "&#x2111;"> <!ENTITY weierp "&#x2118;"> <!ENTITY real "&#x211C;"> <!ENTITY trade "&#x2122;"> <!ENTITY alefsym "&#x2135;"> <!ENTITY larr "&#x2190;"> <!ENTITY uarr "&#x2191;"> <!ENTITY rarr "&#x2192;"> <!ENTITY darr "&#x2193;"> <!ENTITY harr "&#x2194;"> <!ENTITY crarr "&#x21B5;"> <!ENTITY lArr "&#x21D0;"> <!ENTITY uArr "&#x21D1;"> <!ENTITY rArr "&#x21D2;"> <!ENTITY dArr "&#x21D3;"> <!ENTITY hArr "&#x21D4;"> <!ENTITY forall "&#x2200;"> <!ENTITY part "&#x2202;"> <!ENTITY exist "&#x2203;"> <!ENTITY empty "&#x2205;"> <!ENTITY nabla "&#x2207;"> <!ENTITY isin "&#x2208;"> <!ENTITY notin "&#x2209;"> <!ENTITY ni "&#x220B;"> <!ENTITY prod "&#x220F;"> <!ENTITY sum "&#x2211;"> <!ENTITY minus "&#x2212;"> <!ENTITY lowast "&#x2217;"> <!ENTITY radic "&#x221A;"> <!ENTITY prop "&#x221D;"> <!ENTITY infin "&#x221E;"> <!ENTITY ang "&#x2220;"> <!ENTITY and "&#x2227;"> <!ENTITY or "&#x2228;"> <!ENTITY cap "&#x2229;"> <!ENTITY cup "&#x222A;"> <!ENTITY int "&#x222B;"> <!ENTITY there4 "&#x2234;"> <!ENTITY sim "&#x223C;"> <!ENTITY cong "&#x2245;"> <!ENTITY asymp "&#x2248;"> 
<!ENTITY ne "&#x2260;"> <!ENTITY equiv "&#x2261;"> <!ENTITY le "&#x2264;"> <!ENTITY ge "&#x2265;"> <!ENTITY sub "&#x2282;"> <!ENTITY sup "&#x2283;"> <!ENTITY nsub "&#x2284;"> <!ENTITY sube "&#x2286;"> <!ENTITY supe "&#x2287;"> <!ENTITY oplus "&#x2295;"> <!ENTITY otimes "&#x2297;"> <!ENTITY perp "&#x22A5;"> <!ENTITY sdot "&#x22C5;"> <!ENTITY lceil "&#x2308;"> <!ENTITY rceil "&#x2309;"> <!ENTITY lfloor "&#x230A;"> <!ENTITY rfloor "&#x230B;"> <!ENTITY lang "&#x2329;"> <!ENTITY rang "&#x232A;"> <!ENTITY loz "&#x25CA;"> <!ENTITY spades "&#x2660;"> <!ENTITY clubs "&#x2663;"> <!ENTITY hearts "&#x2665;"> <!ENTITY diams "&#x2666;"> ]>';
    660 	}
    661 }