balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

Parser.php (11515B)


      1 <?php
      2 /**
      3  * SimplePie
      4  *
      5  * A PHP-Based RSS and Atom Feed Framework.
      6  * Takes the hard work out of managing a complete RSS/Atom solution.
      7  *
      8  * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without modification, are
     12  * permitted provided that the following conditions are met:
     13  *
     14  * 	* Redistributions of source code must retain the above copyright notice, this list of
     15  * 	  conditions and the following disclaimer.
     16  *
     17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
     18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
     19  * 	  provided with the distribution.
     20  *
     21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
     22  * 	  to endorse or promote products derived from this software without specific prior
     23  * 	  written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
     26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
     28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  *
     35  * @package SimplePie
     36  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
     37  * @author Ryan Parman
     38  * @author Sam Sneddon
     39  * @author Ryan McCue
     40  * @link http://simplepie.org/ SimplePie
     41  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
     42  */
     43 
     44 
     45 /**
     46  * HTTP Response Parser
     47  *
     48  * @package SimplePie
     49  * @subpackage HTTP
     50  */
     51 class SimplePie_HTTP_Parser
     52 {
     53 	/**
     54 	 * HTTP Version
     55 	 *
     56 	 * @var float
     57 	 */
     58 	public $http_version = 0.0;
     59 
     60 	/**
     61 	 * Status code
     62 	 *
     63 	 * @var int
     64 	 */
     65 	public $status_code = 0;
     66 
     67 	/**
     68 	 * Reason phrase
     69 	 *
     70 	 * @var string
     71 	 */
     72 	public $reason = '';
     73 
     74 	/**
     75 	 * Key/value pairs of the headers
     76 	 *
     77 	 * @var array
     78 	 */
     79 	public $headers = array();
     80 
     81 	/**
     82 	 * Body of the response
     83 	 *
     84 	 * @var string
     85 	 */
     86 	public $body = '';
     87 
     88 	/**
     89 	 * Current state of the state machine
     90 	 *
     91 	 * @var string
     92 	 */
     93 	protected $state = 'http_version';
     94 
     95 	/**
     96 	 * Input data
     97 	 *
     98 	 * @var string
     99 	 */
    100 	protected $data = '';
    101 
    102 	/**
    103 	 * Input data length (to avoid calling strlen() everytime this is needed)
    104 	 *
    105 	 * @var int
    106 	 */
    107 	protected $data_length = 0;
    108 
    109 	/**
    110 	 * Current position of the pointer
    111 	 *
    112 	 * @var int
    113 	 */
    114 	protected $position = 0;
    115 
    116 	/**
    117 	 * Name of the hedaer currently being parsed
    118 	 *
    119 	 * @var string
    120 	 */
    121 	protected $name = '';
    122 
    123 	/**
    124 	 * Value of the hedaer currently being parsed
    125 	 *
    126 	 * @var string
    127 	 */
    128 	protected $value = '';
    129 
    130 	/**
    131 	 * Create an instance of the class with the input data
    132 	 *
    133 	 * @param string $data Input data
    134 	 */
    135 	public function __construct($data)
    136 	{
    137 		$this->data = $data;
    138 		$this->data_length = strlen($this->data);
    139 	}
    140 
    141 	/**
    142 	 * Parse the input data
    143 	 *
    144 	 * @return bool true on success, false on failure
    145 	 */
    146 	public function parse()
    147 	{
    148 		while ($this->state && $this->state !== 'emit' && $this->has_data())
    149 		{
    150 			$state = $this->state;
    151 			$this->$state();
    152 		}
    153 		$this->data = '';
    154 		if ($this->state === 'emit' || $this->state === 'body')
    155 		{
    156 			return true;
    157 		}
    158 
    159 		$this->http_version = '';
    160 		$this->status_code = '';
    161 		$this->reason = '';
    162 		$this->headers = array();
    163 		$this->body = '';
    164 		return false;
    165 	}
    166 
    167 	/**
    168 	 * Check whether there is data beyond the pointer
    169 	 *
    170 	 * @return bool true if there is further data, false if not
    171 	 */
    172 	protected function has_data()
    173 	{
    174 		return (bool) ($this->position < $this->data_length);
    175 	}
    176 
    177 	/**
    178 	 * See if the next character is LWS
    179 	 *
    180 	 * @return bool true if the next character is LWS, false if not
    181 	 */
    182 	protected function is_linear_whitespace()
    183 	{
    184 		return (bool) ($this->data[$this->position] === "\x09"
    185 			|| $this->data[$this->position] === "\x20"
    186 			|| ($this->data[$this->position] === "\x0A"
    187 				&& isset($this->data[$this->position + 1])
    188 				&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
    189 	}
    190 
    191 	/**
    192 	 * Parse the HTTP version
    193 	 */
    194 	protected function http_version()
    195 	{
    196 		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
    197 		{
    198 			$len = strspn($this->data, '0123456789.', 5);
    199 			$this->http_version = substr($this->data, 5, $len);
    200 			$this->position += 5 + $len;
    201 			if (substr_count($this->http_version, '.') <= 1)
    202 			{
    203 				$this->http_version = (float) $this->http_version;
    204 				$this->position += strspn($this->data, "\x09\x20", $this->position);
    205 				$this->state = 'status';
    206 			}
    207 			else
    208 			{
    209 				$this->state = false;
    210 			}
    211 		}
    212 		else
    213 		{
    214 			$this->state = false;
    215 		}
    216 	}
    217 
    218 	/**
    219 	 * Parse the status code
    220 	 */
    221 	protected function status()
    222 	{
    223 		if ($len = strspn($this->data, '0123456789', $this->position))
    224 		{
    225 			$this->status_code = (int) substr($this->data, $this->position, $len);
    226 			$this->position += $len;
    227 			$this->state = 'reason';
    228 		}
    229 		else
    230 		{
    231 			$this->state = false;
    232 		}
    233 	}
    234 
    235 	/**
    236 	 * Parse the reason phrase
    237 	 */
    238 	protected function reason()
    239 	{
    240 		$len = strcspn($this->data, "\x0A", $this->position);
    241 		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
    242 		$this->position += $len + 1;
    243 		$this->state = 'new_line';
    244 	}
    245 
    246 	/**
    247 	 * Deal with a new line, shifting data around as needed
    248 	 */
    249 	protected function new_line()
    250 	{
    251 		$this->value = trim($this->value, "\x0D\x20");
    252 		if ($this->name !== '' && $this->value !== '')
    253 		{
    254 			$this->name = strtolower($this->name);
    255 			// We should only use the last Content-Type header. c.f. issue #1
    256 			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
    257 			{
    258 				$this->headers[$this->name] .= ', ' . $this->value;
    259 			}
    260 			else
    261 			{
    262 				$this->headers[$this->name] = $this->value;
    263 			}
    264 		}
    265 		$this->name = '';
    266 		$this->value = '';
    267 		if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
    268 		{
    269 			$this->position += 2;
    270 			$this->state = 'body';
    271 		}
    272 		elseif ($this->data[$this->position] === "\x0A")
    273 		{
    274 			$this->position++;
    275 			$this->state = 'body';
    276 		}
    277 		else
    278 		{
    279 			$this->state = 'name';
    280 		}
    281 	}
    282 
    283 	/**
    284 	 * Parse a header name
    285 	 */
    286 	protected function name()
    287 	{
    288 		$len = strcspn($this->data, "\x0A:", $this->position);
    289 		if (isset($this->data[$this->position + $len]))
    290 		{
    291 			if ($this->data[$this->position + $len] === "\x0A")
    292 			{
    293 				$this->position += $len;
    294 				$this->state = 'new_line';
    295 			}
    296 			else
    297 			{
    298 				$this->name = substr($this->data, $this->position, $len);
    299 				$this->position += $len + 1;
    300 				$this->state = 'value';
    301 			}
    302 		}
    303 		else
    304 		{
    305 			$this->state = false;
    306 		}
    307 	}
    308 
    309 	/**
    310 	 * Parse LWS, replacing consecutive LWS characters with a single space
    311 	 */
    312 	protected function linear_whitespace()
    313 	{
    314 		do
    315 		{
    316 			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
    317 			{
    318 				$this->position += 2;
    319 			}
    320 			elseif ($this->data[$this->position] === "\x0A")
    321 			{
    322 				$this->position++;
    323 			}
    324 			$this->position += strspn($this->data, "\x09\x20", $this->position);
    325 		} while ($this->has_data() && $this->is_linear_whitespace());
    326 		$this->value .= "\x20";
    327 	}
    328 
    329 	/**
    330 	 * See what state to move to while within non-quoted header values
    331 	 */
    332 	protected function value()
    333 	{
    334 		if ($this->is_linear_whitespace())
    335 		{
    336 			$this->linear_whitespace();
    337 		}
    338 		else
    339 		{
    340 			switch ($this->data[$this->position])
    341 			{
    342 				case '"':
    343 					// Workaround for ETags: we have to include the quotes as
    344 					// part of the tag.
    345 					if (strtolower($this->name) === 'etag')
    346 					{
    347 						$this->value .= '"';
    348 						$this->position++;
    349 						$this->state = 'value_char';
    350 						break;
    351 					}
    352 					$this->position++;
    353 					$this->state = 'quote';
    354 					break;
    355 
    356 				case "\x0A":
    357 					$this->position++;
    358 					$this->state = 'new_line';
    359 					break;
    360 
    361 				default:
    362 					$this->state = 'value_char';
    363 					break;
    364 			}
    365 		}
    366 	}
    367 
    368 	/**
    369 	 * Parse a header value while outside quotes
    370 	 */
    371 	protected function value_char()
    372 	{
    373 		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
    374 		$this->value .= substr($this->data, $this->position, $len);
    375 		$this->position += $len;
    376 		$this->state = 'value';
    377 	}
    378 
    379 	/**
    380 	 * See what state to move to while within quoted header values
    381 	 */
    382 	protected function quote()
    383 	{
    384 		if ($this->is_linear_whitespace())
    385 		{
    386 			$this->linear_whitespace();
    387 		}
    388 		else
    389 		{
    390 			switch ($this->data[$this->position])
    391 			{
    392 				case '"':
    393 					$this->position++;
    394 					$this->state = 'value';
    395 					break;
    396 
    397 				case "\x0A":
    398 					$this->position++;
    399 					$this->state = 'new_line';
    400 					break;
    401 
    402 				case '\\':
    403 					$this->position++;
    404 					$this->state = 'quote_escaped';
    405 					break;
    406 
    407 				default:
    408 					$this->state = 'quote_char';
    409 					break;
    410 			}
    411 		}
    412 	}
    413 
    414 	/**
    415 	 * Parse a header value while within quotes
    416 	 */
    417 	protected function quote_char()
    418 	{
    419 		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
    420 		$this->value .= substr($this->data, $this->position, $len);
    421 		$this->position += $len;
    422 		$this->state = 'value';
    423 	}
    424 
    425 	/**
    426 	 * Parse an escaped character within quotes
    427 	 */
    428 	protected function quote_escaped()
    429 	{
    430 		$this->value .= $this->data[$this->position];
    431 		$this->position++;
    432 		$this->state = 'quote';
    433 	}
    434 
    435 	/**
    436 	 * Parse the body
    437 	 */
    438 	protected function body()
    439 	{
    440 		$this->body = substr($this->data, $this->position);
    441 		if (!empty($this->headers['transfer-encoding']))
    442 		{
    443 			unset($this->headers['transfer-encoding']);
    444 			$this->state = 'chunked';
    445 		}
    446 		else
    447 		{
    448 			$this->state = 'emit';
    449 		}
    450 	}
    451 
    452 	/**
    453 	 * Parsed a "Transfer-Encoding: chunked" body
    454 	 */
    455 	protected function chunked()
    456 	{
    457 		if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
    458 		{
    459 			$this->state = 'emit';
    460 			return;
    461 		}
    462 
    463 		$decoded = '';
    464 		$encoded = $this->body;
    465 
    466 		while (true)
    467 		{
    468 			$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
    469 			if (!$is_chunked)
    470 			{
    471 				// Looks like it's not chunked after all
    472 				$this->state = 'emit';
    473 				return;
    474 			}
    475 
    476 			$length = hexdec(trim($matches[1]));
    477 			if ($length === 0)
    478 			{
    479 				// Ignore trailer headers
    480 				$this->state = 'emit';
    481 				$this->body = $decoded;
    482 				return;
    483 			}
    484 
    485 			$chunk_length = strlen($matches[0]);
    486 			$decoded .= $part = substr($encoded, $chunk_length, $length);
    487 			$encoded = substr($encoded, $chunk_length + $length + 2);
    488 
    489 			if (trim($encoded) === '0' || empty($encoded))
    490 			{
    491 				$this->state = 'emit';
    492 				$this->body = $decoded;
    493 				return;
    494 			}
    495 		}
    496 	}
    497 
    498 	/**
    499 	 * Prepare headers (take care of proxies headers)
    500 	 *
    501 	 * @param string  $headers Raw headers
    502 	 * @param integer $count   Redirection count. Default to 1.
    503 	 *
    504 	 * @return string
    505 	 */
    506 	static public function prepareHeaders($headers, $count = 1)
    507 	{
    508 		$data = explode("\r\n\r\n", $headers, $count);
    509 		$data = array_pop($data);
    510 		if (false !== stripos($data, "HTTP/1.0 200 Connection established\r\n\r\n")) {
    511 			$data = str_ireplace("HTTP/1.0 200 Connection established\r\n\r\n", '', $data);
    512 		}
    513 		if (false !== stripos($data, "HTTP/1.1 200 Connection established\r\n\r\n")) {
    514 			$data = str_ireplace("HTTP/1.1 200 Connection established\r\n\r\n", '', $data);
    515 		}
    516 		return $data;
    517 	}
    518 }