root / drupal7 / sites / all / libraries / simplepie / library / SimplePie / HTTP / Parser.php @ 41cc1b08
<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @version 1.3.1
 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Geoffrey Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */


/**
 * HTTP Response Parser
 *
 * A hand-written state machine that splits a raw HTTP response into its
 * status line (version, status code, reason phrase), its headers and its
 * body. Each state is implemented by the protected method of the same name;
 * parse() keeps dispatching to the current state until the machine reaches
 * 'emit', fails (state set to false) or runs out of input.
 *
 * Usage: construct with the raw response, call parse(), then read the public
 * properties.
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * Set to an empty string by parse() when parsing fails.
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * Set to an empty string by parse() when parsing fails.
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * Keys are lower-cased header names. Repeated headers are joined with
	 * ', ', except Content-Type, for which only the last occurrence is kept.
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method to call next, 'emit' once parsing has
	 * finished, or false after a parse error.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it emits, fails, or the input is
	 * exhausted. The raw input is released afterwards. Ending in the 'body'
	 * state counts as success: that is what happens when the headers are
	 * complete but no body bytes follow them.
	 *
	 * On failure every public property is reset ($http_version and
	 * $status_code to '', $headers to an empty array).
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// Each state is implemented by the method of the same name.
			$state = $this->state;
			$this->$state();
		}
		$this->data = '';
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}
		else
		{
			$this->http_version = '';
			$this->status_code = '';
			$this->reason = '';
			$this->headers = array();
			$this->body = '';
			return false;
		}
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return (bool) ($this->position < $this->data_length);
	}

	/**
	 * See if the next character is LWS
	 *
	 * LWS here means a tab, a space, or a line feed that is immediately
	 * followed by a tab or space (a folded header line). The caller must
	 * ensure that has_data() is true.
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		return (bool) ($this->data[$this->position] === "\x09"
			|| $this->data[$this->position] === "\x20"
			|| ($this->data[$this->position] === "\x0A"
				&& isset($this->data[$this->position + 1])
				&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the input to contain at least one line feed and to start
	 * with "HTTP/" (case-insensitive). A version with more than one dot is
	 * rejected.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
		{
			$len = strspn($this->data, '0123456789.', 5);
			$this->http_version = substr($this->data, 5, $len);
			$this->position += 5 + $len;
			if (substr_count($this->http_version, '.') <= 1)
			{
				$this->http_version = (float) $this->http_version;
				// Skip the whitespace separating the version from the status code.
				$this->position += strspn($this->data, "\x09\x20", $this->position);
				$this->state = 'status';
			}
			else
			{
				$this->state = false;
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the status code
	 *
	 * Fails if no digit is found at the current position.
	 */
	protected function status()
	{
		if ($len = strspn($this->data, '0123456789', $this->position))
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, trimmed of tabs, spaces
	 * and carriage returns, and moves the pointer past that line feed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if any), then looks at the start
	 * of the next line: an empty line (CRLF or bare LF) ends the header
	 * section, anything else starts a new header name.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';
		// Compare two bytes of the input, not a substring of one character,
		// which could never equal CRLF.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. A line without a colon is
	 * skipped; running out of input before either delimiter is an error.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (isset($this->data[$this->position + $len]))
		{
			if ($this->data[$this->position + $len] === "\x0A")
			{
				// No colon on this line: leave the pointer on the line feed
				// so that new_line() can handle it.
				$this->position += $len;
				$this->state = 'new_line';
			}
			else
			{
				$this->name = substr($this->data, $this->position, $len);
				$this->position += $len + 1;
				$this->state = 'value';
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 *
	 * Does not change the state; the calling state continues afterwards.
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					// Workaround for ETags: we have to include the quotes as
					// part of the tag.
					if (strtolower($this->name) === 'etag')
					{
						$this->value .= '"';
						$this->position++;
						$this->state = 'value_char';
						break;
					}
					$this->position++;
					$this->state = 'quote';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				default:
					$this->state = 'value_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Appends everything up to the next tab, space, line feed or double
	 * quote to the current value.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					// Closing quote: back to unquoted parsing.
					$this->position++;
					$this->state = 'value';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				case '\\':
					$this->position++;
					$this->state = 'quote_escaped';
					break;

				default:
					$this->state = 'quote_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Appends everything up to the next tab, space, line feed, double quote
	 * or backslash to the current value.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// Stay inside the quoted string so that a following backslash escape
		// or closing quote is interpreted by quote(), not by value().
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character following the backslash is taken literally.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Takes the remainder of the input as the body. If a non-empty
	 * Transfer-Encoding header was seen, that header is removed and the
	 * body is handed to chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not start with a chunk-size line it is left
	 * untouched. Otherwise chunks are concatenated until a zero-length
	 * chunk or the end of the data is reached; trailer headers are ignored.
	 */
	protected function chunked()
	{
		if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
			if (!$is_chunked)
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec(trim($matches[1]));
			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			// $matches[0] is the whole chunk-size line including its CRLF.
			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// Skip the chunk data and the CRLF that terminates it.
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}
}