root / drupal7 / sites / all / libraries / simplepie-1.3.1 / library / SimplePie / IRI.php @ 7295e063
1 |
<?php
|
---|---|
2 |
/**
|
3 |
* SimplePie
|
4 |
*
|
5 |
* A PHP-Based RSS and Atom Feed Framework.
|
6 |
* Takes the hard work out of managing a complete RSS/Atom solution.
|
7 |
*
|
8 |
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
|
9 |
* All rights reserved.
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without modification, are
|
12 |
* permitted provided that the following conditions are met:
|
13 |
*
|
14 |
* * Redistributions of source code must retain the above copyright notice, this list of
|
15 |
* conditions and the following disclaimer.
|
16 |
*
|
17 |
* * Redistributions in binary form must reproduce the above copyright notice, this list
|
18 |
* of conditions and the following disclaimer in the documentation and/or other materials
|
19 |
* provided with the distribution.
|
20 |
*
|
21 |
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
|
22 |
* to endorse or promote products derived from this software without specific prior
|
23 |
* written permission.
|
24 |
*
|
25 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
26 |
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
27 |
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
|
28 |
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
29 |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
30 |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
31 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
32 |
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
33 |
* POSSIBILITY OF SUCH DAMAGE.
|
34 |
*
|
35 |
* @package SimplePie
|
36 |
* @version 1.3.1
|
37 |
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
|
38 |
* @author Ryan Parman
|
39 |
* @author Geoffrey Sneddon
|
40 |
* @author Ryan McCue
|
41 |
* @link http://simplepie.org/ SimplePie
|
42 |
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
|
43 |
*/
|
44 |
|
45 |
/**
|
46 |
* IRI parser/serialiser/normaliser
|
47 |
*
|
48 |
* @package SimplePie
|
49 |
* @subpackage HTTP
|
50 |
* @author Geoffrey Sneddon
|
51 |
* @author Steve Minutillo
|
52 |
* @author Ryan McCue
|
53 |
* @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
|
54 |
* @license http://www.opensource.org/licenses/bsd-license.php
|
55 |
*/
|
56 |
class SimplePie_IRI |
57 |
{ |
58 |
/**
 * Scheme
 *
 * Stored lowercased by set_scheme(); null when the IRI has no scheme.
 *
 * @var string|null
 */
protected $scheme = null;

/**
 * User Information
 *
 * Null when the authority carries no userinfo part.
 *
 * @var string|null
 */
protected $iuserinfo = null;

/**
 * ihost
 *
 * Null when no host is set.
 *
 * @var string|null
 */
protected $ihost = null;

/**
 * Port
 *
 * set_port() stores the port as an integer; null means no port is set.
 *
 * @var int|null
 */
protected $port = null;

/**
 * ipath
 *
 * An absent path is represented by the empty string, not null.
 *
 * @var string
 */
protected $ipath = '';

/**
 * iquery
 *
 * Null when the IRI has no query part.
 *
 * @var string|null
 */
protected $iquery = null;

/**
 * ifragment
 *
 * Null when the IRI has no fragment part.
 *
 * @var string|null
 */
protected $ifragment = null;

/**
 * Normalization database
 *
 * Each key is the scheme, each value is an array with each key as the IRI
 * part and value as the default value for that part.
 *
 * @var array
 */
protected $normalization = array(
    'acap' => array(
        'port' => 674
    ),
    'dict' => array(
        'port' => 2628
    ),
    'file' => array(
        'ihost' => 'localhost'
    ),
    'http' => array(
        'port' => 80,
        'ipath' => '/'
    ),
    'https' => array(
        'port' => 443,
        'ipath' => '/'
    ),
);
132 |
|
133 |
/**
 * Return the entire IRI when the object is used as a string
 *
 * get_iri() returns false for an invalid IRI, while __toString() is
 * required to return a string, so the result is cast: an invalid IRI is
 * rendered as the empty string instead of raising an error.
 *
 * @return string
 */
public function __toString()
{
    return (string) $this->get_iri();
}
142 |
|
143 |
/**
 * Overload __set() so IRI parts can be assigned as properties
 *
 * The assignment is routed to the matching set_*() method. The i-prefixed
 * names listed below are accepted as aliases (e.g. "ihost" calls
 * set_host()). Any other name is silently ignored.
 *
 * @param string $name Property name
 * @param mixed $value Property value
 */
public function __set($name, $value)
{
    $setter = 'set_' . $name;

    if (!method_exists($this, $setter))
    {
        $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
        if (!in_array($name, $aliases, true))
        {
            return;
        }

        // Drop the leading "i" to reach the real setter name
        $setter = 'set_' . substr($name, 1);
    }

    call_user_func(array($this, $setter), $value);
}
167 |
|
168 |
/**
 * Overload __get() so IRI parts can be read as properties
 *
 * "iri", "uri", "iauthority" and "authority" are computed through their
 * get_*() methods. Other names are looked up among the object's
 * properties, first as given, then with an "i" prefix (host -> ihost),
 * then with the first character removed (ischeme -> scheme). When the
 * stored value is null and the current scheme defines a default for that
 * part, the default is returned instead. An unknown name raises an
 * E_USER_NOTICE and yields null.
 *
 * @param string $name Property name
 * @return mixed
 */
public function __get($name)
{
    // array_key_exists() is used rather than isset() because isset()
    // reports false for properties whose value is null
    $known = get_object_vars($this);
    $computed = array('iri', 'uri', 'iauthority', 'authority');

    if (in_array($name, $computed, true))
    {
        $value = $this->{"get_$name"}();
    }
    elseif (array_key_exists($name, $known))
    {
        $value = $this->$name;
    }
    // host -> ihost
    elseif (($candidate = 'i' . $name) && array_key_exists($candidate, $known))
    {
        $name = $candidate;
        $value = $this->$candidate;
    }
    // ischeme -> scheme
    elseif (($candidate = substr($name, 1)) && array_key_exists($candidate, $known))
    {
        $name = $candidate;
        $value = $this->$candidate;
    }
    else
    {
        trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
        $value = null;
    }

    // Fall back to the scheme's default for this part, if there is one
    if ($value === null && isset($this->normalization[$this->scheme][$name]))
    {
        return $this->normalization[$this->scheme][$name];
    }

    return $value;
}
220 |
|
221 |
/**
 * Overload __isset() to provide access via properties
 *
 * A name counts as set when a get_<name>() method exists or when the
 * property of that name holds a non-null value.
 *
 * @param string $name Property name
 * @return bool
 */
public function __isset($name)
{
    return method_exists($this, 'get_' . $name) || isset($this->$name);
}
238 |
|
239 |
/**
 * Overload __unset() so IRI parts can be removed as properties
 *
 * The matching set_*() method is called with null so the component is
 * really removed. Passing an empty string instead would leave an empty but
 * present component behind (a dangling "?" or "#", an empty host) and
 * would turn the port into 0. The i-prefixed aliases accepted by __set()
 * are accepted here as well. Unknown names are ignored.
 *
 * @param string $name Property name
 */
public function __unset($name)
{
    $setter = 'set_' . $name;

    if (!method_exists($this, $setter))
    {
        $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
        if (!in_array($name, $aliases, true))
        {
            return;
        }

        // Drop the leading "i" to reach the real setter name
        $setter = 'set_' . substr($name, 1);
    }

    // set_iri() treats null as "leave the object untouched", so the whole
    // IRI is cleared by parsing the empty string instead
    $cleared = ($setter === 'set_iri') ? '' : null;

    call_user_func(array($this, $setter), $cleared);
}
251 |
|
252 |
/**
 * Create a new IRI object, optionally initialised from a string
 *
 * The argument is handed to set_iri(); its return value is not checked
 * here, so construction itself never reports a parse failure.
 *
 * @param string|null $iri
 */
public function __construct($iri = null)
{
    $this->set_iri($iri);
}
261 |
|
262 |
/**
 * Create a new IRI object by resolving a relative IRI against a base
 *
 * Returns false if $relative is invalid, or if $relative has no scheme and
 * $base is not a valid absolute IRI. A $relative that already carries a
 * scheme is returned as a clone without consulting $base.
 *
 * @param SimplePie_IRI|string $base (Absolute) Base IRI
 * @param SimplePie_IRI|string $relative Relative IRI
 * @return SimplePie_IRI|false
 */
public static function absolutize($base, $relative)
{
    if (!($relative instanceof SimplePie_IRI))
    {
        $relative = new SimplePie_IRI($relative);
    }
    if (!$relative->is_valid())
    {
        return false;
    }

    // A reference with its own scheme is already absolute
    if ($relative->scheme !== null)
    {
        return clone $relative;
    }

    if (!($base instanceof SimplePie_IRI))
    {
        $base = new SimplePie_IRI($base);
    }
    if ($base->scheme === null || !$base->is_valid())
    {
        return false;
    }

    if ($relative->get_iri() === '')
    {
        // Empty reference: the base without its fragment
        $target = clone $base;
        $target->ifragment = null;
    }
    elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
    {
        // Network-path reference: only the scheme comes from the base
        $target = clone $relative;
        $target->scheme = $base->scheme;

        // The reference was parsed without a scheme, so its path has not
        // had dot segments removed yet; the resolved target must
        $target->ipath = $target->remove_dot_segments($target->ipath);
    }
    else
    {
        $target = new SimplePie_IRI;
        $target->scheme = $base->scheme;
        $target->iuserinfo = $base->iuserinfo;
        $target->ihost = $base->ihost;
        $target->port = $base->port;

        if ($relative->ipath !== '')
        {
            if ($relative->ipath[0] === '/')
            {
                // Absolute-path reference replaces the base path
                $target->ipath = $relative->ipath;
            }
            elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
            {
                // Base has an authority but an empty path
                $target->ipath = '/' . $relative->ipath;
            }
            elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
            {
                // Replace everything after the last "/" of the base path
                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
            }
            else
            {
                $target->ipath = $relative->ipath;
            }
            $target->ipath = $target->remove_dot_segments($target->ipath);
            $target->iquery = $relative->iquery;
        }
        else
        {
            // Empty path: keep the base path; the query comes from the
            // reference if it has one, otherwise from the base
            $target->ipath = $base->ipath;
            if ($relative->iquery !== null)
            {
                $target->iquery = $relative->iquery;
            }
            elseif ($base->iquery !== null)
            {
                $target->iquery = $base->iquery;
            }
        }
        $target->ifragment = $relative->ifragment;
    }

    $target->scheme_normalization();
    return $target;
}
357 |
|
358 |
/**
 * Parse an IRI into scheme/authority/path/query/fragment segments
 *
 * Surrounding space, tab, LF, FF and CR characters are trimmed first.
 * In the returned match array the keys "scheme", "authority", "query" and
 * "fragment" are null when that component is absent, and "path" is always
 * a string.
 *
 * @param string $iri
 * @return array|false Match array, or false if the string cannot be split
 */
protected function parse_iri($iri)
{
    $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
    $pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/';

    if (!preg_match($pattern, $iri, $match))
    {
        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    // Groups 1, 3, 6 and 8 include the delimiter (":", "//", "?", "#"),
    // so an empty or missing group means the component is absent, as
    // opposed to present but empty
    if ($match[1] === '')
    {
        $match['scheme'] = null;
    }
    if (!isset($match[3]) || $match[3] === '')
    {
        $match['authority'] = null;
    }
    if (!isset($match[5]))
    {
        $match['path'] = '';
    }
    if (!isset($match[6]) || $match[6] === '')
    {
        $match['query'] = null;
    }
    if (!isset($match[8]) || $match[8] === '')
    {
        $match['fragment'] = null;
    }

    return $match;
}
397 |
|
398 |
/**
 * Remove "." and ".." segments from a path
 *
 * Segments are moved one at a time from the input buffer to a result
 * buffer; steps A-E below are applied in order on each pass.
 *
 * @param string $input
 * @return string
 */
protected function remove_dot_segments($input)
{
    $result = '';

    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: a leading "../" or "./" is dropped
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
        }
        elseif (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
        }
        // B: a leading "/./", or a lone "/.", collapses to "/"
        elseif (strncmp($input, '/./', 3) === 0)
        {
            $input = substr($input, 2);
        }
        elseif ($input === '/.')
        {
            $input = '/';
        }
        // C: a leading "/../", or a lone "/..", collapses to "/" and also
        // removes the last segment (with its preceding "/") from the result
        elseif (strncmp($input, '/../', 4) === 0 || $input === '/..')
        {
            $input = ($input === '/..') ? '/' : substr($input, 3);
            $slash = strrpos($result, '/');
            $result = ($slash === false) ? '' : substr($result, 0, $slash);
        }
        // D: an input consisting only of "." or ".." is dropped
        elseif ($input === '.' || $input === '..')
        {
            $input = '';
        }
        // E: move the first segment (including its leading "/", if any, up
        // to but excluding the next "/") to the end of the result
        else
        {
            $next = strpos($input, '/', 1);
            if ($next !== false)
            {
                $result .= substr($input, 0, $next);
                $input = substr($input, $next);
            }
            else
            {
                $result .= $input;
                $input = '';
            }
        }
    }

    return $result . $input;
}
457 |
|
458 |
/**
 * Replace invalid characters with their percent encoding
 *
 * Existing pct-encoded runs are normalised first, stray "%" signs are
 * escaped as "%25", and then every byte sequence that is neither listed in
 * $extra_chars, nor unreserved ASCII, nor a well-formed UTF-8 character in
 * the allowed ranges is percent-encoded byte by byte.
 *
 * @param string $string Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool $iprivate Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
    // Normalize as many pct-encoded sections as possible
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // Escape every "%" that does not start a valid pct-encoded triplet
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // Unreserved ASCII and "%" are always allowed ("%" is safe because all
    // pct-encoded sections are valid by now)
    $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    $cursor = 0;
    $total = strlen($string);

    // Each pass skips the run of allowed ASCII bytes, then inspects the
    // sequence that starts at the first byte outside that set
    while (($cursor += strspn($string, $allowed, $cursor)) < $total)
    {
        $byte = ord($string[$cursor]);
        $first = $cursor;
        $valid = true;

        // Classify the lead byte. A plain ASCII byte reaching this point is
        // not allowed, so it falls through to the "invalid" case.
        if (($byte & 0xE0) === 0xC0)
        {
            // Two byte sequence
            $codepoint = ($byte & 0x1F) << 6;
            $width = 2;
            $pending = 1;
        }
        elseif (($byte & 0xF0) === 0xE0)
        {
            // Three byte sequence
            $codepoint = ($byte & 0x0F) << 12;
            $width = 3;
            $pending = 2;
        }
        elseif (($byte & 0xF8) === 0xF0)
        {
            // Four byte sequence
            $codepoint = ($byte & 0x07) << 18;
            $width = 4;
            $pending = 3;
        }
        else
        {
            // Invalid byte
            $valid = false;
            $width = 1;
            $pending = 0;
        }

        if ($pending)
        {
            if ($cursor + $width <= $total)
            {
                for ($cursor++; $pending; $cursor++)
                {
                    $byte = ord($string[$cursor]);

                    if (($byte & 0xC0) === 0x80)
                    {
                        // Valid continuation byte: merge its six payload bits
                        $codepoint |= ($byte & 0x3F) << (--$pending * 6);
                    }
                    else
                    {
                        // Not a continuation byte: the sequence is invalid
                        // and this byte is reprocessed on the next pass
                        $valid = false;
                        $cursor--;
                        break;
                    }
                }
            }
            else
            {
                // Sequence is cut off by the end of the string
                $cursor = $total - 1;
                $valid = false;
            }
        }

        // Decide whether the sequence has to be percent-encoded. The
        // codepoint tests only run for well-formed sequences, where
        // $codepoint and $width are guaranteed to be set.
        if ($valid)
        {
            $non_shortest = ($width > 1 && $codepoint <= 0x7F)
                || ($width > 2 && $codepoint <= 0x7FF)
                || ($width > 3 && $codepoint <= 0xFFFF);
            $noncharacter = ($codepoint & 0xFFFE) === 0xFFFE
                || ($codepoint >= 0xFDD0 && $codepoint <= 0xFDEF);
            $outside_ucschar = ($codepoint > 0xD7FF && $codepoint < 0xF900)
                || $codepoint < 0xA0
                || $codepoint > 0xEFFFD;
            $outside_iprivate = !$iprivate
                || $codepoint < 0xE000
                || $codepoint > 0x10FFFD;

            $encode = $non_shortest || $noncharacter || ($outside_ucschar && $outside_iprivate);
        }
        else
        {
            $encode = true;
        }

        if ($encode)
        {
            // After a complete character the cursor sits one past its last
            // byte; step back so the loop below ends on that byte
            if ($valid)
            {
                $cursor--;
            }

            // Each replaced byte grows the string by two, so the index, the
            // end marker and the length are all shifted along with it
            for ($k = $first; $k <= $cursor; $k++)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$k])), $k, 1);
                $k += 2;
                $cursor += 2;
                $total += 2;
            }
        }
    }

    return $string;
}
593 |
|
594 |
/**
 * Callback function for preg_replace_callback.
 *
 * Decodes sequences of percent encoded bytes that represent UTF-8 encoded
 * characters in iunreserved. Every other sequence is kept percent encoded,
 * with its hex digits uppercased.
 *
 * @param array $match PCRE match; $match[0] is a run of "%XX" triplets
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
    // As we just have valid percent encoded sequences we can just explode
    // and ignore the first member of the returned array (an empty string).
    $bytes = explode('%', $match[0]);

    // Initialize the new string (this is what will be returned) and that
    // there are no bytes remaining in the current sequence (unsurprising
    // at the first byte!).
    $string = '';
    $remaining = 0;

    // Loop over each and every byte, and set $value to its value
    for ($i = 1, $len = count($bytes); $i < $len; $i++)
    {
        $value = hexdec($bytes[$i]);

        // If we're the first byte of sequence:
        if (!$remaining)
        {
            // Start position
            $start = $i;

            // By default we are valid
            $valid = true;

            // One byte sequence:
            if ($value <= 0x7F)
            {
                $character = $value;
                $length = 1;
            }
            // Two byte sequence:
            elseif (($value & 0xE0) === 0xC0)
            {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0)
            {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0)
            {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else
            {
                $valid = false;
                $remaining = 0;
            }
        }
        // Continuation byte:
        else
        {
            // Check that the byte is valid, then add it to the character:
            if (($value & 0xC0) === 0x80)
            {
                $remaining--;
                $character |= ($value & 0x3F) << ($remaining * 6);
            }
            // If it is invalid, count the sequence as invalid and reprocess
            // the current byte as the start of a sequence:
            else
            {
                $valid = false;
                $remaining = 0;
                $i--;
            }
        }

        // If we've reached the end of the current byte sequence, emit it
        if (!$remaining)
        {
            // Keep percent encoded anything invalid or not in iunreserved
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of iunreserved codepoints
                || $character < 0x2D
                || $character > 0xEFFFD
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                // Everything else not in iunreserved (this is all BMP)
                || $character === 0x2F
                || $character > 0x39 && $character < 0x41
                // "[" through "`", except "_" (0x5F), which is unreserved
                || $character > 0x5A && $character < 0x61 && $character !== 0x5F
                || $character > 0x7A && $character < 0x7E
                || $character > 0x7E && $character < 0xA0
                || $character > 0xD7FF && $character < 0xF900
            )
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= '%' . strtoupper($bytes[$j]);
                }
            }
            else
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= chr(hexdec($bytes[$j]));
                }
            }
        }
    }

    // If we have any bytes left over they are invalid (i.e., we are
    // mid-way through a multi-byte sequence)
    if ($remaining)
    {
        for ($j = $start; $j < $len; $j++)
        {
            $string .= '%' . strtoupper($bytes[$j]);
        }
    }

    return $string;
}
734 |
|
735 |
/**
 * Drop components that equal the default of the current scheme
 *
 * For every IRI part the normalization table defines for the current
 * scheme, a stored value that is strictly equal to that default is reset:
 * the path to the empty string, every other part to null.
 */
protected function scheme_normalization()
{
    if (!isset($this->normalization[$this->scheme]))
    {
        return;
    }
    $defaults = $this->normalization[$this->scheme];

    // Part name => value that marks the part as absent
    $absent = array(
        'iuserinfo' => null,
        'ihost' => null,
        'port' => null,
        'ipath' => '',
        'iquery' => null,
        'ifragment' => null,
    );

    foreach ($absent as $part => $blank)
    {
        if (isset($defaults[$part]) && $this->$part === $defaults[$part])
        {
            $this->$part = $blank;
        }
    }
}
762 |
|
763 |
/**
 * Check if the object represents a valid IRI. This needs to be done on each
 * call as some things change depending on another part of the IRI.
 *
 * Two combinations are rejected: an authority together with a non-empty
 * path that does not start with exactly one "/", and a scheme-less,
 * authority-less path whose first segment contains a ":".
 *
 * @return bool
 */
public function is_valid()
{
    // An empty path is valid whatever else is set
    if ($this->ipath === '')
    {
        return true;
    }

    $has_authority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
    if ($has_authority)
    {
        // With an authority the path must begin with a single "/"
        return $this->ipath[0] === '/' && substr($this->ipath, 0, 2) !== '//';
    }

    if ($this->scheme !== null)
    {
        return true;
    }

    // No scheme and no authority: a ":" ahead of the first "/" would make
    // the first segment read as a scheme
    $colon = strpos($this->ipath, ':');
    if ($colon === false)
    {
        return true;
    }
    $slash = strpos($this->ipath, '/');

    return $slash !== false && $colon > $slash;
}
792 |
|
793 |
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Passing null leaves the object untouched and returns true. A string
 * that cannot be split into components also leaves the object untouched
 * and returns false. Parsed results are memoised per IRI string in a
 * static cache shared by all instances.
 *
 * @param string|null $iri
 * @return bool
 */
public function set_iri($iri)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($iri === null)
    {
        return true;
    }

    // Normalise the input once so the cache key and the parsed text agree
    $iri = (string) $iri;

    if (isset($cache[$iri]))
    {
        list($this->scheme,
             $this->iuserinfo,
             $this->ihost,
             $this->port,
             $this->ipath,
             $this->iquery,
             $this->ifragment,
             $return) = $cache[$iri];
        return $return;
    }

    $parsed = $this->parse_iri($iri);
    if (!$parsed)
    {
        return false;
    }

    // Start from a blank state. The setters below short-circuit on the
    // first failure; without this reset, components left over from a
    // previous IRI on this object would survive and be written into the
    // shared cache, leaking into every other object that later parses the
    // same string.
    $this->scheme = null;
    $this->iuserinfo = null;
    $this->ihost = null;
    $this->port = null;
    $this->ipath = '';
    $this->iquery = null;
    $this->ifragment = null;

    $return = $this->set_scheme($parsed['scheme'])
        && $this->set_authority($parsed['authority'])
        && $this->set_path($parsed['path'])
        && $this->set_query($parsed['query'])
        && $this->set_fragment($parsed['fragment']);

    $cache[$iri] = array($this->scheme,
                         $this->iuserinfo,
                         $this->ihost,
                         $this->port,
                         $this->ipath,
                         $this->iquery,
                         $this->ifragment,
                         $return);
    return $return;
}
849 |
|
850 |
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A valid scheme is stored lowercased. Null clears the scheme. An invalid
 * scheme also clears it, but reports failure.
 *
 * @param string|null $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme !== null && !preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
    {
        $this->scheme = null;
        return false;
    }

    $this->scheme = ($scheme === null) ? null : strtolower($scheme);
    return true;
}
874 |
|
875 |
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split into userinfo (everything before the last "@"),
 * host, and port (after the first ":" that follows any "]", so the colons
 * inside a bracketed IPv6 literal are not taken for the port delimiter).
 * Null clears all three parts. Results are memoised in a static cache
 * shared by all instances.
 *
 * @param string|null $authority
 * @return bool
 */
public function set_authority($authority)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($authority === null)
    {
        $this->iuserinfo = null;
        $this->ihost = null;
        $this->port = null;
        return true;
    }

    // The stored parts have already been through scheme_normalization(),
    // which depends on the current scheme (e.g. port 80 is dropped for
    // http but must be kept for https), so the scheme has to be part of
    // the key. A scheme never contains ":", which makes it a safe separator.
    $cache_key = $this->scheme . ':' . $authority;

    if (isset($cache[$cache_key]))
    {
        list($this->iuserinfo,
             $this->ihost,
             $this->port,
             $return) = $cache[$cache_key];

        return $return;
    }

    $remaining = $authority;
    if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
    {
        $iuserinfo = substr($remaining, 0, $iuserinfo_end);
        $remaining = substr($remaining, $iuserinfo_end + 1);
    }
    else
    {
        $iuserinfo = null;
    }

    // Search for the port delimiter only after a closing "]", if any
    $bracket_end = strpos($remaining, ']');
    $port_start = strpos($remaining, ':', ($bracket_end === false) ? 0 : $bracket_end);
    if ($port_start !== false)
    {
        // A trailing ":" with nothing after it means "no port". substr()
        // reports that as false on PHP < 8 and as '' on PHP >= 8.
        $port = substr($remaining, $port_start + 1);
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $remaining = substr($remaining, 0, $port_start);
    }
    else
    {
        $port = null;
    }

    // The setters short-circuit on the first failure; clear everything
    // first so a failed host does not leave a stale port behind (and in
    // the shared cache)
    $this->iuserinfo = null;
    $this->ihost = null;
    $this->port = null;

    $return = $this->set_userinfo($iuserinfo) &&
              $this->set_host($remaining) &&
              $this->set_port($port);

    $cache[$cache_key] = array($this->iuserinfo,
                               $this->ihost,
                               $this->port,
                               $return);

    return $return;
}
941 |
|
942 |
/**
 * Set the iuserinfo.
 *
 * Null clears the part. Any other value is stored after characters that
 * are not allowed in userinfo have been percent-encoded.
 *
 * @param string|null $iuserinfo
 * @return bool Always true
 */
public function set_userinfo($iuserinfo)
{
    if ($iuserinfo === null)
    {
        $this->iuserinfo = null;
        return true;
    }

    $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
    $this->scheme_normalization();

    return true;
}
962 |
|
963 |
/**
 * Set the ihost. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A value wrapped in "[...]" is validated and compressed as an IPv6
 * address by SimplePie_Net_IPv6. Any other value is percent-encoded where
 * needed and has its ASCII letters lowercased.
 *
 * @param string|null $ihost
 * @return bool
 */
public function set_host($ihost)
{
    if ($ihost === null)
    {
        $this->ihost = null;
        return true;
    }

    if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
    {
        $address = substr($ihost, 1, -1);
        if (!SimplePie_Net_IPv6::check_ipv6($address))
        {
            $this->ihost = null;
            return false;
        }
        $this->ihost = '[' . SimplePie_Net_IPv6::compress($address) . ']';
    }
    else
    {
        $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

        // Lowercase, but ignore pct-encoded sections (as they should
        // remain uppercase). This must be done after the previous step
        // as that can add unescaped characters.
        $offset = 0;
        $length = strlen($ihost);
        while (($offset += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $offset)) < $length)
        {
            if ($ihost[$offset] === '%')
            {
                // Skip the whole "%XX" triplet
                $offset += 3;
                continue;
            }

            $ihost[$offset] = strtolower($ihost[$offset]);
            $offset++;
        }

        $this->ihost = $ihost;
    }

    $this->scheme_normalization();

    return true;
}
1018 |
|
1019 |
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * Null and the empty string both mean "no port". An empty string must not
 * fall through to the digit check: it trivially consists of digits only
 * and would be stored as port 0. A digits-only value is stored as an
 * integer. Anything else clears the port and reports failure.
 *
 * @param string|int|null $port
 * @return bool
 */
public function set_port($port)
{
    if ($port === null || $port === '')
    {
        $this->port = null;
        return true;
    }

    // Accept integers as well as numeric strings
    $port = (string) $port;

    if (strspn($port, '0123456789') === strlen($port))
    {
        $this->port = (int) $port;
        $this->scheme_normalization();
        return true;
    }

    $this->port = null;
    return false;
}
1045 |
|
1046 |
/**
 * Set the ipath.
 *
 * The path is percent-encoded where needed. When a scheme is set, the
 * variant with dot segments removed is stored; otherwise the encoded path
 * is kept as is. Both variants are memoised per input string in a static
 * cache shared by all instances.
 *
 * @param string $ipath
 * @return bool Always true
 */
public function set_path($ipath)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    $ipath = (string) $ipath;

    if (!isset($cache[$ipath]))
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');

        // Index 0: encoded only; index 1: encoded with dot segments removed
        $cache[$ipath] = array($encoded, $this->remove_dot_segments($encoded));
    }

    $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];

    $this->scheme_normalization();
    return true;
}
1078 |
|
1079 |
/**
 * Set the iquery.
 *
 * Null clears the part. Any other value is stored after characters that
 * are not allowed in a query have been percent-encoded; the iprivate
 * ranges are allowed here.
 *
 * @param string|null $iquery
 * @return bool Always true
 */
public function set_query($iquery)
{
    if ($iquery === null)
    {
        $this->iquery = null;
        return true;
    }

    $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
    $this->scheme_normalization();

    return true;
}
1098 |
|
1099 |
/**
 * Set the ifragment.
 *
 * Null clears the part. Any other value is stored after characters that
 * are not allowed in a fragment have been percent-encoded.
 *
 * @param string|null $ifragment
 * @return bool Always true
 */
public function set_fragment($ifragment)
{
    if ($ifragment === null)
    {
        $this->ifragment = null;
        return true;
    }

    $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
    $this->scheme_normalization();

    return true;
}
1118 |
|
1119 |
/**
 * Convert an IRI to a URI (or parts thereof)
 *
 * Every byte in the range 0x80-0xFF is replaced by its "%XX" escape; all
 * other bytes are left as they are.
 *
 * @param string $string
 * @return string
 */
public function to_uri($string)
{
    // Built once: all 128 non-ASCII byte values, used as the search mask
    static $high_bytes;
    if (!$high_bytes)
    {
        $high_bytes = implode('', range("\x80", "\xFF"));
    }

    $length = strlen($string);
    $offset = strcspn($string, $high_bytes);

    while ($offset < $length)
    {
        $escaped = sprintf('%%%02X', ord($string[$offset]));
        $string = substr_replace($string, $escaped, $offset, 1);

        // One byte became three: move past the escape, then on to the
        // next non-ASCII byte
        $offset += 3;
        $length += 2;
        $offset += strcspn($string, $high_bytes, $offset);
    }

    return $string;
}
1143 |
|
1144 |
/**
 * Get the complete IRI
 *
 * The parts are joined with their delimiters. When the path is empty, a
 * non-empty authority is present and the scheme defines a default path,
 * that default path is emitted instead.
 *
 * @return string|false The IRI, or false if the object is not a valid IRI
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iauthority = $this->get_iauthority();

    $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';
    if ($iauthority !== null)
    {
        $iri .= '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $iri .= $this->ipath;
    }
    elseif ($iauthority !== null && $iauthority !== '' && !empty($this->normalization[$this->scheme]['ipath']))
    {
        $iri .= $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $iri .= '?' . $this->iquery;
    }
    if ($this->ifragment !== null)
    {
        $iri .= '#' . $this->ifragment;
    }

    return $iri;
}
1184 |
|
1185 |
/**
 * Get the complete URI
 *
 * This is the result of get_iri() passed through to_uri().
 *
 * @return string
 */
public function get_uri()
{
    $iri = $this->get_iri();

    return $this->to_uri($iri);
}
1194 |
|
1195 |
/**
 * Get the complete iauthority
 *
 * Builds "userinfo@host:port" from whichever of the three parts are set.
 *
 * @return string|null Null when userinfo, host and port are all unset
 */
protected function get_iauthority()
{
    if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
    {
        return null;
    }

    $userinfo = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
    $host = ($this->ihost !== null) ? $this->ihost : '';
    $port = ($this->port !== null) ? ':' . $this->port : '';

    return $userinfo . $host . $port;
}
1224 |
|
1225 |
/**
 * Get the complete authority
 *
 * This is the iauthority passed through to_uri(). A non-string
 * iauthority (null when no authority is set) is returned unchanged.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
1238 |
} |