root / drupal7 / sites / all / libraries / simplepie / library / SimplePie / IRI.php @ 41cc1b08
1 | 41cc1b08 | Assos Assos | <?php
|
---|---|---|---|
2 | /**
|
||
3 | * SimplePie
|
||
4 | *
|
||
5 | * A PHP-Based RSS and Atom Feed Framework.
|
||
6 | * Takes the hard work out of managing a complete RSS/Atom solution.
|
||
7 | *
|
||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
|
||
9 | * All rights reserved.
|
||
10 | *
|
||
11 | * Redistribution and use in source and binary forms, with or without modification, are
|
||
12 | * permitted provided that the following conditions are met:
|
||
13 | *
|
||
14 | * * Redistributions of source code must retain the above copyright notice, this list of
|
||
15 | * conditions and the following disclaimer.
|
||
16 | *
|
||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list
|
||
18 | * of conditions and the following disclaimer in the documentation and/or other materials
|
||
19 | * provided with the distribution.
|
||
20 | *
|
||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used
|
||
22 | * to endorse or promote products derived from this software without specific prior
|
||
23 | * written permission.
|
||
24 | *
|
||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
|
||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
33 | * POSSIBILITY OF SUCH DAMAGE.
|
||
34 | *
|
||
35 | * @package SimplePie
|
||
36 | * @version 1.3.1
|
||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
|
||
38 | * @author Ryan Parman
|
||
39 | * @author Geoffrey Sneddon
|
||
40 | * @author Ryan McCue
|
||
41 | * @link http://simplepie.org/ SimplePie
|
||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License
|
||
43 | */
|
||
44 | |||
45 | /**
|
||
46 | * IRI parser/serialiser/normaliser
|
||
47 | *
|
||
48 | * @package SimplePie
|
||
49 | * @subpackage HTTP
|
||
50 | * @author Geoffrey Sneddon
|
||
51 | * @author Steve Minutillo
|
||
52 | * @author Ryan McCue
|
||
53 | * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
|
||
54 | * @license http://www.opensource.org/licenses/bsd-license.php
|
||
55 | */
|
||
56 | class SimplePie_IRI |
||
57 | { |
||
58 | /**
|
||
59 | * Scheme
|
||
60 | *
|
||
61 | * @var string
|
||
62 | */
|
||
63 | protected $scheme = null; |
||
64 | |||
65 | /**
|
||
66 | * User Information
|
||
67 | *
|
||
68 | * @var string
|
||
69 | */
|
||
70 | protected $iuserinfo = null; |
||
71 | |||
72 | /**
|
||
73 | * ihost
|
||
74 | *
|
||
75 | * @var string
|
||
76 | */
|
||
77 | protected $ihost = null; |
||
78 | |||
79 | /**
|
||
80 | * Port
|
||
81 | *
|
||
82 | * @var int|null
|
||
83 | */
|
||
84 | protected $port = null; |
||
85 | |||
86 | /**
|
||
87 | * ipath
|
||
88 | *
|
||
89 | * @var string
|
||
90 | */
|
||
91 | protected $ipath = ''; |
||
92 | |||
93 | /**
|
||
94 | * iquery
|
||
95 | *
|
||
96 | * @var string
|
||
97 | */
|
||
98 | protected $iquery = null; |
||
99 | |||
100 | /**
|
||
101 | * ifragment
|
||
102 | *
|
||
103 | * @var string
|
||
104 | */
|
||
105 | protected $ifragment = null; |
||
106 | |||
107 | /**
|
||
108 | * Normalization database
|
||
109 | *
|
||
110 | * Each key is the scheme, each value is an array with each key as the IRI
|
||
111 | * part and value as the default value for that part.
|
||
112 | */
|
||
113 | protected $normalization = array( |
||
114 | 'acap' => array( |
||
115 | 'port' => 674 |
||
116 | ), |
||
117 | 'dict' => array( |
||
118 | 'port' => 2628 |
||
119 | ), |
||
120 | 'file' => array( |
||
121 | 'ihost' => 'localhost' |
||
122 | ), |
||
123 | 'http' => array( |
||
124 | 'port' => 80, |
||
125 | 'ipath' => '/' |
||
126 | ), |
||
127 | 'https' => array( |
||
128 | 'port' => 443, |
||
129 | 'ipath' => '/' |
||
130 | ), |
||
131 | ); |
||
132 | |||
/**
 * String conversion hook: yields the serialised IRI.
 *
 * Simply forwards to get_iri(), so whatever that method produces is what
 * a string cast of this object evaluates to.
 *
 * @return string
 */
public function __toString()
{
    $serialised = $this->get_iri();

    return $serialised;
}
||
142 | |||
/**
 * Magic setter: routes property-style writes to the matching set_*() method.
 *
 * A write to $name is forwarded to set_<name>() when such a method exists.
 * Failing that, the "i"-prefixed component names (iauthority, iuserinfo,
 * ihost, ipath, iquery, ifragment) are mapped to the setter named after the
 * part following the leading "i". Any other name is silently ignored.
 *
 * @param string $name Property name
 * @param mixed $value Property value
 */
public function __set($name, $value)
{
    $setter = 'set_' . $name;
    if (method_exists($this, $setter))
    {
        $this->$setter($value);
        return;
    }

    $prefixed_aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
    if (in_array($name, $prefixed_aliases, true))
    {
        // e.g. "ihost" -> set_host()
        $setter = 'set_' . substr($name, 1);
        $this->$setter($value);
    }
}
||
167 | |||
/**
 * Magic getter: exposes the IRI components as readable properties.
 *
 * Resolution order:
 *  1. "iri", "uri", "iauthority" and "authority" call the get_*() method
 *     of the same name;
 *  2. a name that is an actual object property is returned directly;
 *  3. a name that becomes a property once "i" is prepended (host -> ihost);
 *  4. a name that becomes a property once its first character is dropped
 *     (ischeme -> scheme);
 *  5. otherwise an E_USER_NOTICE is raised and null is used.
 *
 * When the resolved value is null and the normalization table holds a
 * default for the current scheme and resolved property name, that default
 * is returned instead.
 *
 * @param string $name Property name
 * @return mixed
 */
public function __get($name)
{
    // get_object_vars()/array_key_exists() rather than isset(): a component
    // whose value is null must still count as an existing property.
    $props = get_object_vars($this);

    if (in_array($name, array('iri', 'uri', 'iauthority', 'authority'), true))
    {
        $getter = 'get_' . $name;
        $return = $this->$getter();
    }
    elseif (array_key_exists($name, $props))
    {
        $return = $this->$name;
    }
    else
    {
        $with_prefix = 'i' . $name;
        $without_first = substr($name, 1);

        if (array_key_exists($with_prefix, $props))
        {
            // host -> ihost
            $name = $with_prefix;
            $return = $this->$with_prefix;
        }
        elseif ($without_first && array_key_exists($without_first, $props))
        {
            // ischeme -> scheme
            $name = $without_first;
            $return = $this->$without_first;
        }
        else
        {
            trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
            $return = null;
        }
    }

    if ($return === null && isset($this->normalization[$this->scheme][$name]))
    {
        return $this->normalization[$this->scheme][$name];
    }

    return $return;
}
||
220 | |||
/**
 * Magic isset(): reports whether a property-style name is available.
 *
 * A name counts as set when a get_<name>() method exists, or when the
 * object property of that name exists and is not null.
 *
 * @param string $name Property name
 * @return bool
 */
public function __isset($name)
{
    return method_exists($this, 'get_' . $name) || isset($this->$name);
}
||
238 | |||
/**
 * Magic unset(): clears a component through its setter.
 *
 * When a set_<name>() method exists it is called with an empty string;
 * names without a matching setter are ignored.
 *
 * @param string $name Property name
 */
public function __unset($name)
{
    $setter = 'set_' . $name;
    if (!method_exists($this, $setter))
    {
        return;
    }

    $this->$setter('');
}
||
251 | |||
/**
 * Build an IRI object, optionally initialised from a string.
 *
 * The argument is handed straight to set_iri(); its boolean result is
 * not inspected here.
 *
 * @param string $iri IRI to parse; with the default null nothing is parsed
 */
public function __construct($iri = null)
{
    $this->set_iri($iri);
}
||
261 | |||
/**
 * Resolve a (possibly relative) IRI against a base IRI.
 *
 * Either argument may be a SimplePie_IRI or something that the
 * SimplePie_IRI constructor accepts. Outcomes:
 *  - false when $relative is not valid;
 *  - a clone of $relative when it already carries a scheme;
 *  - false when $base has no scheme or is not valid;
 *  - otherwise a new IRI built from $base and $relative.
 *
 * @param IRI|string $base (Absolute) Base IRI
 * @param IRI|string $relative Relative IRI
 * @return IRI|false
 */
public static function absolutize($base, $relative)
{
    if (!($relative instanceof SimplePie_IRI))
    {
        $relative = new SimplePie_IRI($relative);
    }
    if (!$relative->is_valid())
    {
        return false;
    }
    if ($relative->scheme !== null)
    {
        // Already absolute: nothing to resolve.
        return clone $relative;
    }

    if (!($base instanceof SimplePie_IRI))
    {
        $base = new SimplePie_IRI($base);
    }
    if ($base->scheme === null || !$base->is_valid())
    {
        return false;
    }

    if ($relative->get_iri() === '')
    {
        // Empty reference: the base itself, minus its fragment.
        $target = clone $base;
        $target->ifragment = null;
    }
    elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
    {
        // Reference with its own authority: only the scheme is inherited.
        $target = clone $relative;
        $target->scheme = $base->scheme;
    }
    else
    {
        $target = new SimplePie_IRI;
        $target->scheme = $base->scheme;
        $target->iuserinfo = $base->iuserinfo;
        $target->ihost = $base->ihost;
        $target->port = $base->port;

        if ($relative->ipath === '')
        {
            // No path in the reference: keep the base path, and the base
            // query unless the reference supplies one.
            $target->ipath = $base->ipath;
            if ($relative->iquery !== null)
            {
                $target->iquery = $relative->iquery;
            }
            elseif ($base->iquery !== null)
            {
                $target->iquery = $base->iquery;
            }
        }
        else
        {
            $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

            if ($relative->ipath[0] === '/')
            {
                $merged = $relative->ipath;
            }
            elseif ($base_has_authority && $base->ipath === '')
            {
                $merged = '/' . $relative->ipath;
            }
            elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
            {
                // Replace everything after the last "/" of the base path.
                $merged = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
            }
            else
            {
                $merged = $relative->ipath;
            }

            $target->ipath = $merged;
            $target->ipath = $target->remove_dot_segments($target->ipath);
            $target->iquery = $relative->iquery;
        }

        $target->ifragment = $relative->ifragment;
    }

    $target->scheme_normalization();

    return $target;
}
||
357 | |||
/**
 * Split an IRI string into its five top-level components.
 *
 * Surrounding space, tab, LF, FF and CR characters are trimmed first.
 * On a successful match the raw preg_match() array is returned (numeric
 * and named groups), with the named entries normalised as follows:
 * "scheme", "authority", "query" and "fragment" become null when their
 * delimiter-bearing group is absent or empty, and "path" defaults to ''.
 *
 * @param string $iri
 * @return array|false The match array, or false when the pattern does not match
 */
protected function parse_iri($iri)
{
    $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");

    $matched = preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match);
    if (!$matched)
    {
        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    // Group 1 is "scheme:"; an empty match means there was no scheme.
    if ($match[1] === '')
    {
        $match['scheme'] = null;
    }

    // Group 3 is "//authority"; it is what distinguishes "no authority"
    // from "empty authority".
    $has_authority = isset($match[3]) && $match[3] !== '';
    if (!$has_authority)
    {
        $match['authority'] = null;
    }

    if (!isset($match[5]))
    {
        $match['path'] = '';
    }

    // Groups 6 and 8 include the "?" / "#" delimiter.
    $has_query = isset($match[6]) && $match[6] !== '';
    if (!$has_query)
    {
        $match['query'] = null;
    }

    $has_fragment = isset($match[8]) && $match[8] !== '';
    if (!$has_fragment)
    {
        $match['fragment'] = null;
    }

    return $match;
}
||
397 | |||
/**
 * Collapse "." and ".." segments in a path.
 *
 * Works through the input from the left, moving ordinary segments to an
 * output buffer and applying rules A-E below for as long as the remaining
 * input still contains a dot segment candidate. Whatever input is left
 * once no candidate remains is appended unchanged.
 *
 * @param string $input
 * @return string
 */
protected function remove_dot_segments($input)
{
    $output = '';

    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: strip a leading "../" or "./"
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
            continue;
        }
        if (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
            continue;
        }

        // B: a leading "/./" (or the whole input being "/.") becomes "/"
        if (strncmp($input, '/./', 3) === 0)
        {
            $input = substr($input, 2);
            continue;
        }
        if ($input === '/.')
        {
            $input = '/';
            continue;
        }

        // C: a leading "/../" (or the whole input being "/..") becomes "/"
        // and the last segment already in the output is dropped together
        // with its preceding "/". With no "/" in the output the offset is
        // 0, which empties the buffer.
        if (strncmp($input, '/../', 4) === 0)
        {
            $input = substr($input, 3);
            $output = substr_replace($output, '', (int) strrpos($output, '/'));
            continue;
        }
        if ($input === '/..')
        {
            $input = '/';
            $output = substr_replace($output, '', (int) strrpos($output, '/'));
            continue;
        }

        // D: input consisting solely of "." or ".." is discarded
        if ($input === '.' || $input === '..')
        {
            $input = '';
            continue;
        }

        // E: move the first segment (including a leading "/", excluding
        // the next "/") from the input to the output
        $next_slash = strpos($input, '/', 1);
        if ($next_slash !== false)
        {
            $output .= substr($input, 0, $next_slash);
            $input = substr($input, $next_slash);
        }
        else
        {
            $output .= $input;
            $input = '';
        }
    }

    return $output . $input;
}
||
457 | |||
/**
 * Percent-encode every byte that is not permitted in the component.
 *
 * Steps:
 *  1. runs of valid percent escapes are passed through
 *     remove_iunreserved_percent_encoded();
 *  2. a "%" not followed by two hex digits is turned into "%25";
 *  3. the string is scanned byte-wise; ASCII characters from $extra_chars,
 *     the unreserved set and "%" are skipped, and every other byte is
 *     decoded as the start of a UTF-8 sequence. Malformed, over-long,
 *     non-character or out-of-range sequences are replaced byte by byte
 *     with uppercase %XX escapes.
 *
 * @param string $string Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool $iprivate Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
    // Normalize as many pct-encoded sections as possible
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // Replace invalid percent characters
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // "%" may be skipped safely: every remaining "%" now starts a valid escape.
    $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    $pos = 0;
    $size = strlen($string);

    while (true)
    {
        // Jump over the run of directly permitted ASCII characters.
        $pos += strspn($string, $extra_chars, $pos);
        if ($pos >= $size)
        {
            break;
        }

        $lead = ord($string[$pos]);
        $first = $pos;
        $valid = true;

        // Classify the lead byte. A single-byte character can never be
        // valid here, otherwise strspn() would have skipped it.
        if (($lead & 0xE0) === 0xC0)
        {
            $codepoint = ($lead & 0x1F) << 6;
            $width = 2;
        }
        elseif (($lead & 0xF0) === 0xE0)
        {
            $codepoint = ($lead & 0x0F) << 12;
            $width = 3;
        }
        elseif (($lead & 0xF8) === 0xF0)
        {
            $codepoint = ($lead & 0x07) << 18;
            $width = 4;
        }
        else
        {
            $valid = false;
            $width = 1;
        }
        $pending = $width - 1;

        if ($pending)
        {
            if ($pos + $width <= $size)
            {
                $pos++;
                while ($pending)
                {
                    $byte = ord($string[$pos]);

                    if (($byte & 0xC0) !== 0x80)
                    {
                        // Not a continuation byte: the sequence is broken.
                        // Step back so this byte is examined again as the
                        // start of the next scan.
                        $valid = false;
                        $pos--;
                        break;
                    }

                    $pending--;
                    $codepoint |= ($byte & 0x3F) << ($pending * 6);
                    $pos++;
                }
            }
            else
            {
                // Sequence runs off the end of the string.
                $pos = $size - 1;
                $valid = false;
            }
        }

        // Decide whether the bytes from $first onwards must be escaped.
        $escape = !$valid;
        if (!$escape)
        {
            // Non-shortest form sequences are invalid
            $overlong = ($width > 1 && $codepoint <= 0x7F)
                || ($width > 2 && $codepoint <= 0x7FF)
                || ($width > 3 && $codepoint <= 0xFFFF);

            // Noncharacters
            $nonchar = ($codepoint & 0xFFFE) === 0xFFFE
                || ($codepoint >= 0xFDD0 && $codepoint <= 0xFDEF);

            // Everything else not in ucschar
            $outside_ucschar = ($codepoint > 0xD7FF && $codepoint < 0xF900)
                || $codepoint < 0xA0
                || $codepoint > 0xEFFFD;

            // Everything not in iprivate, if it applies
            $outside_iprivate = !$iprivate
                || $codepoint < 0xE000
                || $codepoint > 0x10FFFD;

            $escape = $overlong || $nonchar || ($outside_ucschar && $outside_iprivate);
        }

        if ($escape)
        {
            // After a fully decoded character $pos is one past its last
            // byte; pull it back so the loop below stops on that byte.
            if ($valid)
            {
                $pos--;
            }

            // Each replaced byte grows the string by two characters, so
            // the index, the end marker and the length all shift by two.
            for ($j = $first; $j <= $pos; $j++)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                $j += 2;
                $pos += 2;
                $size += 2;
            }
        }
    }

    return $string;
}
||
593 | |||
/**
 * Callback function for preg_replace_callback.
 *
 * Receives a run of consecutive percent escapes and decodes those that
 * represent UTF-8 encoded characters in iunreserved. Escapes that are
 * malformed UTF-8, over-long, non-characters, or outside iunreserved are
 * kept as escapes, with their hex digits upper-cased.
 *
 * The underscore (U+005F) is part of iunreserved and is therefore decoded;
 * the neighbouring characters [ \ ] ^ ` stay escaped.
 *
 * @param array $match PCRE match
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
    // As we just have valid percent encoded sequences we can just explode
    // and ignore the first member of the returned array (an empty string).
    $bytes = explode('%', $match[0]);

    // Result accumulator, and the number of continuation bytes still
    // expected for the sequence currently being decoded.
    $string = '';
    $remaining = 0;

    // Loop over each and every byte, and set $value to its value
    for ($i = 1, $len = count($bytes); $i < $len; $i++)
    {
        $value = hexdec($bytes[$i]);

        // If we're the first byte of sequence:
        if (!$remaining)
        {
            // Start position
            $start = $i;

            // By default we are valid
            $valid = true;

            // One byte sequence:
            if ($value <= 0x7F)
            {
                $character = $value;
                $length = 1;
            }
            // Two byte sequence:
            elseif (($value & 0xE0) === 0xC0)
            {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0)
            {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0)
            {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else
            {
                $valid = false;
                $remaining = 0;
            }
        }
        // Continuation byte:
        else
        {
            // Check that the byte is valid, then add it to the character:
            if (($value & 0xC0) === 0x80)
            {
                $remaining--;
                $character |= ($value & 0x3F) << ($remaining * 6);
            }
            // If it is invalid, count the sequence as invalid and reprocess
            // the current byte as the start of a sequence:
            else
            {
                $valid = false;
                $remaining = 0;
                $i--;
            }
        }

        // Once the current sequence is complete (or abandoned), emit it.
        if (!$remaining)
        {
            // Percent encode anything invalid or not in iunreserved
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of iunreserved codepoints
                || $character < 0x2D
                || $character > 0xEFFFD
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                // Everything else not in iunreserved (this is all BMP)
                || $character === 0x2F
                || $character > 0x39 && $character < 0x41
                // [ \ ] ^ ` are excluded, but "_" (0x5F) is unreserved
                || $character > 0x5A && $character < 0x61 && $character !== 0x5F
                || $character > 0x7A && $character < 0x7E
                || $character > 0x7E && $character < 0xA0
                || $character > 0xD7FF && $character < 0xF900
            )
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= '%' . strtoupper($bytes[$j]);
                }
            }
            else
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= chr(hexdec($bytes[$j]));
                }
            }
        }
    }

    // If we have any bytes left over they are invalid (i.e., we are
    // mid-way through a multi-byte sequence)
    if ($remaining)
    {
        for ($j = $start; $j < $len; $j++)
        {
            $string .= '%' . strtoupper($bytes[$j]);
        }
    }

    return $string;
}
||
734 | |||
/**
 * Drop components that merely repeat the default for the current scheme.
 *
 * For each component, if the normalization table holds a default for the
 * current scheme and the component is identical (===) to that default,
 * the component is reset: ipath to '', every other component to null.
 */
protected function scheme_normalization()
{
    // Component name => value it is reset to when it equals the default.
    $reset_values = array(
        'iuserinfo' => null,
        'ihost'     => null,
        'port'      => null,
        'ipath'     => '',
        'iquery'    => null,
        'ifragment' => null,
    );

    foreach ($reset_values as $part => $blank)
    {
        if (!isset($this->normalization[$this->scheme][$part]))
        {
            continue;
        }
        if ($this->$part === $this->normalization[$this->scheme][$part])
        {
            $this->$part = $blank;
        }
    }
}
||
762 | |||
/**
 * Check if the object represents a valid IRI. This needs to be done on each
 * call as some things change depending on another part of the IRI.
 *
 * Rules applied (path rules of RFC 3986 section 3.3):
 *  - an empty path is always valid;
 *  - with an authority (userinfo, host or port present) the path must
 *    begin with "/"; a path such as "//a" is legal in this case;
 *  - without an authority the path must not begin with "//", since it
 *    would be read back as an authority;
 *  - without scheme and authority the first path segment must not
 *    contain ":", since it would be read back as a scheme.
 *
 * @return bool
 */
public function is_valid()
{
    if ($this->ipath === '')
    {
        return true;
    }

    $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;

    if ($isauthority)
    {
        return $this->ipath[0] === '/';
    }

    if (substr($this->ipath, 0, 2) === '//')
    {
        return false;
    }

    if ($this->scheme === null)
    {
        $colon = strpos($this->ipath, ':');
        if ($colon !== false)
        {
            // A colon is only acceptable after the first "/".
            $slash = strpos($this->ipath, '/');
            if ($slash === false || $colon < $slash)
            {
                return false;
            }
        }
    }

    return true;
}
||
792 | |||
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Passing null is a no-op that reports success. Any other value is cast
 * to a string once, and that string is used both as the cache key and as
 * the parser input, so stringable objects and non-string scalars are
 * cached under exactly the text that was parsed.
 *
 * Results are memoised per input string in a function-static cache that
 * is shared by all instances; a cache hit restores the stored components
 * and the stored result. If the string cannot be split into components
 * at all, false is returned and nothing is cached.
 *
 * @param string $iri
 * @return bool
 */
public function set_iri($iri)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($iri === null)
    {
        return true;
    }

    // Normalise before touching the cache: array offsets must not be
    // objects, and float/bool keys would be coerced to unrelated integers.
    $iri = (string) $iri;

    if (isset($cache[$iri]))
    {
        list($this->scheme,
             $this->iuserinfo,
             $this->ihost,
             $this->port,
             $this->ipath,
             $this->iquery,
             $this->ifragment,
             $return) = $cache[$iri];
        return $return;
    }

    $parsed = $this->parse_iri($iri);
    if (!$parsed)
    {
        return false;
    }

    // Setters run in order and stop at the first one that fails.
    $return = $this->set_scheme($parsed['scheme'])
        && $this->set_authority($parsed['authority'])
        && $this->set_path($parsed['path'])
        && $this->set_query($parsed['query'])
        && $this->set_fragment($parsed['fragment']);

    $cache[$iri] = array($this->scheme,
                         $this->iuserinfo,
                         $this->ihost,
                         $this->port,
                         $this->ipath,
                         $this->iquery,
                         $this->ifragment,
                         $return);
    return $return;
}
||
849 | |||
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * null clears the scheme. Otherwise the value must be a letter followed
 * by letters, digits, "+", "-" or "." and nothing else; the pattern is
 * anchored with \z so that a trailing newline is rejected rather than
 * silently accepted. A valid scheme is stored lower-cased; an invalid one
 * clears the scheme.
 *
 * @param string $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme === null)
    {
        $this->scheme = null;
        return true;
    }

    if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*\z/', $scheme))
    {
        $this->scheme = null;
        return false;
    }

    $this->scheme = strtolower($scheme);
    return true;
}
||
874 | |||
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * null clears userinfo, host and port. Otherwise the string is split at
 * the last "@" (userinfo) and at the first ":" found at or after a "]"
 * (port, so that colons inside a bracketed literal are not mistaken for
 * the port delimiter), and the pieces are passed to set_userinfo(),
 * set_host() and set_port() in that order, stopping at the first failure.
 *
 * A port delimiter with nothing after it ("host:") means "no port": the
 * port is set to null rather than to 0.
 *
 * Results are memoised per authority string in a function-static cache
 * shared by all instances.
 *
 * @param string $authority
 * @return bool
 */
public function set_authority($authority)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($authority === null)
    {
        $this->iuserinfo = null;
        $this->ihost = null;
        $this->port = null;
        return true;
    }

    if (isset($cache[$authority]))
    {
        list($this->iuserinfo,
             $this->ihost,
             $this->port,
             $return) = $cache[$authority];

        return $return;
    }

    $remaining = $authority;

    if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
    {
        $iuserinfo = substr($remaining, 0, $iuserinfo_end);
        $remaining = substr($remaining, $iuserinfo_end + 1);
    }
    else
    {
        $iuserinfo = null;
    }

    // Start looking for the port delimiter at the closing bracket, or at
    // the beginning when there is none.
    $bracket_end = strpos($remaining, ']');
    $search_from = ($bracket_end === false) ? 0 : $bracket_end;

    if (($port_start = strpos($remaining, ':', $search_from)) !== false)
    {
        $port = substr($remaining, $port_start + 1);
        // substr() yields false on old PHP versions and '' on newer ones
        // when nothing follows the colon; both mean "no port given".
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $remaining = substr($remaining, 0, $port_start);
    }
    else
    {
        $port = null;
    }

    $return = $this->set_userinfo($iuserinfo) &&
              $this->set_host($remaining) &&
              $this->set_port($port);

    $cache[$authority] = array($this->iuserinfo,
                               $this->ihost,
                               $this->port,
                               $return);

    return $return;
}
||
941 | |||
/**
 * Set the iuserinfo.
 *
 * null clears the component. Any other value is stored after being run
 * through replace_invalid_with_pct_encoding() with the sub-delimiters and
 * ":" as additional permitted characters, followed by a scheme
 * normalization pass. This setter always reports success.
 *
 * @param string $iuserinfo
 * @return bool
 */
public function set_userinfo($iuserinfo)
{
    if ($iuserinfo !== null)
    {
        $encoded = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
        $this->iuserinfo = $encoded;
        $this->scheme_normalization();

        return true;
    }

    $this->iuserinfo = null;

    return true;
}
||
962 | |||
/**
 * Set the ihost. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * - null clears the host.
 * - A value wrapped in "[" and "]" is checked with
 *   SimplePie_Net_IPv6::check_ipv6(); on success its compressed form is
 *   stored (still bracketed), on failure the host is cleared and false is
 *   returned.
 * - Anything else is percent-encoded where required and lower-cased,
 *   leaving percent escapes untouched.
 *
 * A scheme normalization pass follows every successful non-null update.
 *
 * @param string $ihost
 * @return bool
 */
public function set_host($ihost)
{
    if ($ihost === null)
    {
        $this->ihost = null;
        return true;
    }

    $is_bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

    if ($is_bracketed)
    {
        $literal = substr($ihost, 1, -1);
        if (!SimplePie_Net_IPv6::check_ipv6($literal))
        {
            $this->ihost = null;
            return false;
        }

        $this->ihost = '[' . SimplePie_Net_IPv6::compress($literal) . ']';
    }
    else
    {
        $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

        // Lower-case only after encoding, since that step can introduce
        // unescaped upper-case characters. Percent escapes are skipped as
        // a whole (three characters) so their hex digits stay upper-case.
        $cursor = 0;
        $end = strlen($ihost);
        while (($cursor += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $cursor)) < $end)
        {
            if ($ihost[$cursor] === '%')
            {
                $cursor += 3;
                continue;
            }

            $ihost[$cursor] = strtolower($ihost[$cursor]);
            $cursor++;
        }

        $this->ihost = $ihost;
    }

    $this->scheme_normalization();

    return true;
}
||
1018 | |||
/**
 * Set the port.
 *
 * null and the empty string both clear the port: an empty port (as in
 * "host:") carries no value and RFC 3986 section 3.2.3 says it should be
 * omitted. Previously the empty string passed the digits-only test and was
 * stored as integer 0, which get_iauthority() would then render as ":0".
 *
 * A value made up solely of ASCII digits is stored as an integer and scheme
 * normalization is re-applied. Any other value clears the port and is
 * reported as a failure.
 *
 * @param string|null $port
 * @return bool True on success, false if $port contains non-digit characters
 */
public function set_port($port)
{
    // No port supplied (or an empty one): clear it without re-normalizing.
    if ($port === null || $port === '')
    {
        $this->port = null;
        return true;
    }

    // Accept only strings consisting entirely of decimal digits.
    if (strspn($port, '0123456789') === strlen($port))
    {
        $this->port = (int) $port;
        $this->scheme_normalization();
        return true;
    }

    $this->port = null;
    return false;
}
||
1045 | |||
/**
 * Set the ipath component.
 *
 * The input is cast to string, then two variants are computed and kept in
 * a function-level static cache keyed by that string: the result of
 * replace_invalid_with_pct_encoding(), and that result after
 * remove_dot_segments(). The dot-segment-free variant is stored when a
 * scheme is set; otherwise the merely encoded variant is stored. Scheme
 * normalization is re-applied afterwards.
 *
 * @param string $ipath
 * @return bool Always true
 */
public function set_path($ipath)
{
    // Index 0: pct-encoded path; index 1: same with dot segments removed.
    static $cache = array();

    $ipath = (string) $ipath;

    if (!isset($cache[$ipath]))
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
        $cache[$ipath] = array($encoded, $this->remove_dot_segments($encoded));
    }

    $variant = (int) ($this->scheme !== null);
    $this->ipath = $cache[$ipath][$variant];

    $this->scheme_normalization();
    return true;
}
||
1078 | |||
/**
 * Set the iquery component.
 *
 * Passing null clears the component. Any other value is run through
 * replace_invalid_with_pct_encoding() with the character list
 * "!$&'()*+,;=:@/?" and a third argument of true, then scheme
 * normalization is re-applied.
 *
 * @param string|null $iquery
 * @return bool Always true
 */
public function set_query($iquery)
{
    // Clearing the component needs no encoding and no re-normalization.
    if ($iquery === null)
    {
        $this->iquery = null;
        return true;
    }

    $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
    $this->scheme_normalization();

    return true;
}
||
1098 | |||
/**
 * Set the ifragment component.
 *
 * Passing null clears the component. Any other value is run through
 * replace_invalid_with_pct_encoding() with the character list
 * "!$&'()*+,;=:@/?", then scheme normalization is re-applied.
 *
 * @param string|null $ifragment
 * @return bool Always true
 */
public function set_fragment($ifragment)
{
    // Clearing the component needs no encoding and no re-normalization.
    if ($ifragment === null)
    {
        $this->ifragment = null;
        return true;
    }

    $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
    $this->scheme_normalization();

    return true;
}
||
1118 | |||
/**
 * Convert an IRI (or a part of one) to a URI.
 *
 * Every byte in the range 0x80-0xFF is replaced by its "%XX" form using
 * uppercase hexadecimal digits; all other bytes are copied unchanged.
 * When the input contains no such byte it is returned exactly as given.
 *
 * @param string $string
 * @return string
 */
public function to_uri($string)
{
    // Built once: a string holding every byte from 0x80 to 0xFF.
    static $high_bytes;
    if (!$high_bytes)
    {
        $high_bytes = implode('', range("\x80", "\xFF"));
    }

    $length = strlen($string);
    $cursor = strcspn($string, $high_bytes, 0);

    // Nothing to escape: hand the input back untouched.
    if ($cursor >= $length)
    {
        return $string;
    }

    // Start with the leading run of bytes that need no escaping.
    $encoded = substr($string, 0, $cursor);
    while ($cursor < $length)
    {
        // Escape the high byte at the cursor...
        $encoded .= sprintf('%%%02X', ord($string[$cursor]));
        $cursor++;

        // ...then copy the run of plain bytes that follows it.
        $run = strcspn($string, $high_bytes, $cursor);
        $encoded .= substr($string, $cursor, $run);
        $cursor += $run;
    }

    return $encoded;
}
||
1143 | |||
/**
 * Get the complete IRI.
 *
 * Assembles scheme, authority, path, query and fragment in that order.
 * When the path is empty but a non-empty authority is present, the default
 * path registered for the current scheme in $this->normalization (if any)
 * is used in its place.
 *
 * @return string|false The IRI, or false when is_valid() reports failure
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iauthority = $this->get_iauthority();
    $pieces = array();

    if ($this->scheme !== null)
    {
        $pieces[] = $this->scheme . ':';
    }

    if ($iauthority !== null)
    {
        $pieces[] = '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $pieces[] = $this->ipath;
    }
    elseif ($iauthority !== null && $iauthority !== '' && !empty($this->normalization[$this->scheme]['ipath']))
    {
        // Empty path with a real authority: use the scheme's default path.
        $pieces[] = $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $pieces[] = '?' . $this->iquery;
    }

    if ($this->ifragment !== null)
    {
        $pieces[] = '#' . $this->ifragment;
    }

    return implode('', $pieces);
}
||
1184 | |||
/**
 * Get the complete URI.
 *
 * This is get_iri() passed through to_uri(). get_iri() returns false for
 * an invalid IRI; that failure value is returned directly instead of being
 * pushed through the string functions inside to_uri().
 *
 * @return string|false The URI, or false when get_iri() fails
 */
public function get_uri()
{
    $iri = $this->get_iri();

    // Propagate the failure explicitly rather than relying on bool-to-string
    // coercion inside to_uri().
    if ($iri === false)
    {
        return false;
    }

    return $this->to_uri($iri);
}
||
1194 | |||
/**
 * Get the complete iauthority.
 *
 * The result has the shape "[iuserinfo@][ihost][:port]", where each part
 * is included only when the corresponding property is not null.
 *
 * @return string|null The iauthority, or null when iuserinfo, ihost and
 *                     port are all null
 */
protected function get_iauthority()
{
    $userinfo = $this->iuserinfo;
    $host = $this->ihost;
    $port = $this->port;

    // No authority at all is distinct from an empty authority string.
    if ($userinfo === null && $host === null && $port === null)
    {
        return null;
    }

    $iauthority = '';

    if ($userinfo !== null)
    {
        $iauthority .= $userinfo . '@';
    }

    if ($host !== null)
    {
        $iauthority .= $host;
    }

    if ($port !== null)
    {
        $iauthority .= ':' . $port;
    }

    return $iauthority;
}
||
1224 | |||
/**
 * Get the complete authority.
 *
 * This is get_iauthority() passed through to_uri() when it yields a
 * string; any non-string result is returned as-is.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
||
1238 | } |