Projet

Général

Profil

Paste
Télécharger (15,1 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / libraries / simplepie / library / SimplePie / Sanitize.php @ 41cc1b08

1
<?php
2
/**
3
 * SimplePie
4
 *
5
 * A PHP-Based RSS and Atom Feed Framework.
6
 * Takes the hard work out of managing a complete RSS/Atom solution.
7
 *
8
 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9
 * All rights reserved.
10
 *
11
 * Redistribution and use in source and binary forms, with or without modification, are
12
 * permitted provided that the following conditions are met:
13
 *
14
 *         * Redistributions of source code must retain the above copyright notice, this list of
15
 *           conditions and the following disclaimer.
16
 *
17
 *         * Redistributions in binary form must reproduce the above copyright notice, this list
18
 *           of conditions and the following disclaimer in the documentation and/or other materials
19
 *           provided with the distribution.
20
 *
21
 *         * Neither the name of the SimplePie Team nor the names of its contributors may be used
22
 *           to endorse or promote products derived from this software without specific prior
23
 *           written permission.
24
 *
25
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33
 * POSSIBILITY OF SUCH DAMAGE.
34
 *
35
 * @package SimplePie
36
 * @version 1.3.1
37
 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38
 * @author Ryan Parman
39
 * @author Geoffrey Sneddon
40
 * @author Ryan McCue
41
 * @link http://simplepie.org/ SimplePie
42
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43
 */
44

    
45
/**
46
 * Used for data cleanup and post-processing
47
 *
48
 *
49
 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
50
 *
51
 * @package SimplePie
52
 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
53
 */
54
class SimplePie_Sanitize
{
    // Internal state

    /** @var string Base URL assigned by sanitize() and read by replace_urls() */
    public $base;

    /** @var SimplePie_Registry|null Registry injected through set_registry(); used to reach Misc, Cache and File */
    public $registry;

    // Options

    /** @var bool When true, sanitize() removes a leading <div ...> and a trailing </div> from HTML output */
    public $remove_div = true;

    /** @var string|false Prefix prepended to cached image names; '' or false disables image caching */
    public $image_handler = '';

    /** @var array|false Element names unwrapped/removed by sanitize(); false disables tag stripping */
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');

    /** @var bool When true, stripped tags are kept as literal text instead of being dropped */
    public $encode_instead_of_strip = false;

    /** @var array|false Attribute names removed from every element; false disables attribute stripping */
    public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');

    /** @var bool When true, comment nodes are removed from HTML content */
    public $strip_comments = false;

    /** @var string Encoding of the returned data; anything other than 'UTF-8' triggers a conversion */
    public $output_encoding = 'UTF-8';

    /** @var bool Image caching only runs when this is true */
    public $enable_cache = true;

    /** @var string Location handed to the cache handler */
    public $cache_location = './cache';

    /** @var callable Callback that turns an image URL into a cache name */
    public $cache_name_function = 'md5';

    /** @var int Timeout passed to the File object when fetching images */
    public $timeout = 10;

    /** @var string User agent passed to the File object when fetching images */
    public $useragent = '';

    /** @var bool Passed to the File object when fetching images */
    public $force_fsockopen = false;

    /** @var array|null Map of element name => attribute name(s) holding URLs to absolutize */
    public $replace_url_attributes = null;

    /**
     * Initialise the default element/attribute URL replacement map.
     */
    public function __construct()
    {
        // Set defaults
        $this->set_url_replacements(null);
    }

    /**
     * Choose whether sanitize() removes the outer <div> wrapper from HTML output.
     *
     * @param bool $enable
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the image handler prefix, or disable image handling.
     *
     * @param string|false $page Truthy values are stored as a string; anything else stores false
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Inject the registry used to call Misc/Cache helpers and to create File objects.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Store cache settings used by the image handling in sanitize().
     *
     * Falsy $cache_location / $cache_name_function values leave the current
     * setting untouched. $cache_class is accepted but not used by this method.
     *
     * @param bool|null $enable_cache null leaves the current setting untouched
     * @param string $cache_location
     * @param callable $cache_name_function
     * @param string $cache_class
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Store the settings used when fetching images for the image handler.
     *
     * Falsy arguments leave the corresponding setting untouched.
     * $file_class is accepted but not used by this method.
     *
     * @param string $file_class
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            // Keep the stored type in line with the property default (int)
            $this->timeout = (int) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            // Keep the stored type in line with the property default (bool)
            $this->force_fsockopen = (bool) $force_fsockopen;
        }
    }

    /**
     * Set the list of element names to strip.
     *
     * @param array|string|false $tags Array of names, comma-separated string, or a falsy value to disable
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if ($tags)
        {
            if (is_array($tags))
            {
                $this->strip_htmltags = $tags;
            }
            else
            {
                $this->strip_htmltags = explode(',', $tags);
            }
        }
        else
        {
            $this->strip_htmltags = false;
        }
    }

    /**
     * Choose whether stripped tags are emitted as escaped text instead of being dropped.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the list of attribute names to strip.
     *
     * @param array|string|false $attribs Array of names, comma-separated string, or a falsy value to disable
     */
    public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->strip_attributes = $attribs;
            }
            else
            {
                $this->strip_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->strip_attributes = false;
        }
    }

    /**
     * Choose whether comment nodes are removed from HTML content.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the encoding of the data returned by sanitize().
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set element/attribute key/value pairs of HTML attributes
     * containing URLs that need to be resolved relative to the feed
     *
     * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
     * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
     * |q|@cite
     *
     * @since 1.0
     * @param array|null $element_attribute Element/attribute key/value pairs, null for default
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Clean up a piece of feed data according to its construct type.
     *
     * HTML/XHTML content is parsed with DOMDocument, then comments, tags and
     * attributes are stripped per the configured options, URLs are made
     * absolute, images are optionally routed through the image handler, and
     * the result is serialised back to a string. IRI content is absolutized
     * against $base; text and IRI content is HTML-escaped. Finally the data is
     * converted to the configured output encoding when that is not UTF-8.
     *
     * @param string $data Raw data
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL for resolving relative URLs
     * @return string Sanitized data (the trimmed input when it is empty and not an IRI)
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);
        if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
            {
                // Treat the data as HTML when it contains an entity or a closing tag
                if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
                {
                    $type |= SIMPLEPIE_CONSTRUCT_HTML;
                }
                else
                {
                    $type |= SIMPLEPIE_CONSTRUCT_TEXT;
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
            {
                $data = base64_decode($data);
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
            {
                $document = new DOMDocument();
                $document->encoding = 'UTF-8';
                $data = $this->preprocess($data, $type);

                // Parser warnings about malformed markup are silenced while loading
                set_error_handler(array('SimplePie_Misc', 'silence_errors'));
                $document->loadHTML($data);
                restore_error_handler();

                // Strip comments
                if ($this->strip_comments)
                {
                    $xpath = new DOMXPath($document);
                    $comments = $xpath->query('//comment()');

                    foreach ($comments as $comment)
                    {
                        $comment->parentNode->removeChild($comment);
                    }
                }

                // Strip out HTML tags and attributes that might cause various security problems.
                // Based on recommendations by Mark Pilgrim at:
                // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
                if ($this->strip_htmltags)
                {
                    foreach ($this->strip_htmltags as $tag)
                    {
                        $this->strip_tag($tag, $document, $type);
                    }
                }

                if ($this->strip_attributes)
                {
                    foreach ($this->strip_attributes as $attrib)
                    {
                        $this->strip_attr($attrib, $document);
                    }
                }

                // Replace relative URLs
                $this->base = $base;
                foreach ($this->replace_url_attributes as $element => $attributes)
                {
                    $this->replace_urls($document, $element, $attributes);
                }

                // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
                if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
                {
                    $this->cache_images($document);
                }

                // Remove the DOCTYPE
                // Seems to cause segfaulting if we don't do this
                if ($document->firstChild instanceof DOMDocumentType)
                {
                    $document->removeChild($document->firstChild);
                }

                // Move the first child of the body to the root
                $body = $document->getElementsByTagName('body')->item(0);
                $real_body = ($body !== null) ? $body->childNodes->item(0) : null;

                if ($real_body !== null)
                {
                    $document->replaceChild($real_body, $document->firstChild);

                    // Finally, convert to a HTML string
                    $data = trim($document->saveHTML());
                }
                else
                {
                    // Nothing is left inside the body (e.g. everything was stripped),
                    // so there is no node to serialise.
                    $data = '';
                }

                if ($this->remove_div)
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                    $data = preg_replace('/<\/div>$/', '', $data);
                }
                else
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_IRI)
            {
                $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
                if ($absolute !== false)
                {
                    $data = $absolute;
                }
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
            {
                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
            }

            if ($this->output_encoding !== 'UTF-8')
            {
                $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
            }
        }
        return $data;
    }

    /**
     * Route every <img src> in the document through the image handler.
     *
     * For each image, the cache name is derived from its src. If the cache
     * handler already has it, the src is rewritten to image_handler + name.
     * Otherwise the image is fetched and saved to the cache, and the src is
     * rewritten only when saving succeeds.
     *
     * @param DOMDocument $document Document whose <img> elements are rewritten in place
     */
    protected function cache_images($document)
    {
        $images = $document->getElementsByTagName('img');
        foreach ($images as $img)
        {
            if (!$img->hasAttribute('src'))
            {
                continue;
            }

            $src = $img->getAttribute('src');
            $image_url = call_user_func($this->cache_name_function, $src);
            $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

            if ($cache->load())
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
                continue;
            }

            // REMOTE_ADDR is not defined when running outside a web request (e.g. CLI)
            $request_headers = isset($_SERVER['REMOTE_ADDR']) ? array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']) : array();

            // $img is a DOMElement, so the URL must be read with getAttribute()
            $file = $this->registry->create('File', array($src, $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

            // The bitmask test needs its own parentheses: === binds tighter than &
            if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
            {
                if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                {
                    $img->setAttribute('src', $this->image_handler . $image_url);
                }
                else
                {
                    trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                }
            }
        }
    }

    /**
     * Wrap a markup fragment in a full HTML document so DOMDocument can load it as UTF-8.
     *
     * @param string $html Markup fragment
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Complete document source
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            // Atom XHTML constructs are wrapped with a div by default
            // Note: No protection if $html contains a stray </div>!
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make the given URL attributes of every $tag element absolute against $this->base.
     *
     * Elements whose tag is in the strip list are skipped.
     *
     * @param DOMDocument $document
     * @param string $tag Element name
     * @param string|array $attributes Attribute name or list of attribute names
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags, true))
        {
            $elements = $document->getElementsByTagName($tag);
            foreach ($elements as $element)
            {
                foreach ($attributes as $attribute)
                {
                    if ($element->hasAttribute($attribute))
                    {
                        $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                        if ($value !== false)
                        {
                            $element->setAttribute($attribute, $value);
                        }
                    }
                }
            }
        }
    }

    /**
     * Regex-callback style tag stripper operating on a match array.
     *
     * Not called from within this class; kept for callers that use it as a
     * preg_replace_callback handler.
     *
     * @param array $match Match groups: [1] tag name, [2] attributes, [3]/[4] inner content
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        if ($this->encode_instead_of_strip)
        {
            if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'), true))
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
            }
            else
            {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }
        }
        elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'), true))
        {
            return $match[4];
        }
        else
        {
            return '';
        }
    }

    /**
     * Strip every $tag element found below <body>.
     *
     * - encode_instead_of_strip on: the element is replaced by its children,
     *   surrounded by its start/end tag as literal text (script/style get no
     *   literal tags).
     * - otherwise script/style elements are removed together with their content.
     * - otherwise the element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document Document modified in place
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, $document, $type)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('body//' . $tag);
        $is_script_or_style = in_array($tag, array('script', 'style'), true);

        if (!$this->encode_instead_of_strip && $is_script_or_style)
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }

        // For elements which aren't script or style, encoding includes the tag itself
        $emit_literal_tags = $this->encode_instead_of_strip && !$is_script_or_style;

        foreach ($elements as $element)
        {
            $fragment = $document->createDocumentFragment();

            if ($emit_literal_tags)
            {
                $text = '<' . $tag;
                if ($element->hasAttributes())
                {
                    $attrs = array();
                    foreach ($element->attributes as $name => $attr)
                    {
                        $value = $attr->value;

                        // Compare against '' explicitly: empty() would also match a value of "0"
                        if ($value === '')
                        {
                            // In XHTML, empty values should never exist, so we repeat the name
                            if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                            {
                                $value = $name;
                            }
                            // For HTML, empty is fine
                            elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                            {
                                $attrs[] = $name;
                                continue;
                            }
                        }

                        // Standard attribute text (uses $value so the XHTML substitution takes effect)
                        $attrs[] = $name . '="' . $value . '"';
                    }
                    $text .= ' ' . implode(' ', $attrs);
                }
                $text .= '>';
                $fragment->appendChild(new DOMText($text));
            }

            // Move all children into the fragment; item(0) shifts as each child is moved
            $number = $element->childNodes->length;
            for ($i = $number; $i > 0; $i--)
            {
                $child = $element->childNodes->item(0);
                $fragment->appendChild($child);
            }

            if ($emit_literal_tags)
            {
                $fragment->appendChild(new DOMText('</' . $tag . '>'));
            }

            $element->parentNode->replaceChild($fragment, $element);
        }
    }

    /**
     * Remove the named attribute from every element in the document that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMDocument $document Document modified in place
     */
    protected function strip_attr($attrib, $document)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }
}