root / drupal7 / sites / all / libraries / simplepie / library / SimplePie / Content / Type / Sniffer.php @ 41cc1b08
1 |
<?php
|
---|---|
2 |
/**
|
3 |
* SimplePie
|
4 |
*
|
5 |
* A PHP-Based RSS and Atom Feed Framework.
|
6 |
* Takes the hard work out of managing a complete RSS/Atom solution.
|
7 |
*
|
8 |
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
|
9 |
* All rights reserved.
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without modification, are
|
12 |
* permitted provided that the following conditions are met:
|
13 |
*
|
14 |
* * Redistributions of source code must retain the above copyright notice, this list of
|
15 |
* conditions and the following disclaimer.
|
16 |
*
|
17 |
* * Redistributions in binary form must reproduce the above copyright notice, this list
|
18 |
* of conditions and the following disclaimer in the documentation and/or other materials
|
19 |
* provided with the distribution.
|
20 |
*
|
21 |
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
|
22 |
* to endorse or promote products derived from this software without specific prior
|
23 |
* written permission.
|
24 |
*
|
25 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
26 |
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
27 |
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
|
28 |
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
29 |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
30 |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
31 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
32 |
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
33 |
* POSSIBILITY OF SUCH DAMAGE.
|
34 |
*
|
35 |
* @package SimplePie
|
36 |
* @version 1.3.1
|
37 |
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
|
38 |
* @author Ryan Parman
|
39 |
* @author Geoffrey Sneddon
|
40 |
* @author Ryan McCue
|
41 |
* @link http://simplepie.org/ SimplePie
|
42 |
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
|
43 |
*/
|
44 |
|
45 |
|
46 |
/**
 * Content-type sniffing
 *
 * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
 *
 * This is used since we can't always trust Content-Type headers, and is based
 * upon the HTML5 parsing rules.
 *
 * The sniffer only reads two members of the file object it is given:
 * `headers` (an array indexed by header name; the 'content-type' and
 * 'content-encoding' entries are consulted) and `body` (the raw response
 * bytes).
 *
 * This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()}
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class SimplePie_Content_Type_Sniffer
{
    /**
     * File object
     *
     * @var SimplePie_File
     */
    public $file;

    /**
     * Create an instance of the class with the input file
     *
     * @param SimplePie_File $file Input file
     */
    public function __construct($file)
    {
        $this->file = $file;
    }

    /**
     * Get the Content-Type of the specified file
     *
     * Starts from the declared Content-Type header (when there is one) and
     * falls back to inspecting the body where the declared type is missing,
     * explicitly "unknown", or one of the types servers commonly mislabel
     * (text/plain, image/*, text/html).
     *
     * @return string Actual Content-Type
     */
    public function get_type()
    {
        if (!isset($this->file->headers['content-type']))
        {
            return $this->unknown();
        }

        $header = $this->file->headers['content-type'];

        // Only these exact header values trigger the text-or-binary check,
        // and only when the body is not content-encoded. The comparison is
        // deliberately exact (case-sensitive, parameters included).
        $plain_text_headers = array(
            'text/plain',
            'text/plain; charset=ISO-8859-1',
            'text/plain; charset=iso-8859-1',
            'text/plain; charset=UTF-8',
        );
        if (!isset($this->file->headers['content-encoding'])
            && in_array($header, $plain_text_headers, true))
        {
            return $this->text_or_binary();
        }

        // Strip any parameters (";charset=..." etc.) and normalise the rest.
        $separator = strpos($header, ';');
        $official = ($separator !== false) ? substr($header, 0, $separator) : $header;
        $official = trim(strtolower($official));

        if ($official === 'unknown/unknown'
            || $official === 'application/unknown')
        {
            return $this->unknown();
        }

        // XML types are trusted as declared.
        if (substr($official, -4) === '+xml'
            || $official === 'text/xml'
            || $official === 'application/xml')
        {
            return $official;
        }

        // Declared images: prefer the type found from the magic number and
        // keep the declared type when no known signature matches.
        if (substr($official, 0, 6) === 'image/')
        {
            $sniffed = $this->image();
            if ($sniffed)
            {
                return $sniffed;
            }
            return $official;
        }

        if ($official === 'text/html')
        {
            return $this->feed_or_html();
        }

        return $official;
    }

    /**
     * Sniff text or binary
     *
     * A body starting with a Unicode byte-order mark is treated as text.
     * Otherwise any binary control byte anywhere in the body marks it as
     * binary.
     *
     * @return string Actual Content-Type
     */
    public function text_or_binary()
    {
        $body = $this->file->body;

        // UTF-16BE, UTF-16LE, UTF-32BE and UTF-8 byte-order marks.
        if (substr($body, 0, 2) === "\xFE\xFF"
            || substr($body, 0, 2) === "\xFF\xFE"
            || substr($body, 0, 4) === "\x00\x00\xFE\xFF"
            || substr($body, 0, 3) === "\xEF\xBB\xBF")
        {
            return 'text/plain';
        }

        // Control bytes other than TAB, LF, VT, FF, CR and ESC.
        if (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $body))
        {
            // Previously misspelled as "application/octect-stream", which is
            // not a registered media type.
            return 'application/octet-stream';
        }

        return 'text/plain';
    }

    /**
     * Sniff unknown
     *
     * Used when no usable Content-Type was declared: checks for an HTML
     * prologue (after leading whitespace), then the PDF, PostScript and image
     * signatures, and finally falls back to the text-or-binary check.
     *
     * @return string Actual Content-Type
     */
    public function unknown()
    {
        $body = $this->file->body;

        // Number of leading whitespace bytes (TAB, LF, VT, FF, CR, space).
        $ws = strspn($body, "\x09\x0A\x0B\x0C\x0D\x20");
        if (strtolower(substr($body, $ws, 14)) === '<!doctype html'
            || strtolower(substr($body, $ws, 5)) === '<html'
            || strtolower(substr($body, $ws, 7)) === '<script')
        {
            return 'text/html';
        }

        if (substr($body, 0, 5) === '%PDF-')
        {
            return 'application/pdf';
        }

        if (substr($body, 0, 11) === '%!PS-Adobe-')
        {
            return 'application/postscript';
        }

        // Same signature list as image(); delegate instead of repeating it.
        $image = $this->image();
        if ($image)
        {
            return $image;
        }

        return $this->text_or_binary();
    }

    /**
     * Sniff images
     *
     * Compares the start of the body against the known image magic numbers.
     *
     * @return string|false Actual Content-Type, or false when no known image
     *                      signature matches
     */
    public function image()
    {
        // Magic number => media type.
        $signatures = array(
            'GIF87a' => 'image/gif',
            'GIF89a' => 'image/gif',
            "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" => 'image/png',
            "\xFF\xD8\xFF" => 'image/jpeg',
            "\x42\x4D" => 'image/bmp',
            "\x00\x00\x01\x00" => 'image/vnd.microsoft.icon',
        );

        $body = $this->file->body;
        foreach ($signatures as $magic => $type)
        {
            if (substr($body, 0, strlen($magic)) === $magic)
            {
                return $type;
            }
        }

        return false;
    }

    /**
     * Sniff HTML
     *
     * Distinguishes a feed served as text/html from real HTML. Leading
     * whitespace, comments, "<!...>" declarations and processing instructions
     * are skipped; the first real tag then decides the result. Anything
     * unexpected, including an unterminated construct, yields 'text/html'.
     *
     * @return string Actual Content-Type
     */
    public function feed_or_html()
    {
        $body = $this->file->body;
        $len = strlen($body);
        $pos = strspn($body, "\x09\x0A\x0D\x20");

        while ($pos < $len)
        {
            switch ($body[$pos])
            {
                case "\x09":
                case "\x0A":
                case "\x0D":
                case "\x20":
                    // Skip the whole whitespace run, then re-test the loop
                    // condition.
                    $pos += strspn($body, "\x09\x0A\x0D\x20", $pos);
                    continue 2;

                case '<':
                    $pos++;
                    break;

                default:
                    // Character data before any tag: not a feed.
                    return 'text/html';
            }

            // $pos now points just past a '<'.
            if (substr($body, $pos, 3) === '!--')
            {
                // Comment: resume after its terminator.
                $pos += 3;
                if ($pos < $len && ($pos = strpos($body, '-->', $pos)) !== false)
                {
                    $pos += 3;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 1) === '!')
            {
                // Markup declaration (e.g. a doctype): resume after '>'.
                if ($pos < $len && ($pos = strpos($body, '>', $pos)) !== false)
                {
                    $pos++;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 1) === '?')
            {
                // Processing instruction: resume after its closing marker.
                if ($pos < $len && ($pos = strpos($body, '?>', $pos)) !== false)
                {
                    $pos += 2;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 3) === 'rss'
                || substr($body, $pos, 7) === 'rdf:RDF')
            {
                return 'application/rss+xml';
            }
            elseif (substr($body, $pos, 4) === 'feed')
            {
                return 'application/atom+xml';
            }
            else
            {
                return 'text/html';
            }
        }

        return 'text/html';
    }
}
332 |
|