1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* @file
|
5
|
* Download via HTTP.
|
6
|
*
|
7
|
* Support caching, HTTP Basic Authentication, detection of RSS/Atom feeds,
|
8
|
* redirects.
|
9
|
*/
|
10
|
|
11
|
/**
 * Error code reported when a URL cannot be parsed at all.
 *
 * @var int
 */
define('FEEDS_ERROR_PARSE_ERROR', -1001);

/**
 * Error code reported when a URL carries no scheme.
 *
 * The scheme is the leading part of a URL, for example 'http'.
 *
 * @var int
 */
define('FEEDS_ERROR_NO_SCHEME', -1002);

/**
 * Error code reported when a URL uses a scheme that is not supported.
 *
 * @var int
 */
define('FEEDS_ERROR_INVALID_SCHEME', -1003);
|
33
|
|
34
|
/**
 * PCRE that locates <link> tags in an HTML document.
 *
 * Capture group 1 holds the raw attribute string of the tag (including its
 * leading whitespace). The pattern is case-insensitive and lets '.' match
 * newlines.
 */
define('HTTP_REQUEST_PCRE_LINK_TAG', '/<link((?:[\x09\x0A\x0B\x0C\x0D\x20]+[^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3D\x3E]*(?:[\x09\x0A\x0B\x0C\x0D\x20]*=[\x09\x0A\x0B\x0C\x0D\x20]*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?)*)[\x09\x0A\x0B\x0C\x0D\x20]*(>(.*)<\/link>|(\/)?>)/si');

/**
 * PCRE that matches each attribute inside a tag's attribute string.
 *
 * Capture groups: 1 is the attribute name, 2 a double-quoted value, 3 a
 * single-quoted value and 4 an unquoted value.
 */
define('HTTP_REQUEST_PCRE_TAG_ATTRIBUTES', '/[\x09\x0A\x0B\x0C\x0D\x20]+([^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3D\x3E]*)(?:[\x09\x0A\x0B\x0C\x0D\x20]*=[\x09\x0A\x0B\x0C\x0D\x20]*(?:"([^"]*)"|\'([^\']*)\'|([^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?/');
|
43
|
|
44
|
/**
 * Exception type used for errors reported by cURL itself.
 */
class HRCurlException extends Exception {
}
|
48
|
|
49
|
/**
 * Exception type used when an HTTP request does not return a 2xx code.
 */
class FeedsHTTPRequestException extends Exception {
}
|
53
|
|
54
|
/**
 * Discovers RSS or Atom feeds at the given URL.
 *
 * The URL is downloaded first. If the response itself looks like a feed (based
 * on its Content-Type header) the URL is returned unchanged. Otherwise the
 * body is scanned for <link> tags that advertise a feed and the first one that
 * can be turned into an absolute URL is returned.
 *
 * @param string $url
 *   The url of the feed to retrieve.
 * @param array $options
 *   An optional array of options.
 *   For valid options, see feeds_http_request().
 *
 * @return bool|string
 *   The discovered feed URL, or FALSE if the URL is not reachable, there was
 *   an error, or no feed could be discovered.
 */
function http_request_get_common_syndication($url, $options = array()) {
  $download = feeds_http_request($url, $options);

  // Cannot get the feed, return.
  // NOTE(review): feeds_http_request() answers a 304 with the cached result,
  // so a successful download is expected to report 200 here.
  if ($download->code != 200) {
    return FALSE;
  }

  // Drop the data into a separate variable so all manipulations of the html
  // will not affect the actual object that exists in the static cache.
  // @see feeds_http_request()
  $downloaded_string = $download->data;

  // If this happens to be a feed then just return the url.
  if (isset($download->headers['content-type']) && http_request_is_feed($download->headers['content-type'], $downloaded_string)) {
    return $url;
  }

  $discovered_feeds = http_request_find_feeds($downloaded_string);
  foreach ($discovered_feeds as $feed_url) {
    $absolute = http_request_create_absolute_url($feed_url, $url);
    if (!empty($absolute)) {
      // @TODO: something more intelligent?
      return $absolute;
    }
  }

  // Nothing usable was discovered; honour the documented bool|string contract
  // instead of implicitly returning NULL.
  return FALSE;
}
|
97
|
|
98
|
/**
 * Fetches the content at the given URL using positional arguments.
 *
 * Thin wrapper that packs its arguments into an options array and hands them
 * to feeds_http_request().
 *
 * @param string $url
 *   A valid URL (not only web URLs).
 * @param string $username
 *   If the URL uses authentication, supply the username.
 * @param string $password
 *   If the URL uses authentication, supply the password.
 * @param bool $accept_invalid_cert
 *   Whether to accept invalid certificates.
 * @param int $timeout
 *   Timeout in seconds to wait for an HTTP get request to finish.
 *
 * @return object
 *   An object that describes the data downloaded from $url.
 *
 * @see feeds_http_request()
 */
function http_request_get($url, $username = NULL, $password = NULL, $accept_invalid_cert = FALSE, $timeout = NULL) {
  $request_options = array();
  $request_options['username'] = $username;
  $request_options['password'] = $password;
  $request_options['accept_invalid_cert'] = $accept_invalid_cert;
  $request_options['timeout'] = $timeout;

  return feeds_http_request($url, $request_options);
}
|
123
|
|
124
|
/**
 * Get the content from the given URL.
 *
 * Uses cURL when http_request_use_curl() allows it and drupal_http_request()
 * otherwise. When result caching is enabled, a previously cached response is
 * used to send conditional request headers (If-None-Match and
 * If-Modified-Since), and a 304 response is answered from that cache.
 *
 * @param string $url
 *   A valid URL (not only web URLs). The 'feed://' and 'webcal://' schemes are
 *   converted to 'http://'.
 * @param array $options
 *   (optional) An array that can have one or more of the following elements:
 *   - username: (string) If the URL uses authentication, supply the username.
 *   - password: (string) If the URL uses authentication, supply the password.
 *   - accept_invalid_cert: (bool) Whether to accept invalid certificates.
 *     Defaults to FALSE.
 *   - timeout: (integer) Timeout in seconds to wait for an HTTP get request to
 *     finish. Defaults to the 'http_request_timeout' variable, or 30 seconds.
 *   - cache_http_result: (bool) Whether to cache the HTTP result. Defaults to
 *     TRUE.
 *
 * @return object
 *   An object that describes the data downloaded from $url. It carries at
 *   least 'code' and 'headers'; 'data' holds the body, 'error' is set when the
 *   URL could not be handled, and 'from_cache' is TRUE when a 304 response was
 *   answered from the cache.
 *
 * @throws HRCurlException
 *   When cURL reports an error while executing the request.
 */
function feeds_http_request($url, array $options = array()) {
  $options += array(
    'username' => NULL,
    'password' => NULL,
    'accept_invalid_cert' => FALSE,
    'cache_http_result' => TRUE,
  );

  // Make sure a request timeout is set.
  if (empty($options['timeout'])) {
    $options['timeout'] = variable_get('http_request_timeout', 30);
  }

  // Support the 'feed' and 'webcal' schemes by converting them into 'http'.
  // This happens before any cache access so that cache reads use the same key
  // as the cache write at the end of this function.
  $url = strtr($url, array('feed://' => 'http://', 'webcal://' => 'http://'));

  // Intra-page-request cache: avoid downloading the same content twice within
  // one page request.
  $cached_urls = &drupal_static(__FUNCTION__, array());
  if (!empty($cached_urls[$url])) {
    $cache = http_request_get_cache($url);
    // The cache lookup yields FALSE on a miss, so check before dereferencing.
    if ($cache && $cache->data) {
      return $cache->data;
    }
  }

  if (!$options['username'] && valid_url($url, TRUE)) {
    // Handle password protected feeds: take credentials from the URL.
    $url_parts = parse_url($url);
    if (!empty($url_parts['user'])) {
      // A URL may carry a user name without a password.
      $options['password'] = isset($url_parts['pass']) ? urldecode($url_parts['pass']) : '';
      $options['username'] = urldecode($url_parts['user']);
    }
  }

  $curl = http_request_use_curl();

  // Only download and parse data if really needs refresh.
  // Based on "Last-Modified" and "If-Modified-Since".
  // cURL expects a list of "Name: value" strings, drupal_http_request() an
  // array keyed by header name.
  $headers = array();
  if ($options['cache_http_result'] && $cache = http_request_get_cache($url)) {
    $last_result = $cache->data;
    $last_headers = array_change_key_case($last_result->headers);

    if (!empty($last_headers['etag'])) {
      if ($curl) {
        $headers[] = 'If-None-Match: ' . $last_headers['etag'];
      }
      else {
        $headers['If-None-Match'] = $last_headers['etag'];
      }
    }
    if (!empty($last_headers['last-modified'])) {
      if ($curl) {
        $headers[] = 'If-Modified-Since: ' . $last_headers['last-modified'];
      }
      else {
        $headers['If-Modified-Since'] = $last_headers['last-modified'];
      }
    }
  }

  // Basic authentication for the drupal_http_request() path. This must not
  // depend on a cache entry being present, otherwise the very first request
  // (or any uncached request) would be sent without credentials. The cURL path
  // sets credentials through CURLOPT_USERPWD instead.
  if (!empty($options['username']) && !$curl) {
    $headers['Authorization'] = 'Basic ' . base64_encode($options['username'] . ':' . $options['password']);
  }

  if ($curl) {
    $headers[] = 'User-Agent: Drupal (+http://drupal.org/)';
    $result = new stdClass();
    $result->headers = array();

    // Parse the URL and make sure we can handle the schema.
    // cURL can only support either http:// or https://.
    // CURLOPT_PROTOCOLS is only supported with cURL 7.19.4.
    $uri = parse_url($url);
    if ($uri === FALSE) {
      $result->error = 'unable to parse URL';
      $result->code = FEEDS_ERROR_PARSE_ERROR;
    }
    elseif (!isset($uri['scheme'])) {
      $result->error = 'missing schema';
      $result->code = FEEDS_ERROR_NO_SCHEME;
    }
    else {
      switch ($uri['scheme']) {
        case 'http':
        case 'https':
          // Valid scheme.
          break;

        default:
          $result->error = 'invalid schema ' . $uri['scheme'];
          $result->code = FEEDS_ERROR_INVALID_SCHEME;
          break;
      }
    }

    // If the scheme was valid, continue to request the feed using cURL.
    if (empty($result->error)) {
      $download = curl_init($url);
      curl_setopt($download, CURLOPT_FOLLOWLOCATION, TRUE);
      if (!empty($options['username'])) {
        curl_setopt($download, CURLOPT_USERPWD, $options['username'] . ':' . $options['password']);
        curl_setopt($download, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
      }
      curl_setopt($download, CURLOPT_HTTPHEADER, $headers);
      curl_setopt($download, CURLOPT_HEADER, TRUE);
      curl_setopt($download, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($download, CURLOPT_ENCODING, '');
      curl_setopt($download, CURLOPT_TIMEOUT, $options['timeout']);

      $proxy_server = variable_get('proxy_server');

      if ($proxy_server && _drupal_http_use_proxy($uri['host'])) {
        curl_setopt($download, CURLOPT_PROXY, $proxy_server);
        curl_setopt($download, CURLOPT_PROXYPORT, variable_get('proxy_port', 8080));

        // Proxy user/password.
        if ($proxy_username = variable_get('proxy_username')) {
          $username_password = $proxy_username . ':' . variable_get('proxy_password', '');

          curl_setopt($download, CURLOPT_PROXYUSERPWD, $username_password);
          curl_setopt($download, CURLOPT_PROXYAUTH, variable_get('proxy_auth_method', CURLAUTH_BASIC));
        }
      }

      if ($options['accept_invalid_cert']) {
        curl_setopt($download, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($download, CURLOPT_SSL_VERIFYHOST, 0);
      }

      $result->data = curl_exec($download);

      $curl_error = curl_error($download);
      if ($curl_error) {
        // Collect the error details, then release the handle before throwing
        // so it does not leak.
        $curl_errno = curl_errno($download);
        curl_close($download);
        throw new HRCurlException(
          t('cURL error (@code) @error for @url', array(
            '@code' => $curl_errno,
            '@error' => $curl_error,
            '@url' => $url,
          )), $curl_errno
        );
      }

      // When using a proxy, remove extra data from the header which is not
      // considered by CURLINFO_HEADER_SIZE (possibly cURL bug).
      // This data is only added to the HTTP header when working with a proxy.
      // Example string added: <HTTP/1.0 200 Connection established\r\n\r\n>
      // This was fixed in libcurl version 7.30.0 (0x71e00) (April 12, 2013),
      // so this workaround only removes the proxy-added headers if we are using
      // an older version of libcurl.
      $curl_ver = curl_version();

      if ($proxy_server && $curl_ver['version_number'] < 0x71e00 && _drupal_http_use_proxy($uri['host'])) {
        $http_header_break = "\r\n\r\n";
        $response = explode($http_header_break, $result->data);
        if (count($response) > 2) {
          $result->data = substr($result->data, strlen($response[0] . $http_header_break));
        }
      }

      // Split the raw response into the header part and the body.
      $header_size = curl_getinfo($download, CURLINFO_HEADER_SIZE);
      $raw_header = substr($result->data, 0, $header_size - 1);
      $result->data = substr($result->data, $header_size);

      // Redirects produce several header blocks; only the last one describes
      // the final response.
      $header_blocks = preg_split("/(\r\n){2}/", $raw_header);
      $header_lines = preg_split("/\r\n|\n|\r/", end($header_blocks));
      // Skip HTTP response status.
      array_shift($header_lines);

      foreach ($header_lines as $raw_line) {
        $line = trim($raw_line);
        // An empty line marks the end of the header block.
        if (!$line) {
          break;
        }
        // Ignore malformed lines that are not "Name: value" pairs.
        if (strpos($line, ':') === FALSE) {
          continue;
        }
        list($name, $value) = explode(':', $line, 2);
        // Normalize the headers.
        $name = strtolower($name);

        if (isset($result->headers[$name]) && $name == 'set-cookie') {
          // RFC 2109: the Set-Cookie response header comprises the token Set-
          // Cookie:, followed by a comma-separated list of one or more cookies.
          $result->headers[$name] .= ',' . trim($value);
        }
        else {
          $result->headers[$name] = trim($value);
        }
      }
      $result->code = curl_getinfo($download, CURLINFO_HTTP_CODE);

      curl_close($download);
    }
  }
  else {
    $result = drupal_http_request($url, array(
      'headers' => $headers,
      'timeout' => $options['timeout'],
    ));
    $result->headers = isset($result->headers) ? $result->headers : array();
  }

  $result->code = isset($result->code) ? $result->code : 200;

  // In case of 304 Not Modified try to return cached data.
  if ($result->code == 304) {
    if (isset($last_result->data)) {
      $last_result->from_cache = TRUE;
      return $last_result;
    }
    else {
      // It's a tragedy, this file must exist and contain good data.
      // In this case, clear cache and repeat.
      http_request_clear_cache($url);
      return feeds_http_request($url, $options);
    }
  }

  // Set caches if asked.
  if ($options['cache_http_result']) {
    http_request_set_cache($url, $result);
    // In the static cache, mark this URL as being cached.
    $cached_urls[$url] = TRUE;
  }

  return $result;
}
|
362
|
|
363
|
/**
 * Verifies that an HTTP request result reports success.
 *
 * Result codes 200 through 206 count as success; anything else raises an
 * exception whose message depends on the code.
 *
 * @param string $url
 *   The URL that was requested.
 * @param object $result
 *   The HTTP Request result.
 *
 * @throws FeedsHTTPRequestException
 *   In case the result code of the HTTP request is not in the 2xx series.
 */
function http_request_check_result($url, $result) {
  // Nothing to report for a successful response.
  $success_codes = range(200, 206);
  if (in_array($result->code, $success_codes)) {
    return;
  }

  $code = $result->code;
  $has_error = isset($result->error);
  $vars = array(
    '@url' => $url,
    '@code' => $code,
    '@error' => $has_error ? $result->error : 'Unknown error',
  );

  if ($code == FEEDS_ERROR_PARSE_ERROR) {
    $message = t('Download of @url failed because it could not be parsed.', $vars);
  }
  elseif ($code == FEEDS_ERROR_NO_SCHEME) {
    $example = array('@example' => 'http://');
    $message = t("Download of @url failed because its scheme could not be determined. The URL is expected to start with something like '@example'.", $vars + $example);
  }
  elseif ($code == FEEDS_ERROR_INVALID_SCHEME) {
    $supported = array('@supported' => implode(', ', array('http', 'https')));
    $message = t('Download of @url failed because its scheme is not supported: @error. Examples of supported schemes are: @supported.', $vars + $supported);
  }
  elseif ($has_error) {
    $message = t('Download of @url failed with code @code and the following error: @error.', $vars);
  }
  else {
    $message = t('Download of @url failed with code @code.', $vars);
  }

  throw new FeedsHTTPRequestException($message);
}
|
412
|
|
413
|
/**
 * Determines whether requests may be made with cURL.
 *
 * @return bool
 *   TRUE if cURL may be used, FALSE otherwise.
 */
function http_request_use_curl() {
  // Site administrators can opt out of cURL, and the PHP cURL extension has
  // to be enabled.
  if (variable_get('feeds_never_use_curl', FALSE) || !extension_loaded('curl')) {
    return FALSE;
  }

  // cURL in PHP 5.6.0 and above re-enables CURLOPT_FOLLOWLOCATION with
  // open_basedir so there is nothing further to check.
  if (version_compare(PHP_VERSION, '5.6.0', '>=')) {
    return TRUE;
  }

  // cURL below PHP 5.6.0 must not have open_basedir or safe_mode enabled.
  // phpcs:ignore PHPCompatibility.IniDirectives.RemovedIniDirectives.safe_modeDeprecatedRemoved
  $restricted = ini_get('safe_mode') || ini_get('open_basedir');
  return !$restricted;
}
|
440
|
|
441
|
/**
 * Removes the cached HTTP result for a specific URL.
 *
 * Entries live in the 'cache_feeds_http' bin, keyed by the SHA-256 hash of
 * the URL.
 *
 * @param string $url
 *   The URL to clear.
 */
function http_request_clear_cache($url) {
  $cid = hash('sha256', $url);
  cache_clear_all($cid, 'cache_feeds_http');
}
|
450
|
|
451
|
/**
 * Looks up the cached HTTP result for a specific URL.
 *
 * Entries live in the 'cache_feeds_http' bin, keyed by the SHA-256 hash of
 * the URL.
 *
 * @param string $url
 *   The URL to find the cached item.
 *
 * @return object|false
 *   The cache or FALSE on failure.
 */
function http_request_get_cache($url) {
  $cid = hash('sha256', $url);
  $cached = cache_get($cid, 'cache_feeds_http');
  return $cached;
}
|
463
|
|
464
|
/**
 * Stores the HTTP result for a specific URL in the cache.
 *
 * A result that already is a FeedsHTTPCacheItem is stored as is; anything else
 * is wrapped in a new FeedsHTTPCacheItem keyed by the SHA-256 hash of the URL.
 *
 * @param string $url
 *   The URL to cache.
 * @param object $result
 *   The result of the HTTP request.
 */
function http_request_set_cache($url, $result) {
  if ($result instanceof FeedsHTTPCacheItem) {
    $item = $result;
  }
  else {
    $item = new FeedsHTTPCacheItem(hash('sha256', $url), $result);
  }
  $item->cacheSet();
}
|
476
|
|
477
|
/**
 * Tells whether the given Content-Type denotes a feed.
 *
 * Only the media type is inspected (anything after the first ';' is ignored)
 * and it counts as a feed when it contains 'xml', compared case-insensitively.
 *
 * @param string $content_type
 *   The Content-Type header.
 * @param string $data
 *   The actual data from the http request. Currently not inspected.
 *
 * @return bool
 *   Returns TRUE if this is a parsable feed.
 */
function http_request_is_feed($content_type, $data) {
  // Keep only the part in front of the first ';' (drops e.g. the charset).
  list($media_type) = explode(';', $content_type, 2);

  // @TODO: Sometimes the content-type can be text/html but still be a valid
  // feed.
  return strpos(strtolower($media_type), 'xml') !== FALSE;
}
|
503
|
|
504
|
/**
 * Finds potential feed tags in the HTML document.
 *
 * A <link> tag qualifies when its 'rel' attribute is 'alternate', it has an
 * 'href' attribute and its 'type' attribute contains 'xml'.
 *
 * @param string $html
 *   The html string to search.
 *
 * @return array
 *   An array of href values pointing to feeds, in document order. The values
 *   are returned as written in the document (entity-decoded, case preserved)
 *   and may be relative.
 */
function http_request_find_feeds($html) {
  $matches = array();
  preg_match_all(HTTP_REQUEST_PCRE_LINK_TAG, $html, $matches);
  $valid_links = array();

  // No <link> tags at all, or the match failed.
  if (empty($matches[1])) {
    return $valid_links;
  }

  // Build up all the links information.
  foreach ($matches[1] as $link_tag) {
    $attributes = array();
    $candidate = array();

    preg_match_all(HTTP_REQUEST_PCRE_TAG_ATTRIBUTES, $link_tag, $attributes, PREG_SET_ORDER);
    foreach ($attributes as $attribute) {
      // $attribute[1] is the attribute name. The value is in $attribute[2]
      // (double-quoted), $attribute[3] (single-quoted) or $attribute[4]
      // (unquoted). Trailing groups that did not take part in the match are
      // absent from the array, hence the isset() checks.
      $value = '';
      foreach (array(2, 3, 4) as $group) {
        if (isset($attribute[$group]) && $attribute[$group] !== '') {
          $value = $attribute[$group];
          break;
        }
      }

      if (!empty($attribute[1]) && !empty($value)) {
        $name = drupal_strtolower($attribute[1]);
        $value = decode_entities($value);
        // URLs can be case-sensitive, so the href must be kept as written.
        // Other values are lower-cased to make the comparisons below
        // case-insensitive.
        $candidate[$name] = ($name == 'href') ? $value : drupal_strtolower($value);
      }
    }

    // Examine candidate to see if it is a feed.
    // @TODO: could/should use http_request_is_feed ??
    if (isset($candidate['rel']) && $candidate['rel'] == 'alternate') {
      if (isset($candidate['href']) && isset($candidate['type']) && strpos($candidate['type'], 'xml') !== FALSE) {
        // All tests pass, it is a valid candidate.
        $valid_links[] = $candidate['href'];
      }
    }
  }

  return $valid_links;
}
|
549
|
|
550
|
/**
 * Create an absolute url.
 *
 * An already absolute $url is returned unchanged. A relative $url is resolved
 * against $base_url: '.' and '..' segments are collapsed and the scheme,
 * credentials, host and port of $base_url are prepended.
 *
 * @param string $url
 *   The href to transform.
 * @param string $base_url
 *   The url to be used as the base for a relative $url.
 *
 * @return string|false
 *   An absolute url, or FALSE if no valid absolute url could be built.
 */
function http_request_create_absolute_url($url, $base_url) {
  $url = trim($url);
  if (valid_url($url, TRUE)) {
    // Valid absolute url already.
    return $url;
  }

  // Turn relative url into absolute.
  if (valid_url($url, FALSE)) {
    $parsed_url = parse_url($base_url);
    if ($parsed_url === FALSE) {
      // Invalid $base_url.
      return FALSE;
    }

    $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
    if (strlen($path) > 0 && substr($path, -1) != '/') {
      // Path does not end with '/', so drop everything after the last '/'.
      // Done with string functions rather than dirname(), which returns a
      // backslash for top-level paths on Windows.
      $last_slash = strrpos($path, '/');
      $path = ($last_slash === FALSE) ? '' : substr($path, 0, $last_slash);
    }

    // Collect the path segments. Empty segments are dropped with an explicit
    // 'strlen' callback so that a legitimate "0" segment survives.
    if ($url[0] == '/') {
      // Root-relative: the base path is ignored.
      $cparts = array_filter(explode('/', $url), 'strlen');
    }
    else {
      // Relative to the directory of the base path.
      $path_cparts = array_filter(explode('/', $path), 'strlen');
      $url_cparts = array_filter(explode('/', $url), 'strlen');
      $cparts = array_merge($path_cparts, $url_cparts);
    }

    // Walk the segments from the end: each '..' cancels the closest preceding
    // real segment, '.' is simply dropped.
    $remove_parts = 0;
    $kept_reversed = array();
    foreach (array_reverse($cparts) as $part) {
      if ($part === '.') {
        continue;
      }
      if ($part === '..') {
        $remove_parts++;
        continue;
      }
      if ($remove_parts > 0) {
        $remove_parts--;
        continue;
      }
      $kept_reversed[] = $part;
    }
    $path = implode('/', array_reverse($kept_reversed));

    // Build the prefix to the path.
    $absolute_url = '';
    if (isset($parsed_url['scheme'])) {
      $absolute_url = $parsed_url['scheme'] . '://';
    }

    if (isset($parsed_url['user'])) {
      $absolute_url .= $parsed_url['user'];
      if (isset($parsed_url['pass'])) {
        $absolute_url .= ':' . $parsed_url['pass'];
      }
      $absolute_url .= '@';
    }
    if (isset($parsed_url['host'])) {
      $absolute_url .= $parsed_url['host'];
      // Keep a non-default port, otherwise the result points at another
      // server endpoint than the base URL.
      if (isset($parsed_url['port'])) {
        $absolute_url .= ':' . $parsed_url['port'];
      }
      $absolute_url .= '/';
    }

    $absolute_url .= $path;

    if (valid_url($absolute_url, TRUE)) {
      return $absolute_url;
    }
  }
  return FALSE;
}
|