Révision ed9a13f1
Ajouté par Assos Assos il y a presque 4 ans
drupal7/sites/all/modules/feeds/libraries/common_syndication_parser.inc | ||
---|---|---|
2 | 2 |
|
3 | 3 |
/** |
4 | 4 |
* @file |
5 |
* Downloading and parsing functions for Common Syndication Parser.
|
|
6 |
* Pillaged from FeedAPI common syndication parser.
|
|
5 |
* Downloading and parsing functions for Common Syndication Parser. |
|
6 |
* Pillaged from FeedAPI common syndication parser. |
|
7 | 7 |
* |
8 | 8 |
* @todo Restructure. OO could work wonders here. |
9 | 9 |
* @todo Write unit tests. |
... | ... | |
31 | 31 |
return FALSE; |
32 | 32 |
} |
33 | 33 |
$feed_type = _parser_common_syndication_feed_format_detect($xml); |
34 |
if ($feed_type == "atom1.0") {
|
|
34 |
if ($feed_type == "atom1.0") { |
|
35 | 35 |
return _parser_common_syndication_atom10_parse($xml); |
36 | 36 |
} |
37 | 37 |
if ($feed_type == "RSS2.0" || $feed_type == "RSS0.91" || $feed_type == "RSS0.92") { |
... | ... | |
88 | 88 |
|
89 | 89 |
$base = _parser_common_syndication_atom10_parse_base_url($feed_XML); |
90 | 90 |
|
91 |
// Detect the title |
|
91 |
// Detect the title.
|
|
92 | 92 |
$parsed_source['title'] = isset($feed_XML->title) ? _parser_common_syndication_title("{$feed_XML->title}") : ""; |
93 |
// Detect the description |
|
93 |
// Detect the description.
|
|
94 | 94 |
$parsed_source['description'] = isset($feed_XML->subtitle) ? "{$feed_XML->subtitle}" : ""; |
95 | 95 |
|
96 | 96 |
$parsed_source['link'] = _parser_common_syndication_link($feed_XML->link); |
... | ... | |
101 | 101 |
$parsed_source['items'] = array(); |
102 | 102 |
|
103 | 103 |
foreach ($feed_XML->entry as $news) { |
104 |
$georss = (array)$news->children($ns["georss"]); |
|
104 |
$georss = (array) $news->children($ns["georss"]);
|
|
105 | 105 |
$geoname = ''; |
106 | 106 |
if (isset($georss['featureName'])) { |
107 | 107 |
$geoname = "{$georss['featureName']}"; |
... | ... | |
127 | 127 |
if (isset($category['scheme'])) { |
128 | 128 |
$domain = "{$category['scheme']}"; |
129 | 129 |
if (!empty($domain)) { |
130 |
if (!isset($additional_taxonomies['ATOM Domains'][$domain])) {
|
|
131 |
$additional_taxonomies['ATOM Domains'][$domain] = array();
|
|
132 |
}
|
|
133 |
$additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1;
|
|
130 |
if (!isset($additional_taxonomies['ATOM Domains'][$domain])) { |
|
131 |
$additional_taxonomies['ATOM Domains'][$domain] = array(); |
|
132 |
} |
|
133 |
$additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1; |
|
134 | 134 |
} |
135 | 135 |
} |
136 | 136 |
$additional_taxonomies['ATOM Categories'][] = "{$category['term']}"; |
... | ... | |
141 | 141 |
|
142 | 142 |
$body = ''; |
143 | 143 |
if (!empty($news->content)) { |
144 |
foreach ($news->content->children() as $child) {
|
|
144 |
foreach ($news->content->children() as $child) { |
|
145 | 145 |
$body .= $child->asXML(); |
146 | 146 |
} |
147 | 147 |
$body .= "{$news->content}"; |
148 | 148 |
} |
149 | 149 |
elseif (!empty($news->summary)) { |
150 |
foreach ($news->summary->children() as $child) {
|
|
150 |
foreach ($news->summary->children() as $child) { |
|
151 | 151 |
$body .= $child->asXML(); |
152 | 152 |
} |
153 | 153 |
$body .= "{$news->summary}"; |
... | ... | |
175 | 175 |
$item['timestamp'] = _parser_common_syndication_parse_date("{$news->published}"); |
176 | 176 |
} |
177 | 177 |
elseif (isset($news->issued)) { |
178 |
$item['timestamp'] = _parser_common_syndication_parse_date("{$news->issued}");
|
|
178 |
$item['timestamp'] = _parser_common_syndication_parse_date("{$news->issued}"); |
|
179 | 179 |
} |
180 | 180 |
elseif (isset($news->updated)) { |
181 | 181 |
$item['timestamp'] = _parser_common_syndication_parse_date("{$news->updated}"); |
... | ... | |
340 | 340 |
'author_name' => array('dc:creator', 'dc:publisher'), |
341 | 341 |
'guid' => 'rdf:about', |
342 | 342 |
'timestamp' => 'dc:date', |
343 |
'tags' => 'dc:subject' |
|
343 |
'tags' => 'dc:subject',
|
|
344 | 344 |
)); |
345 | 345 |
|
346 | 346 |
// Special handling for the title: |
... | ... | |
357 | 357 |
// Add every found RDF property to the feed item. |
358 | 358 |
$item['rdf'] = array(); |
359 | 359 |
foreach ($rdf_data as $rdf_property => $rdf_value) { |
360 |
// looks nicer in the mapper UI
|
|
360 |
// Looks nicer in the mapper UI.
|
|
361 | 361 |
// @todo Revisit, not used with feedapi mapper anymore. |
362 | 362 |
$rdf_property = str_replace(':', '_', $rdf_property); |
363 | 363 |
$item['rdf'][$rdf_property] = $rdf_value; |
... | ... | |
369 | 369 |
return $parsed_source; |
370 | 370 |
} |
371 | 371 |
|
372 |
/** |
|
373 |
* Returns the values of the first listed RDF property that has data, with empty strings removed. |
|
374 |
*/ |
|
372 | 375 |
function _parser_common_syndication_RDF10_property($rdf_data, $rdf_properties = array()) { |
373 | 376 |
$rdf_properties = is_array($rdf_properties) ? $rdf_properties : array_slice(func_get_args(), 1); |
374 | 377 |
foreach ($rdf_properties as $rdf_property) { |
375 | 378 |
if ($rdf_property && !empty($rdf_data[$rdf_property])) { |
376 |
// remove empty strings
|
|
379 |
// Remove empty strings.
|
|
377 | 380 |
return array_filter($rdf_data[$rdf_property], 'strlen'); |
378 | 381 |
} |
379 | 382 |
} |
380 | 383 |
} |
381 | 384 |
|
385 |
/** |
|
386 |
* |
|
387 |
*/ |
|
382 | 388 |
function _parser_common_syndication_RDF10_item($rdf_data, $mappings) { |
383 | 389 |
foreach ($mappings as $k => $v) { |
384 | 390 |
$values = _parser_common_syndication_RDF10_property($rdf_data, $v); |
... | ... | |
394 | 400 |
|
395 | 401 |
$ns = array( |
396 | 402 |
"content" => "http://purl.org/rss/1.0/modules/content/", |
397 |
"dc" => "http://purl.org/dc/elements/1.1/",
|
|
398 |
"georss" => "http://www.georss.org/georss",
|
|
403 |
"dc" => "http://purl.org/dc/elements/1.1/", |
|
404 |
"georss" => "http://www.georss.org/georss", |
|
399 | 405 |
); |
400 | 406 |
|
401 | 407 |
$parsed_source = array(); |
... | ... | |
415 | 421 |
|
416 | 422 |
$category = $news->xpath('category'); |
417 | 423 |
// Get children for current namespace. |
418 |
$content = (array)$news->children($ns["content"]); |
|
419 |
$dc = (array)$news->children($ns["dc"]); |
|
420 |
$georss = (array)$news->children($ns["georss"]); |
|
424 |
$content = (array) $news->children($ns["content"]);
|
|
425 |
$dc = (array) $news->children($ns["dc"]);
|
|
426 |
$georss = (array) $news->children($ns["georss"]);
|
|
421 | 427 |
$news = (array) $news; |
422 | 428 |
$news['category'] = $category; |
423 | 429 |
|
... | ... | |
430 | 436 |
} |
431 | 437 |
// Some sources use content:encoded as description, e.g. |
432 | 438 |
// PostNuke PageSetter module. |
433 |
if (isset($news['encoded'])) { // content:encoded for PHP < 5.1.2. |
|
439 |
// content:encoded for PHP < 5.1.2. |
|
440 |
if (isset($news['encoded'])) { |
|
434 | 441 |
if (strlen($body) < strlen("{$news['encoded']}")) { |
435 | 442 |
$body = "{$news['encoded']}"; |
436 | 443 |
} |
437 | 444 |
} |
438 |
if (isset($content['encoded'])) { // content:encoded for PHP >= 5.1.2. |
|
445 |
// content:encoded for PHP >= 5.1.2. |
|
446 |
if (isset($content['encoded'])) { |
|
439 | 447 |
if (strlen($body) < strlen("{$content['encoded']}")) { |
440 | 448 |
$body = "{$content['encoded']}"; |
441 | 449 |
} |
... | ... | |
448 | 456 |
$original_author = "{$news['author']}"; |
449 | 457 |
} |
450 | 458 |
elseif (!empty($dc["creator"])) { |
451 |
$original_author = (string)$dc["creator"]; |
|
459 |
$original_author = (string) $dc["creator"];
|
|
452 | 460 |
} |
453 | 461 |
|
454 | 462 |
if (!empty($news['link'])) { |
... | ... | |
486 | 494 |
if (isset($category['domain'])) { |
487 | 495 |
$domain = "{$category['domain']}"; |
488 | 496 |
if (!empty($domain)) { |
489 |
if (!isset($additional_taxonomies['RSS Domains'][$domain])) {
|
|
490 |
$additional_taxonomies['RSS Domains'][$domain] = array();
|
|
491 |
}
|
|
492 |
$additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1;
|
|
497 |
if (!isset($additional_taxonomies['RSS Domains'][$domain])) { |
|
498 |
$additional_taxonomies['RSS Domains'][$domain] = array(); |
|
499 |
} |
|
500 |
$additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1; |
|
493 | 501 |
} |
494 | 502 |
} |
495 | 503 |
} |
... | ... | |
587 | 595 |
list($year, $month, $day, $hours, $minutes, $seconds) = array($match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); |
588 | 596 |
// Calculate the epoch for current date assuming GMT. |
589 | 597 |
$epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year); |
590 |
if ($match[10] != 'Z') { // Z is zulu time, aka GMT |
|
598 |
// Z is zulu time, aka GMT. |
|
599 |
if ($match[10] != 'Z') { |
|
591 | 600 |
list($tz_mod, $tz_hour, $tz_min) = array($match[8], $match[9], $match[10]); |
592 | 601 |
// Zero out the variables. |
593 | 602 |
if (!$tz_hour) { |
... | ... | |
615 | 624 |
* original article) |
616 | 625 |
* |
617 | 626 |
* @param array $links |
618 |
* Array of SimpleXML objects |
|
627 |
* Array of SimpleXML objects.
|
|
619 | 628 |
* |
620 | 629 |
* @return string |
621 | 630 |
* A URL if found. An empty string otherwise. |
... | ... | |
637 | 646 |
} |
638 | 647 |
|
639 | 648 |
/** |
640 |
* Prepare raw data to be a title |
|
649 |
* Prepare raw data to be a title.
|
|
641 | 650 |
*/ |
642 | 651 |
function _parser_common_syndication_title($title, $body = FALSE) { |
643 | 652 |
if (empty($title) && !empty($body)) { |
Formats disponibles : Unified diff
Weekly update of contrib modules