Projet

Général

Profil

Paste
Télécharger (20,4 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds / libraries / common_syndication_parser.inc @ ed9a13f1

1
<?php
2

    
3
/**
4
 * @file
5
 * Downloading and parsing functions for Common Syndication Parser.
6
 * Pillaged from FeedAPI common syndication parser.
7
 *
8
 * @todo Restructure. OO could work wonders here.
9
 * @todo Write unit tests.
10
 * @todo Keep in Feeds project or host on Drupal?
11
 */
12

    
13
/**
 * Parses a raw feed document into a structured array.
 *
 * @param string $string
 *   The raw XML source of the feed.
 *
 * @return array|false
 *   The structured data extracted from the feed, or FALSE when the XML cannot
 *   be loaded or the feed format is not recognized.
 */
function common_syndication_parser_parse($string) {
  // SimpleXML only accepts an XML declaration at the very start of the
  // document, so surrounding whitespace has to go before loading.
  $document = trim($string);

  $xml = @simplexml_load_string($document, NULL, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NOCDATA);

  // Malformed XML: there is nothing to dispatch.
  if ($xml === FALSE || is_null($xml)) {
    return FALSE;
  }

  // Hand the document to the parser matching the detected format.
  switch (_parser_common_syndication_feed_format_detect($xml)) {
    case "atom1.0":
      return _parser_common_syndication_atom10_parse($xml);

    case "RSS2.0":
    case "RSS0.91":
    case "RSS0.92":
      return _parser_common_syndication_RSS20_parse($xml);

    case "RDF":
      return _parser_common_syndication_RDF10_parse($xml);
  }

  // Unknown or unsupported format.
  return FALSE;
}
45

    
46
/**
 * Identifies the syndication format of a SimpleXML document.
 *
 * @param SimpleXMLElement $xml
 *   The feed, already loaded by SimpleXML.
 *
 * @return string|false
 *   One of "atom1.0", "RSS2.0", "RSS0.91", "RSS0.92" or "RDF"; FALSE if the
 *   document matches none of them.
 */
function _parser_common_syndication_feed_format_detect($xml) {
  if (!is_object($xml)) {
    return FALSE;
  }

  $attributes = $xml->attributes();
  $root = strtolower($xml->getName());

  switch ($root) {
    case "feed":
      // Only a <feed> that carries at least one <entry> counts as Atom.
      return isset($xml->entry) ? "atom1.0" : FALSE;

    case "rdf":
      return isset($xml->channel) ? "RDF" : FALSE;

    case "rss":
      // The version attribute selects the RSS flavour. The comparison is
      // deliberately loose, as the attribute is a SimpleXMLElement (or NULL).
      foreach (array("2.0", "0.91", "0.92") as $version) {
        if ($attributes["version"] == $version) {
          return "RSS" . $version;
        }
      }
      return FALSE;
  }

  return FALSE;
}
78

    
79
/**
 * Parses Atom 1.0 feeds.
 *
 * @param SimpleXMLElement $feed_XML
 *   The root <feed> element.
 *
 * @return array
 *   An array with the keys 'title', 'description', 'link' and 'items'. Each
 *   item holds 'title', 'description', 'author_name', 'guid', 'url',
 *   'geolocations', 'tags' and 'domains', plus 'timestamp' when the entry has
 *   a published, issued or updated element. 'domains' maps each category
 *   scheme to the list of indexes (into 'tags') of the categories using it.
 */
function _parser_common_syndication_atom10_parse($feed_XML) {
  $parsed_source = array();

  $ns = array(
    "georss" => "http://www.georss.org/georss",
  );

  $base = _parser_common_syndication_atom10_parse_base_url($feed_XML);

  // Detect the title.
  $parsed_source['title'] = isset($feed_XML->title) ? _parser_common_syndication_title("{$feed_XML->title}") : "";
  // Detect the description.
  $parsed_source['description'] = isset($feed_XML->subtitle) ? "{$feed_XML->subtitle}" : "";

  $parsed_source['link'] = _parser_common_syndication_link($feed_XML->link);
  // A link that is valid only as a relative URL is resolved against the base.
  if ($base && !valid_url($parsed_source['link'], TRUE) && valid_url($parsed_source['link'])) {
    $parsed_source['link'] = $base . $parsed_source['link'];
  }

  $parsed_source['items'] = array();

  foreach ($feed_XML->entry as $news) {
    $georss = (array) $news->children($ns["georss"]);
    $geoname = '';
    if (isset($georss['featureName'])) {
      $geoname = "{$georss['featureName']}";
    }

    $latlon =
    $lat =
    $lon = NULL;
    if (isset($georss['point'])) {
      // A georss:point is "latitude longitude", separated by a space.
      $latlon = explode(' ', $georss['point']);
      $lat = "{$latlon[0]}";
      // Tolerate a point with a single coordinate instead of reading an
      // undefined index; the empty longitude keeps the item ungeolocated.
      $lon = isset($latlon[1]) ? "{$latlon[1]}" : '';
      if (!$geoname) {
        $geoname = "{$lat} {$lon}";
      }
    }

    $additional_taxonomies = array();
    if (isset($news->category)) {
      $additional_taxonomies['ATOM Categories'] = array();
      $additional_taxonomies['ATOM Domains'] = array();
      foreach ($news->category as $category) {
        // Append the term first so that the index recorded below refers to
        // this category and not to the previous one.
        $additional_taxonomies['ATOM Categories'][] = "{$category['term']}";
        if (isset($category['scheme'])) {
          $domain = "{$category['scheme']}";
          if (!empty($domain)) {
            if (!isset($additional_taxonomies['ATOM Domains'][$domain])) {
              $additional_taxonomies['ATOM Domains'][$domain] = array();
            }
            $additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1;
          }
        }
      }
    }

    $title = "{$news->title}";

    // Prefer <content>, fall back to <summary>. Child elements are serialized
    // as XML and followed by the element's own text.
    $body = '';
    if (!empty($news->content)) {
      foreach ($news->content->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$news->content}";
    }
    elseif (!empty($news->summary)) {
      foreach ($news->summary->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$news->summary}";
    }

    // Author lookup order: the entry's source, the entry itself, the feed.
    $original_author = '';
    if (!empty($news->source->author->name)) {
      $original_author = "{$news->source->author->name}";
    }
    elseif (!empty($news->author->name)) {
      $original_author = "{$news->author->name}";
    }
    elseif (!empty($feed_XML->author->name)) {
      $original_author = "{$feed_XML->author->name}";
    }

    $item = array();
    $item['title'] = _parser_common_syndication_title($title, $body);
    $item['description'] = $body;
    $item['author_name'] = $original_author;

    // Fall back to updated for timestamp if both published and issued are
    // empty.
    if (isset($news->published)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->published}");
    }
    elseif (isset($news->issued)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->issued}");
    }
    elseif (isset($news->updated)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->updated}");
    }

    $item['guid'] = (string) $news->id;

    $item['url'] = _parser_common_syndication_link($news->link);

    // Without a link, use the out-of-line content source if it is absolute.
    if (!$item['url'] && !empty($news->content['src']) && valid_url($news->content['src'], TRUE)) {
      $item['url'] = (string) $news->content['src'];
    }

    // Still nothing: an absolute URL used as the entry id will do.
    if (!strlen($item['url']) && $item['guid'] && valid_url($item['guid'], TRUE)) {
      $item['url'] = $item['guid'];
    }

    // Resolve a relative URL against the entry's base, then the feed's base.
    if (!valid_url($item['url'], TRUE) && valid_url($item['url'])) {
      if ($item_base = _parser_common_syndication_atom10_parse_base_url($news)) {
        $item['url'] = $item_base . $item['url'];
      }
      elseif ($base) {
        $item['url'] = $base . $item['url'];
      }
    }

    // Fall back on URL if GUID is empty.
    if (!strlen($item['guid'])) {
      $item['guid'] = $item['url'];
    }

    $item['geolocations'] = array();
    if ($lat && $lon) {
      $item['geolocations'] = array(
        array(
          'name' => $geoname,
          'lat' => $lat,
          'lon' => $lon,
        ),
      );
    }
    $item['tags'] = isset($additional_taxonomies['ATOM Categories']) ? $additional_taxonomies['ATOM Categories'] : array();
    $item['domains'] = isset($additional_taxonomies['ATOM Domains']) ? $additional_taxonomies['ATOM Domains'] : array();
    $parsed_source['items'][] = $item;
  }

  return $parsed_source;
}
227

    
228
/**
 * Determines the base URL of an Atom feed or entry element.
 *
 * @param SimpleXMLElement $xml
 *   The element to inspect.
 *
 * @return string|false
 *   The base URL, or FALSE when none can be determined.
 */
function _parser_common_syndication_atom10_parse_base_url(SimpleXMLElement $xml) {
  // Prefer the xml:base attribute; fall back to an unprefixed "base".
  $declared = $xml->attributes('xml', TRUE)->base;
  if (!$declared) {
    $declared = $xml['base'];
  }

  // A declared absolute base is returned with exactly one trailing slash.
  if ($declared && valid_url($declared, TRUE)) {
    return rtrim($declared, '/') . '/';
  }

  // Otherwise derive a base from the first absolute "self" link, and failing
  // that from the first absolute "alternate" link.
  foreach (array('self', 'alternate') as $relation) {
    $candidates = $xml->xpath('*[local-name() = "link" and @rel="' . $relation . '" and @href]');
    foreach ($candidates as $candidate) {
      if (valid_url($candidate['href'], TRUE)) {
        return _parser_common_syndication_string_url_path((string) $candidate['href']);
      }
    }
  }

  return FALSE;
}
263

    
264
/**
 * Strips everything after the host from an absolute URL.
 *
 * @param string $url
 *   The absolute URL.
 *
 * @return string
 *   The URL cut after the first slash following "//"; when there is no such
 *   slash, the URL with a slash appended.
 */
function _parser_common_syndication_string_url_path($url) {
  // Skip past the "//" that follows the scheme.
  $authority_start = strpos($url, '//') + 2;
  // The next slash marks where the path begins.
  $path_start = strpos($url, '/', $authority_start);

  if ($path_start) {
    return substr($url, 0, $path_start + 1);
  }

  return $url . '/';
}
278

    
279
/**
 * Parse RDF Site Summary (RSS) 1.0 feeds in RDF/XML format.
 *
 * @param SimpleXMLElement $feed_XML
 *   The root RDF element of the feed.
 *
 * @return array
 *   An array with the keys 'title', 'description', 'link' and 'items'. The
 *   feed-level values are empty strings when the document has no channel in
 *   the RSS 1.0 namespace. Each item holds the mapped fields ('title',
 *   'description', 'url', 'author_name', 'guid', 'timestamp', 'tags') and an
 *   'rdf' array with every RDF property found on the item.
 *
 * @see http://web.resource.org/rss/1.0/
 */
function _parser_common_syndication_RDF10_parse($feed_XML) {
  // Declare some canonical standard prefixes for well-known namespaces:
  static $canonical_namespaces = array(
    'rdf'      => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs'     => 'http://www.w3.org/2000/01/rdf-schema#',
    'xsi'      => 'http://www.w3.org/2001/XMLSchema-instance#',
    'xsd'      => 'http://www.w3.org/2001/XMLSchema#',
    'owl'      => 'http://www.w3.org/2002/07/owl#',
    'dc'       => 'http://purl.org/dc/elements/1.1/',
    'dcterms'  => 'http://purl.org/dc/terms/',
    'dcmitype' => 'http://purl.org/dc/dcmitype/',
    'foaf'     => 'http://xmlns.com/foaf/0.1/',
    'rss'      => 'http://purl.org/rss/1.0/',
  );

  // Start from a complete, empty result so that a document without a channel
  // or items in the RSS 1.0 namespace still yields a well-formed array
  // rather than an undefined variable.
  $parsed_source = array(
    'title'       => '',
    'description' => '',
    'link'        => '',
    'items'       => array(),
  );

  // Get all namespaces declared in the feed element.
  $namespaces = $feed_XML->getNamespaces(TRUE);

  // Normalize the prefixes used by the feed to well-known 'standard' prefixes
  // where possible, as the feed may in principle use any arbitrary prefixes
  // and we should still be able to correctly handle it. This depends only on
  // the namespace, so it is resolved once instead of once per property.
  $prefixes = array();
  foreach ($namespaces as $ns => $ns_uri) {
    $canonical = array_search($ns_uri, $canonical_namespaces, TRUE);
    $prefixes[$ns] = $canonical ? $canonical : $ns;
  }

  // Process the <rss:channel> resource containing feed metadata. Only the
  // first channel is used.
  foreach ($feed_XML->children($canonical_namespaces['rss'])->channel as $rss_channel) {
    $parsed_source['title'] = _parser_common_syndication_title((string) $rss_channel->title);
    $parsed_source['description'] = (string) $rss_channel->description;
    $parsed_source['link'] = (string) $rss_channel->link;
    break;
  }

  // Process each <rss:item> resource contained in the feed:
  foreach ($feed_XML->children($canonical_namespaces['rss'])->item as $rss_item) {

    // Extract all available RDF statements from the feed item's RDF/XML
    // tags, allowing for both the item's attributes and child elements to
    // contain RDF properties:
    $rdf_data = array();
    foreach ($namespaces as $ns => $ns_uri) {
      $ns_prefix = $prefixes[$ns];
      foreach ($rss_item->attributes($ns_uri) as $attr_name => $attr_value) {
        $rdf_data[$ns_prefix . ':' . $attr_name][] = (string) $attr_value;
      }
      foreach ($rss_item->children($ns_uri) as $rss_property) {
        $rdf_data[$ns_prefix . ':' . $rss_property->getName()][] = (string) $rss_property;
      }
    }

    // Declaratively define mappings that determine how to construct the
    // result object. For each field the first property with data wins.
    $item = _parser_common_syndication_RDF10_item($rdf_data, array(
      'title'       => array('rss:title', 'dc:title'),
      'description' => array('rss:description', 'dc:description', 'content:encoded'),
      'url'         => array('rss:link', 'rdf:about'),
      'author_name' => array('dc:creator', 'dc:publisher'),
      'guid'        => 'rdf:about',
      'timestamp'   => 'dc:date',
      'tags'        => 'dc:subject',
    ));

    // Special handling for the title:
    $item['title'] = _parser_common_syndication_title($item['title'], $item['description']);

    // Parse any date/time values into Unix timestamps:
    $item['timestamp'] = _parser_common_syndication_parse_date($item['timestamp']);

    // If no GUID found, use the URL of the item.
    if (empty($item['guid'])) {
      $item['guid'] = $item['url'];
    }

    // Add every found RDF property to the feed item.
    $item['rdf'] = array();
    foreach ($rdf_data as $rdf_property => $rdf_value) {
      // Looks nicer in the mapper UI.
      // @todo Revisit, not used with feedapi mapper anymore.
      $rdf_property = str_replace(':', '_', $rdf_property);
      $item['rdf'][$rdf_property] = $rdf_value;
    }

    $parsed_source['items'][] = $item;
  }

  return $parsed_source;
}
371

    
372
/**
 * Returns the values of the first listed RDF property that has data.
 *
 * @param array $rdf_data
 *   RDF statements keyed by property name; each value is a list of strings.
 * @param array|string $rdf_properties
 *   The property names to try, in order. Either a single array, or one name
 *   per argument.
 *
 * @return array|null
 *   The values of the first property found, with empty strings removed and
 *   the original keys preserved. NULL when none of the properties has data.
 */
function _parser_common_syndication_RDF10_property($rdf_data, $rdf_properties = array()) {
  // Accept both calling conventions: an array of names or a variable number
  // of name arguments following $rdf_data.
  $candidates = $rdf_properties;
  if (!is_array($candidates)) {
    $candidates = array_slice(func_get_args(), 1);
  }

  foreach ($candidates as $name) {
    if (!$name || empty($rdf_data[$name])) {
      continue;
    }
    // Remove empty strings.
    return array_filter($rdf_data[$name], 'strlen');
  }
}
384

    
385
/**
 * Builds a feed item from RDF data according to a field mapping.
 *
 * @param array $rdf_data
 *   RDF statements keyed by property name.
 * @param array $mappings
 *   Field names mapped to the RDF property name (or list of names) to read.
 *
 * @return array
 *   The mappings with each entry replaced by the data found for it: the list
 *   of values when there are several, the single value when there is at most
 *   one, or NULL when no property had data.
 */
function _parser_common_syndication_RDF10_item($rdf_data, $mappings) {
  $item = $mappings;
  foreach ($mappings as $field => $sources) {
    $found = _parser_common_syndication_RDF10_property($rdf_data, $sources);
    if (is_array($found) && count($found) <= 1) {
      // Unwrap lists holding at most one value.
      $item[$field] = reset($found);
    }
    else {
      $item[$field] = $found;
    }
  }
  return $item;
}
395

    
396
/**
 * Parse RSS2.0 feeds.
 *
 * Also used for RSS 0.91 and 0.92 feeds.
 *
 * @param SimpleXMLElement $feed_XML
 *   The root <rss> element.
 *
 * @return array
 *   An array with the keys 'title', 'description', 'link' and 'items'. Each
 *   item holds 'title', 'description', 'author_name', 'timestamp', 'url',
 *   'guid', 'source:title', 'source:url', 'geolocations', 'domains' and
 *   'tags'. 'domains' maps each category domain to the list of indexes (into
 *   'tags') of the categories using it.
 */
function _parser_common_syndication_RSS20_parse($feed_XML) {

  $ns = array(
    "content" => "http://purl.org/rss/1.0/modules/content/",
    "dc" => "http://purl.org/dc/elements/1.1/",
    "georss" => "http://www.georss.org/georss",
  );

  $parsed_source = array();
  // Detect the title.
  $parsed_source['title'] = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";
  // Detect the description.
  $parsed_source['description'] = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";
  // Detect the link.
  $parsed_source['link'] = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source['items'] = array();

  foreach ($feed_XML->xpath('//item') as $news) {
    $title = $body = $original_author = $original_url = $guid = '';

    // Get optional source url.
    $source_url = (string) $news->source['url'];

    // Categories are fetched before the array cast below so that their
    // 'domain' attributes remain accessible.
    $category = $news->xpath('category');
    // Get children for current namespace.
    $content = (array) $news->children($ns["content"]);
    $dc      = (array) $news->children($ns["dc"]);
    $georss  = (array) $news->children($ns["georss"]);
    $news = (array) $news;
    $news['category'] = $category;

    if (isset($news['title'])) {
      $title = "{$news['title']}";
    }

    if (isset($news['description'])) {
      $body = "{$news['description']}";
    }
    // Some sources use content:encoded as description i.e.
    // PostNuke PageSetter module. The longer of the two texts wins.
    // content:encoded for PHP < 5.1.2.
    if (isset($news['encoded'])) {
      if (strlen($body) < strlen("{$news['encoded']}")) {
        $body = "{$news['encoded']}";
      }
    }
    // content:encoded for PHP >= 5.1.2.
    if (isset($content['encoded'])) {
      if (strlen($body) < strlen("{$content['encoded']}")) {
        $body = "{$content['encoded']}";
      }
    }

    if (!empty($news['author'])) {
      $original_author = "{$news['author']}";
    }
    elseif (!empty($dc["creator"])) {
      $original_author = (string) $dc["creator"];
    }

    // The link doubles as the GUID unless an explicit <guid> is present.
    if (!empty($news['link'])) {
      $original_url = "{$news['link']}";
      $guid = $original_url;
    }

    if (!empty($news['guid'])) {
      $guid = "{$news['guid']}";
    }

    // Reset the geo data before reading it, so that a georss:featureName
    // found on this item is kept as the location name.
    $lat =
    $lon =
    $latlon =
    $geoname = NULL;
    if (!empty($georss['featureName'])) {
      $geoname = "{$georss['featureName']}";
    }
    if (!empty($georss['point'])) {
      // A georss:point is "latitude longitude", separated by a space.
      $latlon = explode(' ', $georss['point']);
      $lat = "{$latlon[0]}";
      // Tolerate a point with a single coordinate instead of reading an
      // undefined index.
      $lon = isset($latlon[1]) ? "{$latlon[1]}" : '';
      if (!$geoname) {
        $geoname = "$lat $lon";
      }
    }

    $additional_taxonomies = array();
    $additional_taxonomies['RSS Categories'] = array();
    $additional_taxonomies['RSS Domains'] = array();
    if (isset($news['category'])) {
      foreach ($news['category'] as $category) {
        $additional_taxonomies['RSS Categories'][] = "{$category}";
        if (isset($category['domain'])) {
          $domain = "{$category['domain']}";
          if (!empty($domain)) {
            if (!isset($additional_taxonomies['RSS Domains'][$domain])) {
              $additional_taxonomies['RSS Domains'][$domain] = array();
            }
            // Index of the category that was just appended.
            $additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1;
          }
        }
      }
    }

    $item = array();
    $item['title'] = _parser_common_syndication_title($title, $body);
    $item['description'] = $body;
    $item['author_name'] = $original_author;
    // Timestamp: pubDate, then dc:date, then the current time.
    if (!empty($news['pubDate'])) {
      $item['timestamp'] = _parser_common_syndication_parse_date($news['pubDate']);
    }
    elseif (!empty($dc['date'])) {
      $item['timestamp'] = _parser_common_syndication_parse_date($dc['date']);
    }
    else {
      $item['timestamp'] = time();
    }
    $item['url'] = trim($original_url);
    $item['guid'] = $guid;
    if (!empty($news['source'])) {
      $item['source:title'] = $news['source'];
    }
    else {
      $item['source:title'] = NULL;
    }
    $item['source:url'] = trim($source_url);

    $item['geolocations'] = array();
    if (isset($geoname, $lat, $lon)) {
      $item['geolocations'] = array(
        array(
          'name' => $geoname,
          'lat' => $lat,
          'lon' => $lon,
        ),
      );
    }

    $item['domains'] = $additional_taxonomies['RSS Domains'];
    $item['tags'] = $additional_taxonomies['RSS Categories'];
    $parsed_source['items'][] = $item;
  }
  return $parsed_source;
}
546

    
547
/**
 * Converts a date string found in a feed into a Unix timestamp.
 *
 * @param string $date_str
 *   The date string, in any of various formats.
 *
 * @return int
 *   The timestamp for the string, or the current time if it can't be parsed.
 */
function _parser_common_syndication_parse_date($date_str) {
  // PHP < 5.3 doesn't like the GMT- notation for parsing timezones, so reduce
  // "GMT-"/"GMT+" to the bare sign.
  $date_str = str_replace(array('GMT-', 'GMT+'), array('-', '+'), $date_str);

  // First attempt: PHP's own parser.
  $timestamp = strtotime($date_str);

  // Second attempt: the W3C date/time format.
  if ($timestamp === FALSE || $timestamp == -1) {
    $timestamp = _parser_common_syndication_parse_w3cdtf($date_str);
  }

  // Third attempt: PHP does not support the UT timezone. Fake it. The system
  // that generated this, Google Groups, probably meant UTC.
  if ($timestamp === FALSE || $timestamp == -1) {
    $normalized = strtolower(trim($date_str));
    $suffix = substr($normalized, strlen($normalized) - 3, 3);
    if ($suffix == ' ut') {
      $timestamp = strtotime($normalized . 'c');
    }
  }

  if ($timestamp === FALSE) {
    return time();
  }
  return $timestamp;
}
579

    
580
/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * Handles dates of the form YYYY-MM-DDThh:mm[:ss[.fraction]][TZD], where TZD
 * is "Z" or an offset such as +hh:mm, -hh:mm or +hhmm. Fractional seconds are
 * accepted but ignored. A date without a timezone designator is read as GMT.
 *
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * @param string $date_str
 *   A potentially W3C DTF date.
 *
 * @return int|false
 *   A timestamp if parsed successfully or FALSE if not.
 */
function _parser_common_syndication_parse_w3cdtf($date_str) {
  // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss" with
  // the colon, 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes,
  // 11 "Z".
  $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';
  if (!preg_match($pattern, $date_str, $match)) {
    return FALSE;
  }

  // preg_match() leaves out trailing groups that did not take part in the
  // match; pad them so that every index below exists.
  $match += array_fill(0, 12, '');

  // Calculate the epoch for current date assuming GMT. Seconds are optional
  // and default to zero.
  $epoch = gmmktime((int) $match[4], (int) $match[5], (int) $match[7], (int) $match[2], (int) $match[3], (int) $match[1]);

  // Z is zulu time, aka GMT, and needs no correction; the same goes for a
  // date without any timezone designator. Only an explicit offset is applied.
  if ($match[8] !== '') {
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // Is timezone ahead of GMT?  If yes, subtract offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}
621

    
622
/**
 * Extract the link that points to the original content (back to site or
 * original article).
 *
 * The first "alternate" link wins. As required by RFC 4287, a link that has
 * an href but no rel attribute counts as "alternate". When there is no
 * alternate link at all, the href of the last link is used.
 *
 * @param SimpleXMLElement|array $links
 *   The <link> elements to inspect: a SimpleXMLElement collection or an array
 *   of SimpleXMLElement objects.
 *
 * @return string
 *   An URL if found. An empty string otherwise.
 */
function _parser_common_syndication_link($links) {
  $to_link = '';

  // Nothing to iterate over (for instance NULL).
  if (!is_array($links) && !($links instanceof Traversable)) {
    return $to_link;
  }

  foreach ($links as $link) {
    $attributes = $link->attributes();
    $to_link = isset($attributes["href"]) ? "{$attributes["href"]}" : "";

    if (isset($attributes["rel"])) {
      $is_alternate = "{$attributes["rel"]}" == 'alternate';
    }
    else {
      // A missing rel means "alternate"; only accept it when the link
      // actually carries a target, so later links still get a chance.
      $is_alternate = $to_link !== '';
    }

    if ($is_alternate) {
      break;
    }
  }

  return trim($to_link);
}
647

    
648
/**
 * Prepare raw data to be a title.
 *
 * @param string $title
 *   The title found in the feed, possibly empty.
 * @param string|false $body
 *   Optional body text used to build a title when $title is missing.
 *
 * @return string
 *   The given title, or the first three words of the body (tags stripped)
 *   when the title is missing and a body is available.
 */
function _parser_common_syndication_title($title, $body = FALSE) {
  // The literal title "0" is a real title even though empty() rejects it.
  $title_missing = empty($title) && $title !== '0';

  if ($title_missing && !empty($body)) {
    // Explode to words and use the first 3 words. Empty pieces are dropped so
    // that leading whitespace or commas in the body do not use up a slot.
    $words = preg_split('/[\s,]+/', strip_tags($body), -1, PREG_SPLIT_NO_EMPTY);
    $title = implode(' ', array_slice($words, 0, 3));
  }
  return $title;
}