<?php

/**
 * @file
 * Contains FeedsSitemapParser and related classes.
 */

/**
 * A parser for the Sitemap specification http://www.sitemaps.org/protocol.php
 */
class FeedsSitemapParser extends FeedsParser {

  /**
   * Implements FeedsParser::parse().
   *
   * Reads the raw fetcher result as XML and converts every <url> element into
   * an item array. Each item always has a 'url' key (the <loc> text) and, when
   * the corresponding child element is present, 'lastmod' (result of
   * strtotime() evaluated with GMT as the default time zone), 'changefreq' and
   * 'priority' (both as strings).
   *
   * @param FeedsSource $source
   *   The feed source being imported. Not used by this parser.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose raw content is parsed as XML.
   *
   * @return FeedsParserResult
   *   A parser result whose items array holds one entry per <url> element.
   *
   * @throws Exception
   *   Rethrows any exception raised while parsing (for example by
   *   SimpleXMLElement on malformed XML) after the default time zone has been
   *   restored.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    // Set time zone to GMT for parsing dates with strtotime().
    $tz = date_default_timezone_get();
    date_default_timezone_set('GMT');

    try {
      // Yes, using a DOM parser is a bit inefficient, but will do for now.
      $xml = new SimpleXMLElement($fetcher_result->getRaw());
      $result = new FeedsParserResult();

      foreach ($xml->url as $url) {
        $item = array('url' => (string) $url->loc);

        if ($url->lastmod) {
          // Cast explicitly: strtotime() expects a string, not a
          // SimpleXMLElement object.
          $lastmod = strtotime((string) $url->lastmod);
          // strtotime() returns FALSE for unparsable input; do not store that
          // as if it were a timestamp.
          if ($lastmod !== FALSE) {
            $item['lastmod'] = $lastmod;
          }
        }
        if ($url->changefreq) {
          $item['changefreq'] = (string) $url->changefreq;
        }
        if ($url->priority) {
          $item['priority'] = (string) $url->priority;
        }

        $result->items[] = $item;
      }
    }
    catch (Exception $e) {
      // The default time zone is process-wide state: restore it before letting
      // the failure propagate so the rest of the request is not left on GMT.
      date_default_timezone_set($tz);
      throw $e;
    }

    date_default_timezone_set($tz);
    return $result;
  }

  /**
   * Implements FeedsParser::getMappingSources().
   *
   * @return array
   *   Mapping source definitions keyed by the item keys produced by parse()
   *   ('url', 'lastmod', 'changefreq', 'priority'), each with a translated
   *   'name' and 'description', combined with the parent's mapping sources.
   *   Because the arrays are combined with the + operator, the entries defined
   *   here win over parent entries that use the same key.
   */
  public function getMappingSources() {
    return array(
      'url' => array(
        'name' => t('Item URL (link)'),
        'description' => t('URL of the feed item.'),
      ),
      'lastmod' => array(
        'name' => t('Last modification date'),
        'description' => t('Last modified date as UNIX time GMT of the feed item.'),
      ),
      'changefreq' => array(
        'name' => t('Change frequency'),
        'description' => t('How frequently the page is likely to change.'),
      ),
      'priority' => array(
        'name' => t('Priority'),
        'description' => t('The priority of this URL relative to other URLs on the site.'),
      ),
    ) + parent::getMappingSources();
  }

}