/drupal7/modules/aggregator/aggregator.parser.inc - Annoter - Club Drupal - Forge Centrale Marseille

85ad3d82

Assos Assos

<?php

/**
 * @file
 * Parser functions for the aggregator module.
 */

/**
 * Implements hook_aggregator_parse_info().
 *
 * @return
 *   An associative array with the translated 'title' and 'description' of
 *   the default parser.
 */
function aggregator_aggregator_parse_info() {
  return array(
    'title' => t('Default parser'),
    'description' => t('Parses RSS, Atom and RDF feeds.'),
  );
}

/**
 * Implements hook_aggregator_parse().
 *
 * Runs aggregator_parse_feed() on the feed's source string and, on success,
 * copies the channel-level results onto the feed object.
 *
 * @param $feed
 *   An object describing the feed. Its source_string and http_headers
 *   properties are read; link, description, image, etag and modified are
 *   written on success.
 *
 * @return
 *   TRUE if the feed was parsed, FALSE otherwise.
 */
function aggregator_aggregator_parse($feed) {
  // Both globals are filled in by the XML callbacks while
  // aggregator_parse_feed() runs.
  global $channel, $image;

  // Filter the input data.
  if (aggregator_parse_feed($feed->source_string, $feed)) {
    $modified = empty($feed->http_headers['last-modified']) ? 0 : strtotime($feed->http_headers['last-modified']);

    // Prepare the channel data.
    foreach ($channel as $key => $value) {
      $channel[$key] = trim($value);
    }

    // Prepare the image data (if any).
    foreach ($image as $key => $value) {
      $image[$key] = trim($value);
    }

    $etag = empty($feed->http_headers['etag']) ? '' : $feed->http_headers['etag'];

    // Add parsed data to the feed object.
    $feed->link = !empty($channel['link']) ? $channel['link'] : '';
    $feed->description = !empty($channel['description']) ? $channel['description'] : '';
    $feed->image = !empty($image['url']) ? $image['url'] : '';
    $feed->etag = $etag;
    $feed->modified = $modified;

    // Clear the cache.
    cache_clear_all();

    return TRUE;
  }

  return FALSE;
}

/**
 * Parses a feed and stores its items.
 *
 * The XML callbacks aggregator_element_start(), aggregator_element_end() and
 * aggregator_element_data() collect the raw data in the globals $items,
 * $image and $channel; this function then normalizes every item and stores
 * the result in $feed->items.
 *
 * @param $data
 *   The feed data.
 * @param $feed
 *   An object describing the feed to be parsed. Its title is used in error
 *   messages and its link is the fallback link for items without one.
 *
 * @return
 *   FALSE on error, TRUE otherwise.
 */
function aggregator_parse_feed(&$data, $feed) {
  global $items, $image, $channel;

  // Unset the global variables before we use them.
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
  $items = array();
  $image = array();
  $channel = array();

  // Parse the data.
  $xml_parser = drupal_xml_parser_create($data);
  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');

  if (!xml_parse($xml_parser, $data, TRUE)) {
    // Read the error details first so that the parser can be released on
    // this path as well instead of being leaked.
    $error_args = array(
      '%site' => $feed->title,
      '%error' => xml_error_string(xml_get_error_code($xml_parser)),
      '%line' => xml_get_current_line_number($xml_parser),
    );
    xml_parser_free($xml_parser);

    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', $error_args, WATCHDOG_WARNING);
    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', $error_args), 'error');
    return FALSE;
  }
  xml_parser_free($xml_parser);

  // We reverse the array such that we store the first item last, and the last
  // item first. In the database, the newest item should be at the top.
  $items = array_reverse($items);

  // Initialize items array.
  $feed->items = array();
  foreach ($items as $item) {
    // Prepare the item:
    foreach ($item as $key => $value) {
      $item[$key] = trim($value);
    }

    // Resolve the item's title. If no title is found, we use up to 40
    // characters of the description ending at a word boundary, but not
    // splitting potential entities.
    if (empty($item['title'])) {
      if (!empty($item['description'])) {
        $item['title'] = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['description'], 40));
      }
      else {
        $item['title'] = '';
      }
    }

    // Resolve the items link: fall back to the feed's link.
    if (empty($item['link'])) {
      $item['link'] = $feed->link;
    }

    // Atom feeds have an ID tag instead of a GUID tag.
    if (!isset($item['guid'])) {
      $item['guid'] = isset($item['id']) ? $item['id'] : '';
    }

    // Atom feeds have a content and/or summary tag instead of a description tag.
    if (!empty($item['content:encoded'])) {
      $item['description'] = $item['content:encoded'];
    }
    elseif (!empty($item['summary'])) {
      $item['description'] = $item['summary'];
    }
    elseif (!empty($item['content'])) {
      $item['description'] = $item['content'];
    }

    // Try to resolve and parse the item's publication date. The first
    // non-empty candidate wins.
    $date = '';
    foreach (array('pubdate', 'dc:date', 'dcterms:issued', 'dcterms:created', 'dcterms:modified', 'issued', 'created', 'modified', 'published', 'updated') as $key) {
      if (!empty($item[$key])) {
        $date = $item[$key];
        break;
      }
    }

    $item['timestamp'] = strtotime($date);

    if ($item['timestamp'] === FALSE) {
      // aggregator_parse_w3cdtf() returns FALSE on failure.
      $item['timestamp'] = aggregator_parse_w3cdtf($date);
    }

    // Resolve dc:creator tag as the item author if author tag is not set.
    if (empty($item['author']) && !empty($item['dc:creator'])) {
      $item['author'] = $item['dc:creator'];
    }

    // Make sure the keys below always exist.
    $item += array('author' => '', 'description' => '');

    // Store on $feed object. This is where processors will look for parsed items.
    $feed->items[] = $item;
  }

  return TRUE;
}

/**
 * Performs an action when an opening tag is encountered.
 *
 * Callback function used by xml_parse() within aggregator_parse_feed().
 *
 * Tracks the parsing context in the globals $element (the current container
 * element), $item (the index of the current item) and $tag (the lower-cased
 * name of the tag that was opened last), and records "alternate" link
 * targets given as HREF attributes.
 *
 * @param $parser
 *   The XML parser invoking the callback. Not used.
 * @param $name
 *   The name of the opened element. It is lower-cased before use.
 * @param $attributes
 *   The attributes of the element, keyed by upper-case attribute name.
 */
function aggregator_element_start($parser, $name, $attributes) {
  global $item, $element, $tag, $items, $channel;

  $name = strtolower($name);
  switch ($name) {
    case 'image':
    case 'textinput':
    case 'summary':
    case 'tagline':
    case 'subtitle':
    case 'logo':
    case 'info':
      $element = $name;
      break;

    case 'id':
    case 'content':
      // Inside an item these are ordinary item fields; elsewhere they become
      // the current container element.
      if ($element != 'item') {
        $element = $name;
      }
      // No break here: execution continues into the 'link' case, exactly as
      // in the original code.

    case 'link':
      // According to RFC 4287, link elements in Atom feeds without a 'rel'
      // attribute should be interpreted as though the relation type is
      // "alternate".
      if (!empty($attributes['HREF']) && (empty($attributes['REL']) || $attributes['REL'] == 'alternate')) {
        if ($element == 'item') {
          $items[$item]['link'] = $attributes['HREF'];
        }
        else {
          $channel['link'] = $attributes['HREF'];
        }
      }
      break;

    case 'item':
      $element = $name;
      $item += 1;
      break;

    case 'entry':
      // Atom entries are handled like RSS items.
      $element = 'item';
      $item += 1;
      break;
  }

  $tag = $name;
}

/**
 * Performs an action when a closing tag is encountered.
 *
 * Callback function used by xml_parse() within aggregator_parse_feed().
 *
 * Resets the global $element when the container element it refers to is
 * closed.
 *
 * @param $parser
 *   The XML parser invoking the callback. Not used.
 * @param $name
 *   The name of the closed element. It is lower-cased before use.
 */
function aggregator_element_end($parser, $name) {
  global $element;

  // aggregator_element_start() lower-cases element names before comparing
  // them and reads upper-case attribute keys ('HREF', 'REL'), so names reach
  // the handlers case-folded. Normalize the name here as well; otherwise none
  // of the lower-case labels below can match and $element is never reset.
  $name = strtolower($name);
  switch ($name) {
    case 'image':
    case 'textinput':
    case 'item':
    case 'entry':
    case 'info':
      $element = '';
      break;

    case 'id':
    case 'content':
      // Only reset when this tag was the container itself, i.e. not when it
      // was an ordinary field inside an item.
      if ($element == $name) {
        $element = '';
      }
      break;
  }
}

/**
 * Performs an action when data is encountered.
 *
 * Callback function used by xml_parse() within aggregator_parse_feed().
 *
 * Appends the character data to the global $items, $image or $channel array,
 * depending on the current container element ($element) and the tag that was
 * opened last ($tag).
 *
 * @param $parser
 *   The XML parser invoking the callback. Not used.
 * @param $data
 *   The character data. The text of one element may be delivered in several
 *   calls, which is why it is always appended.
 */
function aggregator_element_data($parser, $data) {
  global $channel, $element, $items, $item, $image, $tag;

  // Make sure an entry exists for the current item index.
  $items += array($item => array());
  switch ($element) {
    case 'item':
      $items[$item] += array($tag => '');
      $items[$item][$tag] .= $data;
      break;

    case 'image':
    case 'logo':
      $image += array($tag => '');
      $image[$tag] .= $data;
      break;

    case 'link':
      if ($data) {
        $items[$item] += array($tag => '');
        $items[$item][$tag] .= $data;
      }
      break;

    case 'content':
      $items[$item] += array('content' => '');
      $items[$item]['content'] .= $data;
      break;

    case 'summary':
      $items[$item] += array('summary' => '');
      $items[$item]['summary'] .= $data;
      break;

    case 'tagline':
    case 'subtitle':
      // Both are stored as the channel description.
      $channel += array('description' => '');
      $channel['description'] .= $data;
      break;

    case 'info':
    case 'id':
    case 'textinput':
      // The sub-element is not supported. However, we must recognize
      // it or its contents will end up in the item array.
      break;

    default:
      // Anything else is channel-level data, keyed by tag name.
      $channel += array($tag => '');
      $channel[$tag] .= $data;
  }
}

/**
 * Parses the W3C date/time format, a subset of ISO 8601.
 *
 * PHP date parsing functions do not handle this format. See
 * http://www.w3.org/TR/NOTE-datetime for more information. Originally from
 * MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 *
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function aggregator_parse_w3cdtf($date_str) {
  // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":SS",
  // 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  if (!preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) {
    return FALSE;
  }

  // preg_match() leaves out trailing groups that did not take part in the
  // match, so pad them with empty strings before reading them.
  $match += array_fill(0, 12, '');

  // Calculate the epoch for current date assuming GMT. The seconds are in
  // group 7; group 6 still carries the leading colon.
  $epoch = gmmktime((int) $match[4], (int) $match[5], (int) $match[7], (int) $match[2], (int) $match[3], (int) $match[1]);

  // Z is zulu time, aka GMT, and needs no correction.
  if ($match[11] != 'Z') {
    // A missing offset leaves both parts empty, which casts to zero.
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // Is timezone ahead of GMT?  If yes, subtract offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}

Projet

Général

Profil

Club Drupal

root / drupal7 / modules / aggregator / aggregator.parser.inc @ 01f36513