Projet

Général

Profil

Paste
Télécharger (18,5 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_xpathparser / FeedsXPathParserBase.inc @ f066bdb5

1
<?php
2

    
3
/**
4
 * @file
5
 * Provides the base class for FeedsXPathParserHTML and FeedsXPathParserXML.
6
 */
7

    
8
/**
9
 * Base class for the HTML and XML parsers.
10
 */
11
abstract class FeedsXPathParserBase extends FeedsParser {
12

    
13
  /**
   * Source keys whose query results are returned through getRaw().
   *
   * Filled by parse() from the checked entries of the 'rawXML' setting.
   *
   * @var array
   */
  protected $rawXML = array();

  /**
   * The document returned by setup() for the current parse() run.
   *
   * @var DOMDocument|null
   */
  protected $doc = NULL;

  /**
   * The XPath helper created by parse() for the current document.
   *
   * @var FeedsXPathParserDOMXPath|null
   */
  protected $xpath = NULL;
16

    
17
  /**
   * Builds the document that parse() runs its XPath queries against.
   *
   * Every concrete parser extending FeedsXPathParserBase has to provide this.
   *
   * @param array $source_config
   *   The configuration for the source.
   * @param FeedsFetcherResult $fetcher_result
   *   A FeedsFetcherResult object.
   *
   * @return DOMDocument
   *   The DOMDocument to perform XPath queries on.
   */
  abstract protected function setup($source_config, FeedsFetcherResult $fetcher_result);
29

    
30
  /**
   * Converts a node to its raw string form.
   *
   * Used by parseSourceElement() for sources that were configured to return
   * raw XML or HTML instead of the node value.
   *
   * @param DOMNode $node
   *   The DOMNode to convert to a string.
   *
   * @return string
   *   The string representation of the DOMNode.
   */
  abstract protected function getRaw(DOMNode $node);
40

    
41
  /**
   * Implements FeedsParser::parse().
   *
   * Evaluates the configured context query against the document built by
   * setup(), one batch at a time, and runs every configured source query for
   * each context node of the batch.
   *
   * @param FeedsSource $source
   *   The source being imported; supplies the configuration and batch state.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result that is handed to setup().
   *
   * @return FeedsParserResult
   *   The items parsed for the current batch, along with the source link.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $source_config = $source->getConfigFor($this);
    $state = $source->state(FEEDS_PARSE);

    // Fall back to the parser configuration when the source defines none.
    if (empty($source_config)) {
      $source_config = $this->getConfig();
    }

    $this->doc = $this->setup($source_config, $fetcher_result);

    $parser_result = new FeedsParserResult();

    $mappings = $this->getOwnMappings();
    $this->rawXML = array_keys(array_filter($source_config['rawXML']));
    // Set link.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    $parser_result->link = isset($fetcher_config['source']) ? $fetcher_config['source'] : '';

    $this->xpath = new FeedsXPathParserDOMXPath($this->doc);
    $config = array();
    $config['debug'] = array_keys(array_filter($source_config['exp']['debug']));
    $config['errors'] = $source_config['exp']['errors'];

    $this->xpath->setConfig($config);

    $context_query = '(' . $source_config['context'] . ')';
    if (empty($state->total)) {
      $state->total = $this->xpath->namespacedQuery('count(' . $context_query . ')', $this->doc, 'count');
    }

    // Work out the slice of context nodes that belongs to this batch.
    $start = $state->pointer ? $state->pointer : 0;
    $limit = $start + $source->importer->getLimit();
    $end = ($limit > $state->total) ? $state->total : $limit;
    $state->pointer = $end;

    $context_query .= "[position() > $start and position() <= $end]";

    $progress = $state->pointer ? $state->pointer : 0;

    $all_nodes = $this->xpath->namespacedQuery($context_query, NULL, 'context');

    foreach ($all_nodes as $node) {
      // Invoke a hook to check whether the domnode should be skipped.
      if (in_array(TRUE, module_invoke_all('feeds_xpathparser_filter_domnode', $node, $this->doc, $source), TRUE)) {
        continue;
      }

      $parsed_item = $variables = array();
      foreach ($source_config['sources'] as $element_key => $query) {
        // Variable substitution.
        $query = strtr($query, $variables);
        // Parse the item.
        $result = $this->parseSourceElement($query, $node, $element_key);
        if (!isset($result)) {
          continue;
        }
        // Only register a replacement variable for sources that still have a
        // mapping. Without this check a stale source key would register the
        // bare key '$', and strtr() would then rewrite the dollar signs of
        // every later query.
        if (isset($mappings[$element_key])) {
          // Multi-valued results cannot be substituted, so they become ''.
          $variables['$' . $mappings[$element_key]] = is_array($result) ? '' : $result;
        }
        $parsed_item[$element_key] = $result;
      }
      if (!empty($parsed_item)) {
        $parser_result->items[] = $parsed_item;
      }
    }

    $state->progress($state->total, $progress);
    unset($this->doc);
    unset($this->xpath);
    return $parser_result;
  }
117

    
118
  /**
   * Runs a single source query against one context node.
   *
   * @param string $query
   *   An XPath query.
   * @param DOMNode $context
   *   The current context DOMNode.
   * @param string $source
   *   The name of the source for this query.
   *
   * @return mixed
   *   NULL when the query is empty or matches no node, a single value when
   *   exactly one node matches, an array of values when several nodes match,
   *   or the value namespacedQuery() returned when it is not a DOMNodeList.
   */
  protected function parseSourceElement($query, $context, $source) {
    if (empty($query)) {
      return NULL;
    }

    $queried = $this->xpath->namespacedQuery($query, $context, $source);

    // A value was returned directly from namespacedQuery().
    if (!($queried instanceof DOMNodeList)) {
      return $queried;
    }

    // Sources configured as raw go through getRaw(), all others yield the
    // node value.
    $as_raw = in_array($source, $this->rawXML);
    $values = array();
    foreach ($queried as $item) {
      $values[] = $as_raw ? $this->getRaw($item) : $item->nodeValue;
    }

    switch (count($values)) {
      case 0:
        // Empty result returns NULL, that way callers can check with isset().
        return NULL;

      case 1:
        // A lone match is returned as a scalar rather than a one-item array.
        return $values[0];

      default:
        return $values;
    }
  }
170

    
171
  /**
   * Overrides parent::sourceForm().
   *
   * Builds the XPath settings form: the context query, one query field per
   * mapping owned by this parser, the raw XML/HTML selection and the debug
   * options.
   *
   * @param array $source_config
   *   The stored configuration for the source. The parser configuration is
   *   used instead when it is empty.
   *
   * @return array|null
   *   The form array, or NULL when overrides are disabled and the form is not
   *   being built on behalf of configForm().
   */
  public function sourceForm($source_config) {
    $form = array();
    $importer = feeds_importer($this->id);
    $importer_config = $importer->getConfig();

    if (empty($source_config)) {
      $source_config = $this->getConfig();
    }

    // configForm() sets 'config' so the form is still built for the importer
    // settings page even when source overrides are switched off.
    if (isset($source_config['allow_override']) &&
        !$source_config['allow_override'] &&
        empty($source_config['config'])) {
      return;
    }

    // Add extensions that might get imported.
    $allowed_extensions = isset($importer_config['fetcher']['config']['allowed_extensions']) ? $importer_config['fetcher']['config']['allowed_extensions'] : FALSE;
    if ($allowed_extensions) {
      if (strpos($allowed_extensions, 'html') === FALSE) {
        $importer->fetcher->config['allowed_extensions'] .= ' html htm';
      }
    }

    $uniques = $this->getUniques();
    $mappings = $this->getOwnMappings();
    $targets = $importer->processor->getMappingTargets();

    $form['xpath'] = array(
      '#type' => 'fieldset',
      '#tree' => TRUE,
      '#title' => t('XPath Parser Settings'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    if (empty($mappings)) {
      // Detect if Feeds menu structure has changed. feeds_ui_menu() only
      // exists while the Feeds UI module is enabled, so guard the call instead
      // of triggering a fatal error on sites that have it disabled.
      $feeds_menu = function_exists('feeds_ui_menu') ? feeds_ui_menu() : array();
      if (isset($feeds_menu['admin/structure/feeds/list'])) {
        $feeds_base = 'admin/structure/feeds/edit/';
      }
      else {
        $feeds_base = 'admin/structure/feeds/';
      }
      $form['xpath']['error_message']['#markup'] = '<div class="help">' . t('No XPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
      return $form;
    }
    $form['xpath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('This is the base query, all other queries will run in this context.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    $form['xpath']['sources'] = array(
      '#type' => 'fieldset',
      '#tree' => TRUE,
    );
    if (!empty($uniques)) {
      // format_plural() translates its arguments itself, so it receives the
      // untranslated strings plus the placeholder values.
      $unique_list = implode(', ', $uniques);
      $items = array(
        format_plural(count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array('!column' => $unique_list, '!columns' => $unique_list)
        ),
      );
      $form['xpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
    }
    // Each query may reference the targets of the queries listed before it.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['xpath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => isset($targets[$target]['name']) ? check_plain($targets[$target]['name']) : check_plain($target),
        '#description' => t('The XPath query to run.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
        '#size' => 80,
      );
      if (!empty($variables)) {
        $variable_text = format_plural(count($variables),
          'The variable %variable is available for replacement.',
          'The variables %variable are available for replacement.',
          array('%variable' => implode(', ', $variables))
        );
        $form['xpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
      }
      $variables[] = '$' . $target;
    }
    $form['xpath']['rawXML'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Select the queries you would like to return raw XML or HTML'),
      '#options' => $this->getOwnMappings(TRUE),
      '#default_value' => isset($source_config['rawXML']) ? $source_config['rawXML'] : array(),
    );
    $form['xpath']['exp'] = array(
      '#type' => 'fieldset',
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#tree' => TRUE,
      '#title' => t('Debug Options'),
    );
    $form['xpath']['exp']['errors'] = array(
      '#type' => 'checkbox',
      '#title' => t('Show error messages.'),
      '#default_value' => isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : FALSE,
    );
    // The Tidy options are only offered when the PHP extension is available.
    if (extension_loaded('tidy')) {
      $form['xpath']['exp']['tidy'] = array(
        '#type' => 'checkbox',
        '#title' => t('Use Tidy'),
        '#description' => t('The Tidy PHP extension has been detected.
                              Select this to clean the markup before parsing.'),
        '#default_value' => isset($source_config['exp']['tidy']) ? $source_config['exp']['tidy'] : FALSE,
      );
      $form['xpath']['exp']['tidy_encoding'] = array(
        '#type' => 'textfield',
        '#title' => t('Tidy encoding'),
        '#description' => t('Set the encoding for tidy. See the !phpdocs for possible values.', array('!phpdocs' => l(t('PHP docs'), 'http://www.php.net/manual/en/tidy.parsestring.php/'))),
        '#default_value' => isset($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8',
        '#states' => array(
          'visible' => array(
            ':input[name$="[tidy]"]' => array(
              'checked' => TRUE,
            ),
          ),
        ),
      );
    }
    $form['xpath']['exp']['debug'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array('context' => t('Context')), $this->getOwnMappings(TRUE)),
      '#default_value' => isset($source_config['exp']['debug']) ? $source_config['exp']['debug'] : array(),
    );
    return $form;
  }
312

    
313
  /**
   * Overrides parent::configForm().
   *
   * Reuses sourceForm() for the importer settings and adds the checkbox that
   * controls whether sources may override these settings.
   *
   * @param array $form_state
   *   The form state; not used by this implementation.
   *
   * @return array
   *   The configuration form.
   */
  public function configForm(&$form_state) {
    $config = $this->getConfig();
    // Tells sourceForm() the form is built for the importer configuration.
    $config['config'] = TRUE;

    $override_checkbox = array(
      '#type' => 'checkbox',
      '#title' => t('Allow source configuration override'),
      '#description' => t('This setting allows feed nodes to specify their own XPath values for the context and sources.'),
      '#default_value' => $config['allow_override'],
    );

    $form = $this->sourceForm($config);
    // Unlike on a source, the importer may leave the context empty and the
    // fieldset is shown expanded.
    $form['xpath']['context']['#required'] = FALSE;
    $form['xpath']['#collapsed'] = FALSE;
    $form['xpath']['allow_override'] = $override_checkbox;

    return $form;
  }
331

    
332
  /**
   * Overrides parent::sourceDefaults().
   *
   * @return array
   *   Always an empty array; this parser declares no source-level defaults.
   */
  public function sourceDefaults() {
    $defaults = array();
    return $defaults;
  }
338

    
339
  /**
   * Overrides parent::configDefaults().
   *
   * @return array
   *   The default parser settings: no queries, no raw sources, an empty
   *   context, debugging and Tidy switched off, and source overrides allowed.
   */
  public function configDefaults() {
    // Defaults of the "Debug Options" fieldset.
    $debug_options = array(
      'errors' => FALSE,
      'tidy' => FALSE,
      'debug' => array(),
      'tidy_encoding' => 'UTF8',
    );

    $defaults = array();
    $defaults['sources'] = array();
    $defaults['rawXML'] = array();
    $defaults['context'] = '';
    $defaults['exp'] = $debug_options;
    $defaults['allow_override'] = TRUE;

    return $defaults;
  }
356

    
357
  /**
   * Overrides parent::sourceFormValidate().
   *
   * If the values of this source are the same as the base config we set them to
   * blank so that the values will be inherited from the importer defaults.
   *
   * @param array $values
   *   The submitted source form values, passed by reference. Replaced by the
   *   contents of the 'xpath' element, or by an empty array when nothing needs
   *   to be stored for the source.
   */
  public function sourceFormValidate(&$values) {
    // sourceForm() builds no 'xpath' element when overrides are disabled, so
    // presumably no such values are submitted then. Bail out before reading
    // the missing key, which would raise a notice and sort a NULL.
    if (!isset($values['xpath'])) {
      $values = array();
      return;
    }

    $config = $this->getConfig();
    $values = $values['xpath'];
    $allow_override = $config['allow_override'];
    // The source form has no 'allow_override' element, so drop it before
    // comparing the submitted values with the parser configuration.
    unset($config['allow_override']);
    ksort($values);
    ksort($config);
    if ($values === $config || !$allow_override) {
      $values = array();
      return;
    }

    $this->configFormValidate($values);
  }
377

    
378
  /**
   * Overrides parent::configFormValidate().
   *
   * Checks the context query and every source query for XPath errors by
   * evaluating them against an empty sample document. Also called by
   * sourceFormValidate() with the already unwrapped XPath values.
   *
   * @param array $values
   *   The submitted values, passed by reference. When they are wrapped in an
   *   'xpath' key they are replaced by its contents. The context and source
   *   queries are trimmed in place.
   */
  public function configFormValidate(&$values) {
    $mappings = $this->getOwnMappings();

    // This tests if we're validating configForm or sourceForm.
    $config_form = FALSE;
    if (isset($values['xpath'])) {
      $values = $values['xpath'];
      $config_form = TRUE;
    }
    $class = get_class($this);
    $xml = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?>' . "\n<items></items>");
    $use_errors = $this->errorStart();

    // The context element is not built when no mappings are defined.
    $values['context'] = isset($values['context']) ? trim($values['context']) : '';
    $error = FALSE;
    if (!empty($values['context'])) {
      libxml_clear_errors();
      $xml->xpath($values['context']);
      $error = libxml_get_last_error();
    }

    // Error code 1219 is undefined namespace prefix.
    // Our sample doc doesn't have any namespaces let alone the one they're
    // trying to use. Besides, if someone is trying to use a namespace in an
    // XPath query, they're probably right.
    if ($error && $error->code != 1219) {
      $element = 'feeds][' . $class . '][xpath][context';
      if ($config_form) {
        $element = 'xpath][context';
      }
      form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
    }

    // The source fields are likewise missing when no mappings are defined.
    if (!empty($values['sources']) && is_array($values['sources'])) {
      foreach ($values['sources'] as $key => &$query) {
        $query = trim($query);
        if (empty($query)) {
          continue;
        }
        // Reset the libxml error state first. An error that was deliberately
        // ignored for an earlier query would otherwise still be the "last
        // error" and be blamed on this one.
        libxml_clear_errors();
        $xml->xpath($query);
        $error = libxml_get_last_error();
        if (!$error || $error->code == 1219) {
          continue;
        }
        $variable_present = FALSE;
        // Our variable substitution options can cause syntax errors, check
        // if we're doing that.
        if ($error->code == 1207) {
          foreach ($mappings as $target) {
            if (strpos($query, '$' . $target) !== FALSE) {
              $variable_present = TRUE;
              break;
            }
          }
        }
        if (!$variable_present) {
          $element = 'feeds][' . $class . '][xpath][sources][' . $key;
          if ($config_form) {
            $element = 'xpath][sources][' . $key;
          }
          form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
        }
      }
      // Break the reference so later writes to $query cannot alter the last
      // source value.
      unset($query);
    }
    $this->errorStop($use_errors, FALSE);
  }
442

    
443
  /**
   * Overrides parent::getMappingSources().
   *
   * Offers one new "XPath Expression" source whose key carries the next free
   * index after the ones already used by this parser's mappings.
   *
   * @return array
   *   The new XPath source, followed by the parent's mapping sources.
   */
  public function getMappingSources() {
    $own_keys = array_keys($this->getOwnMappings());

    $next = 0;
    // Mappings can be re-ordered, so the highest index has to be searched for.
    foreach ($own_keys as $source_key) {
      $parts = explode(':', $source_key);
      if ($parts[1] > $next) {
        $next = $parts[1];
      }
    }
    // Index 0 is only handed out while no mapping exists yet.
    if (!empty($own_keys)) {
      $next++;
    }

    $xpath_source = array(
      'xpathparser:' . $next => array(
        'name' => t('XPath Expression'),
        'description' => t('Allows you to configure an XPath expression that will populate this field.'),
      ),
    );

    return $xpath_source + parent::getMappingSources();
  }
466

    
467
  /**
   * Gets the unique mapping targets of the importer.
   *
   * @return array
   *   The names of the targets flagged as unique, keyed by mapping source.
   *   The target key is used when the processor reports no name for it, which
   *   matches the fallback in getOwnMappings().
   */
  protected function getUniques() {
    $uniques = array();
    $importer = feeds_importer($this->id);

    $targets = $importer->processor->getMappingTargets();
    foreach ($importer->processor->getMappings() as $mapping) {
      if (empty($mapping['unique'])) {
        continue;
      }
      $target = $mapping['target'];
      // Fall back to the target key instead of reading an undefined index.
      $uniques[$mapping['source']] = isset($targets[$target]['name']) ? $targets[$target]['name'] : $target;
    }

    return $uniques;
  }
486

    
487
  /**
   * Gets the mappings that are defined by this parser.
   *
   * The mappings begin with "xpathparser:".
   *
   * @param bool $label
   *   (Optional) When TRUE, targets are replaced by their names where the
   *   processor provides one. Defaults to FALSE.
   *
   * @return array
   *   An array of mappings keyed source => target, or source => target name
   *   when $label is TRUE.
   */
  protected function getOwnMappings($label = FALSE) {
    $importer = feeds_importer($this->id);
    $own = $this->filterMappings($importer->processor->getMappings());
    if (!$label) {
      return $own;
    }

    $targets = $importer->processor->getMappingTargets();
    foreach ($own as $source => $target) {
      // Targets without a name keep their key as the label.
      if (isset($targets[$target]['name'])) {
        $own[$source] = $targets[$target]['name'];
      }
    }

    return $own;
  }
507

    
508
  /**
   * Picks the mappings owned by this parser out of a processor's mappings.
   *
   * A mapping is owned when its source starts with "xpathparser:".
   *
   * @param array $mappings
   *   A mapping array from a processor.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function filterMappings(array $mappings) {
    $prefix = 'xpathparser:';
    $owned = array();
    foreach ($mappings as $candidate) {
      if (strpos($candidate['source'], $prefix) !== 0) {
        continue;
      }
      $owned[$candidate['source']] = $candidate['target'];
    }
    return $owned;
  }
526

    
527
  /**
   * Switches libxml to internal error collection.
   *
   * Any errors collected so far are discarded first.
   *
   * @return bool
   *   The previous value of use_errors, to be handed back to errorStop().
   */
  protected function errorStart() {
    libxml_clear_errors();
    $previous = libxml_use_internal_errors(TRUE);
    return $previous;
  }
537

    
538
  /**
   * Stops custom error handling.
   *
   * Optionally reports the collected libxml errors as Drupal messages, then
   * clears them and restores the previous use_errors setting.
   *
   * @param bool $use
   *   The previous value of use_errors.
   * @param bool $print
   *   (Optional) Whether to print errors to the screen. Defaults to TRUE.
   */
  protected function errorStop($use, $print = TRUE) {
    if ($print) {
      foreach (libxml_get_errors() as $error) {
        // Fatal errors are shown as errors. Everything else is shown as a
        // warning, so $type is always defined and never carried over from the
        // previous error.
        $type = ($error->level == LIBXML_ERR_FATAL) ? 'error' : 'warning';
        $args = array(
          '%error' => trim($error->message),
          '%num' => $error->line,
          '%code' => $error->code,
        );
        $message = t('%error on line %num. Error code: %code', $args);
        drupal_set_message($message, $type, FALSE);
      }
    }
    libxml_clear_errors();
    libxml_use_internal_errors($use);
  }
571

    
572
  /**
   * Overrides parent::hasSourceConfig().
   *
   * Answers directly so that Feeds does not have to build our form over and
   * over again just to find out.
   *
   * @return bool
   *   Always TRUE.
   */
  public function hasSourceConfig() {
    return TRUE;
  }
580

    
581
}