Projet

Général

Profil

Paste
Télécharger (19,1 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_xpathparser / FeedsXPathParserBase.inc @ 1f142f4f

1
<?php
2

    
3
/**
4
 * @file
5
 * Provides the base class for FeedsXPathParserHTML and FeedsXPathParserXML.
6
 */
7

    
8
/**
9
 * Base class for the HTML and XML parsers.
10
 */
11
abstract class FeedsXPathParserBase extends FeedsParser {
12

    
13
  /**
14
   * The DOMDocument used for parsing.
15
   *
16
   * @var DOMDocument
17
   */
18
  protected $doc;
19

    
20
  /**
21
   * The return value of libxml_disable_entity_loader().
22
   *
23
   * @var bool
24
   */
25
  protected $loader;
26

    
27
  /**
28
   * The elements that should be displayed in raw XML.
29
   *
30
   * @var array
31
   */
32
  protected $rawXML = array();
33

    
34
  /**
35
   * The DOMXPath object used for parsing.
36
   *
37
   * @var DOMXPath
38
   */
39
  protected $xpath;
40

    
41
  /**
   * Builds the document that the XPath queries are evaluated against.
   *
   * Every concrete parser extending FeedsXPathParserBase has to provide this.
   * parse() stores the returned document and wraps it in the XPath helper.
   *
   * @param array $source_config
   *   The configuration for the source.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result that was passed to parse().
   *
   * @return DOMDocument
   *   The DOMDocument to perform XPath queries on.
   */
  abstract protected function setup($source_config, FeedsFetcherResult $fetcher_result);
53

    
54
  /**
   * Converts a node into its raw string form.
   *
   * parseSourceElement() calls this for every result node of a source that is
   * listed in the rawXML configuration, instead of reading the node value.
   *
   * @param DOMNode $node
   *   The DOMNode to convert to a string.
   *
   * @return string
   *   The string representation of the DOMNode.
   */
  abstract protected function getRaw(DOMNode $node);
64

    
65
  /**
   * Implements FeedsParser::parse().
   *
   * Runs the configured context query against the document returned by
   * setup(), one batch at a time, and evaluates every mapped source query
   * relative to each context node of the batch.
   *
   * @param FeedsSource $source
   *   The source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result, handed on to setup().
   *
   * @return FeedsParserResult
   *   The parsed items, plus the link taken from the fetcher configuration.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $source_config = $source->getConfigFor($this);
    $state = $source->state(FEEDS_PARSE);

    if (empty($source_config)) {
      $source_config = $this->getConfig();
    }

    $this->doc = $this->setup($source_config, $fetcher_result);

    // Stored configuration may lack some of the keys read below. Fill the
    // gaps from configDefaults() so a missing key means "nothing selected"
    // instead of raising notices or handing NULL to array_filter().
    $defaults = $this->configDefaults();
    $source_config += $defaults;
    $source_config['exp'] = (array) $source_config['exp'] + $defaults['exp'];

    $parser_result = new FeedsParserResult();

    $mappings = $this->getOwnMappings();
    $this->rawXML = array_keys(array_filter((array) $source_config['rawXML']));
    // Set link.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    $parser_result->link = isset($fetcher_config['source']) ? $fetcher_config['source'] : '';

    $this->xpath = new FeedsXPathParserDOMXPath($this->doc);
    $config = array();
    $config['debug'] = array_keys(array_filter((array) $source_config['exp']['debug']));
    $config['errors'] = $source_config['exp']['errors'];

    $this->xpath->setConfig($config);

    $context_query = '(' . $source_config['context'] . ')';
    if (empty($state->total)) {
      $state->total = $this->xpath->namespacedQuery('count(' . $context_query . ')', $this->doc, 'count');
    }

    // Work out the slice of context nodes that belongs to this batch.
    $start = $state->pointer ? $state->pointer : 0;
    $limit = $start + $source->importer->getLimit();
    $end = ($limit > $state->total) ? $state->total : $limit;
    $state->pointer = $end;

    $context_query .= "[position() > $start and position() <= $end]";

    $progress = $state->pointer ? $state->pointer : 0;

    $all_nodes = $this->xpath->namespacedQuery($context_query, NULL, 'context');

    // namespacedQuery() does not always return a node list (see
    // parseSourceElement()); only iterate over something iterable.
    if (!is_array($all_nodes) && !($all_nodes instanceof Traversable)) {
      $all_nodes = array();
    }

    // The source config could have old values that don't exist in the importer.
    $sources = array_intersect_key((array) $source_config['sources'], $mappings);

    foreach ($all_nodes as $node) {
      // Invoke a hook to check whether the domnode should be skipped.
      if (in_array(TRUE, module_invoke_all('feeds_xpathparser_filter_domnode', $node, $this->doc, $source), TRUE)) {
        continue;
      }

      $parsed_item = $variables = array();
      foreach ($sources as $element_key => $query) {
        // Variable substitution: results of earlier queries are available to
        // later ones as $<target name>.
        $query = strtr($query, $variables);
        // Parse the item.
        $result = $this->parseSourceElement($query, $node, $element_key);
        if (isset($result)) {
          $variables['$' . $mappings[$element_key]] = is_array($result) ? reset($result) : $result;
          $parsed_item[$element_key] = $result;
        }
      }
      if (!empty($parsed_item)) {
        $parser_result->items[] = $parsed_item;
      }
    }

    $state->progress($state->total, $progress);
    unset($this->doc);
    unset($this->xpath);
    return $parser_result;
  }
139

    
140
  /**
   * Evaluates one source query relative to a context node.
   *
   * @param string $query
   *   An XPath query.
   * @param DOMNode $context
   *   The current context DOMNode.
   * @param string $source
   *   The name of the source for this query.
   *
   * @return mixed
   *   NULL for an empty query or an empty node list, a single string when the
   *   node list holds exactly one node, an array of strings when it holds
   *   several, or whatever namespacedQuery() returned when that is not a
   *   DOMNodeList.
   */
  protected function parseSourceElement($query, $context, $source) {
    if (empty($query)) {
      return NULL;
    }

    $queried = $this->xpath->namespacedQuery($query, $context, $source);

    // A value was returned directly from namespacedQuery().
    if (!($queried instanceof DOMNodeList)) {
      return $queried;
    }

    // Sources configured to return raw XML go through getRaw(); all others
    // use the plain node value.
    $wants_raw = in_array($source, $this->rawXML);
    $values = array();
    foreach ($queried as $item) {
      $values[] = $wants_raw ? $this->getRaw($item) : $item->nodeValue;
    }

    switch (count($values)) {
      case 0:
        // Empty result returns NULL, that way callers can check with isset().
        return NULL;

      case 1:
        // A lone result is returned unwrapped.
        return $values[0];

      default:
        return $values;
    }
  }
192

    
193
  /**
   * Overrides parent::sourceForm().
   *
   * Builds the "XPath Parser Settings" fieldset: the context query, one query
   * field per XPath mapping, the raw XML selection and the debug options.
   *
   * @param array $source_config
   *   The configuration to take default values from. When empty, the parser's
   *   own configuration is used.
   *
   * @return array|null
   *   The form array. Nothing is returned when overriding is disabled and the
   *   call does not come from configForm() (which sets the 'config' flag).
   */
  public function sourceForm($source_config) {
    $form = array();
    $importer = feeds_importer($this->id);
    $importer_config = $importer->getConfig();

    if (empty($source_config)) {
      $source_config = $this->getConfig();
    }

    if (isset($source_config['allow_override']) &&
        !$source_config['allow_override'] &&
        empty($source_config['config'])) {
      return;
    }

    // Add extensions that might get imported.
    $allowed_extensions = isset($importer_config['fetcher']['config']['allowed_extensions']) ? $importer_config['fetcher']['config']['allowed_extensions'] : FALSE;
    if ($allowed_extensions) {
      if (strpos($allowed_extensions, 'html') === FALSE) {
        $importer->fetcher->config['allowed_extensions'] .= ' html htm';
      }
    }

    $uniques = $this->getUniques();
    $mappings = $this->getOwnMappings();
    $targets = $importer->processor->getMappingTargets();

    $form['xpath'] = array(
      '#type' => 'fieldset',
      '#tree' => TRUE,
      '#title' => t('XPath Parser Settings'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    if (empty($mappings)) {
      // Detect if Feeds menu structure has changed. feeds_ui_menu() only
      // exists while the Feeds UI module is enabled, so guard the call rather
      // than triggering a fatal error.
      $feeds_menu = function_exists('feeds_ui_menu') ? feeds_ui_menu() : array();
      if (isset($feeds_menu['admin/structure/feeds/list'])) {
        $feeds_base = 'admin/structure/feeds/edit/';
      }
      else {
        $feeds_base = 'admin/structure/feeds/';
      }
      $form['xpath']['error_message']['#markup'] = '<div class="help">' . t('No XPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
      return $form;
    }
    $form['xpath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('This is the base query, all other queries will run in this context.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    $form['xpath']['sources'] = array(
      '#type' => 'fieldset',
      '#tree' => TRUE,
    );
    if (!empty($uniques)) {
      $unique_list = implode(', ', $uniques);
      // format_plural() translates the strings itself, so it must receive the
      // untranslated singular/plural pair plus the placeholder values.
      $items = array(
        format_plural(count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array('!column' => $unique_list, '!columns' => $unique_list)
        ),
      );
      $form['xpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
    }
    // Each query may reference the targets of the queries listed before it,
    // so the list of available variables grows as the fields are built.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['xpath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => isset($targets[$target]['name']) ? check_plain($targets[$target]['name']) : check_plain($target),
        '#description' => t('The XPath query to run.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
        '#size' => 80,
      );
      if (!empty($variables)) {
        $variable_text = format_plural(count($variables),
          'The variable %variable is available for replacement.',
          'The variables %variable are available for replacement.',
          array('%variable' => implode(', ', $variables))
        );
        $form['xpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
      }
      $variables[] = '$' . $target;
    }
    $form['xpath']['rawXML'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Select the queries you would like to return raw XML or HTML'),
      '#options' => $this->getOwnMappings(TRUE),
      '#default_value' => isset($source_config['rawXML']) ? $source_config['rawXML'] : array(),
    );
    $form['xpath']['exp'] = array(
      '#type' => 'fieldset',
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#tree' => TRUE,
      '#title' => t('Debug Options'),
    );
    $form['xpath']['exp']['errors'] = array(
      '#type' => 'checkbox',
      '#title' => t('Show error messages.'),
      '#default_value' => isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : FALSE,
    );
    if (extension_loaded('tidy')) {
      $form['xpath']['exp']['tidy'] = array(
        '#type' => 'checkbox',
        '#title' => t('Use Tidy'),
        '#description' => t('The Tidy PHP extension has been detected.
                              Select this to clean the markup before parsing.'),
        '#default_value' => isset($source_config['exp']['tidy']) ? $source_config['exp']['tidy'] : FALSE,
      );
      $form['xpath']['exp']['tidy_encoding'] = array(
        '#type' => 'textfield',
        '#title' => t('Tidy encoding'),
        '#description' => t('Set the encoding for tidy. See the !phpdocs for possible values.', array('!phpdocs' => l(t('PHP docs'), 'http://www.php.net/manual/en/tidy.parsestring.php/'))),
        '#default_value' => isset($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8',
        '#states' => array(
          'visible' => array(
            ':input[name$="[tidy]"]' => array(
              'checked' => TRUE,
            ),
          ),
        ),
      );
    }
    $form['xpath']['exp']['debug'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array('context' => t('Context')), $this->getOwnMappings(TRUE)),
      '#default_value' => isset($source_config['exp']['debug']) ? $source_config['exp']['debug'] : array(),
    );
    return $form;
  }
334

    
335
  /**
   * Overrides parent::configForm().
   *
   * Reuses sourceForm() for the importer-level settings, then relaxes the
   * context requirement, expands the fieldset and adds the override switch.
   */
  public function configForm(&$form_state) {
    $config = $this->getConfig();
    // Tells sourceForm() that it is building the config form, so the form is
    // returned even when overriding is disabled.
    $config['config'] = TRUE;

    $override_element = array(
      '#type' => 'checkbox',
      '#title' => t('Allow source configuration override'),
      '#description' => t('This setting allows feed nodes to specify their own XPath values for the context and sources.'),
      '#default_value' => $config['allow_override'],
    );

    $form = $this->sourceForm($config);
    $form['xpath']['context']['#required'] = FALSE;
    $form['xpath']['#collapsed'] = FALSE;
    $form['xpath']['allow_override'] = $override_element;

    return $form;
  }
353

    
354
  /**
   * Overrides parent::sourceDefaults().
   *
   * Sources start without any configuration of their own.
   */
  public function sourceDefaults() {
    $defaults = array();
    return $defaults;
  }
360

    
361
  /**
   * Overrides parent::configDefaults().
   *
   * @return array
   *   The default parser configuration: no queries, no raw XML sources, all
   *   debug options off and source overrides allowed.
   */
  public function configDefaults() {
    // Defaults for the "Debug Options" fieldset.
    $debug_options = array(
      'errors' => FALSE,
      'tidy' => FALSE,
      'debug' => array(),
      'tidy_encoding' => 'UTF8',
    );

    $defaults = array(
      'sources' => array(),
      'rawXML' => array(),
      'context' => '',
      'exp' => $debug_options,
      'allow_override' => TRUE,
    );

    return $defaults;
  }
378

    
379
  /**
   * Overrides parent::sourceFormValidate().
   *
   * If the values of this source are the same as the base config we set them to
   * blank so that the values will be inherited from the importer defaults.
   * The same happens when overriding is disabled or no XPath values were
   * submitted at all.
   *
   * @param array $values
   *   The submitted source form values, expected to carry the parser values
   *   under the 'xpath' key. Replaced in place by the unwrapped values, or by
   *   an empty array when the importer configuration should be inherited.
   */
  public function sourceFormValidate(&$values) {
    $config = $this->getConfig();
    $allow_override = !empty($config['allow_override']);
    unset($config['allow_override']);

    // sourceForm() renders nothing when overriding is disabled, so there may
    // be no 'xpath' values to look at. Bail out before touching them.
    if (!$allow_override || !isset($values['xpath']) || !is_array($values['xpath'])) {
      $values = array();
      return;
    }

    $values = $values['xpath'];
    ksort($values);
    ksort($config);
    if ($values === $config) {
      $values = array();
      return;
    }

    $this->configFormValidate($values);
  }
399

    
400
  /**
   * Overrides parent::configFormValidate().
   *
   * Checks the context query and every source query for XPath syntax errors
   * by running them against an empty sample document. Also called by
   * sourceFormValidate(), which passes the values already unwrapped from the
   * 'xpath' key.
   *
   * @param array $values
   *   The submitted values. Unwrapped from 'xpath' when present, and updated
   *   in place with the trimmed queries.
   */
  public function configFormValidate(&$values) {
    $mappings = $this->getOwnMappings();

    // This tests if we're validating configForm or sourceForm.
    $config_form = FALSE;
    if (isset($values['xpath'])) {
      $values = $values['xpath'];
      $config_form = TRUE;
    }
    $class = get_class($this);
    $xml = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?>' . "\n<items></items>");
    $use_errors = $this->errorStart();

    // The context field is absent when the form was built without mappings.
    $values['context'] = isset($values['context']) ? trim($values['context']) : '';
    if (!empty($values['context'])) {
      $xml->xpath($values['context']);
      $error = libxml_get_last_error();

      // Error code 1219 is undefined namespace prefix.
      // Our sample doc doesn't have any namespaces let alone the one they're
      // trying to use. Besides, if someone is trying to use a namespace in an
      // XPath query, they're probably right.
      if ($error && $error->code != 1219) {
        $element = $config_form ? 'xpath][context' : 'feeds][' . $class . '][xpath][context';
        form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
      }
    }

    if (!empty($values['sources']) && is_array($values['sources'])) {
      foreach ($values['sources'] as $key => &$query) {
        $query = trim($query);
        if (empty($query)) {
          continue;
        }

        // Reset the libxml error state before every query. Otherwise an error
        // that was tolerated for an earlier query would still be the "last
        // error" and get blamed on this one.
        libxml_clear_errors();
        $xml->xpath($query);
        $error = libxml_get_last_error();
        if (!$error || $error->code == 1219) {
          continue;
        }

        $variable_present = FALSE;
        // Our variable substitution options can cause syntax errors, check
        // if we're doing that.
        if ($error->code == 1207) {
          foreach ($mappings as $target) {
            if (strpos($query, '$' . $target) !== FALSE) {
              $variable_present = TRUE;
              break;
            }
          }
        }
        if (!$variable_present) {
          $element = $config_form ? 'xpath][sources][' . $key : 'feeds][' . $class . '][xpath][sources][' . $key;
          form_set_error($element, t('There was an error with the XPath selector: %error', array('%error' => $error->message)));
        }
      }
      // Break the reference so later writes to $query cannot alter $values.
      unset($query);
    }

    $this->errorStop($use_errors, FALSE);
  }
464

    
465
  /**
   * Overrides parent::getMappingSources().
   *
   * Offers one fresh "xpathparser:N" source, where N is one higher than the
   * highest index currently used by this parser's mappings (0 when there are
   * none), followed by the sources of the parent class.
   */
  public function getMappingSources() {
    $next = 0;
    $used_keys = array_keys($this->getOwnMappings());

    if (!empty($used_keys)) {
      // Mappings can be re-ordered, so find the max.
      foreach ($used_keys as $used_key) {
        $parts = explode(':', $used_key);
        $index = $parts[1];
        if ($index > $next) {
          $next = $index;
        }
      }
      $next++;
    }

    $own_source = array(
      'xpathparser:' . $next => array(
        'name' => t('XPath Expression'),
        'description' => t('Allows you to configure an XPath expression that will populate this field.'),
      ),
    );

    return $own_source + parent::getMappingSources();
  }
488

    
489
  /**
   * Gets the mappings of this importer that are flagged as unique.
   *
   * @return array
   *   An array keyed by mapping source. Each value is the name of the mapped
   *   target, or the raw target key when the processor does not provide a
   *   name for it.
   */
  protected function getUniques() {
    $uniques = array();
    $importer = feeds_importer($this->id);

    $targets = $importer->processor->getMappingTargets();
    foreach ($importer->processor->getMappings() as $mapping) {
      if (empty($mapping['unique'])) {
        continue;
      }
      $target = $mapping['target'];
      // Same fallback as getOwnMappings(): a target without a name is shown
      // by its key instead of raising an undefined index notice.
      $uniques[$mapping['source']] = isset($targets[$target]['name']) ? $targets[$target]['name'] : $target;
    }

    return $uniques;
  }
508

    
509
  /**
   * Gets the mappings that are defined by this parser.
   *
   * The mappings begin with "xpathparser:".
   *
   * @param bool $label
   *   (Optional) When TRUE, each target is replaced by its name from the
   *   processor's mapping targets, falling back to the target key when no
   *   name is available. Defaults to FALSE.
   *
   * @return array
   *   An array of mappings keyed source => target, or source => target name
   *   when $label is TRUE.
   */
  protected function getOwnMappings($label = FALSE) {
    $processor = feeds_importer($this->id)->processor;
    $own = $this->filterMappings($processor->getMappings());

    if (!$label) {
      return $own;
    }

    $targets = $processor->getMappingTargets();
    $labelled = array();
    foreach ($own as $source => $target) {
      if (isset($targets[$target]['name'])) {
        $labelled[$source] = $targets[$target]['name'];
      }
      else {
        $labelled[$source] = $target;
      }
    }

    return $labelled;
  }
529

    
530
  /**
   * Picks out the mappings whose source starts with "xpathparser:".
   *
   * @param array $mappings
   *   A mapping array from a processor.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function filterMappings(array $mappings) {
    $own = array();
    foreach ($mappings as $candidate) {
      $source_key = $candidate['source'];
      // Anything not prefixed with our namespace belongs to someone else.
      if (strpos($source_key, 'xpathparser:') !== 0) {
        continue;
      }
      $own[$source_key] = $candidate['target'];
    }
    return $own;
  }
548

    
549
  /**
   * Starts custom error handling.
   *
   * Clears pending libxml errors, disables the libxml entity loader where
   * that is still appropriate, and switches libxml to internal error
   * collection. Pair every call with errorStop().
   *
   * @return bool
   *   The previous value of use_errors.
   */
  protected function errorStart() {
    libxml_clear_errors();
    // libxml_disable_entity_loader() still exists on PHP 8 but is deprecated
    // there, so a function_exists() check alone would trigger a deprecation
    // notice. Only call it on older PHP versions.
    if (version_compare(PHP_VERSION, '8.0.0', '<') && function_exists('libxml_disable_entity_loader')) {
      $this->loader = libxml_disable_entity_loader(TRUE);
    }

    return libxml_use_internal_errors(TRUE);
  }
563

    
564
  /**
   * Stops custom error handling.
   *
   * Optionally reports the collected libxml errors as Drupal messages, then
   * clears them and restores the error handling and entity loader settings
   * that were in place before errorStart().
   *
   * @param bool $use
   *   The previous value of use_errors.
   * @param bool $print
   *   (Optional) Whether to print errors to the screen. Defaults to TRUE.
   */
  protected function errorStop($use, $print = TRUE) {
    if ($print) {
      foreach (libxml_get_errors() as $error) {
        // Fatal errors are shown as errors. Every other level, including
        // ones outside the known warning/error pair, is shown as a warning
        // so that the message type is always defined.
        $type = ($error->level == LIBXML_ERR_FATAL) ? 'error' : 'warning';

        $args = array(
          '%error' => trim($error->message),
          '%num' => $error->line,
          '%code' => $error->code,
        );
        $message = t('%error on line %num. Error code: %code', $args);
        drupal_set_message($message, $type, FALSE);
      }
    }
    libxml_clear_errors();
    libxml_use_internal_errors($use);

    // Only restore the entity loader when errorStart() actually changed it.
    if (function_exists('libxml_disable_entity_loader') && isset($this->loader)) {
      libxml_disable_entity_loader($this->loader);
      unset($this->loader);
    }
  }
602

    
603
  /**
   * Overrides parent::hasSourceConfig().
   *
   * Always answers TRUE, which stops Feeds from building our form over and
   * over again.
   */
  public function hasSourceConfig() {
    $has_source_config = TRUE;
    return $has_source_config;
  }
611

    
612
}