Projet

Général

Profil

Paste
Télécharger (2,21 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_xpathparser / FeedsXPathParserHTML.inc @ a45e4bc1

1
<?php
2

    
3
/**
4
 * @file
5
 * Contains FeedsXPathParserHTML.
6
 */
7

    
8
/**
 * XPath parsing for HTML.
 *
 * Builds a DOMDocument from the fetched markup with DOMDocument::loadHTML(),
 * optionally repairing the markup with the tidy extension first.
 */
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Whether this version of PHP has a useable saveHTML() method.
   *
   * Determined once in the constructor and consulted by getRaw().
   *
   * @var bool
   */
  protected $hasSaveHTML = FALSE;

  /**
   * {@inheritdoc}
   */
  public function __construct($id) {
    parent::__construct($id);

    // DOMDocument::saveHTML() cannot take $node as an argument prior to 5.3.6.
    $this->hasSaveHTML = version_compare(PHP_VERSION, '5.3.6', '>=');
  }

  /**
   * {@inheritdoc}
   *
   * Reads the raw markup from the fetcher result, optionally runs it through
   * tidy, and loads it into a DOMDocument.
   *
   * @param array $source_config
   *   The source configuration. The keys read here are
   *   $source_config['exp']['tidy'], $source_config['exp']['tidy_encoding']
   *   and $source_config['exp']['errors'].
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result providing the raw markup.
   *
   * @return DOMDocument
   *   The loaded document.
   *
   * @throws Exception
   *   If the markup is empty or could not be loaded.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    $raw = $fetcher_result->getRaw();

    if (!empty($source_config['exp']['tidy']) && extension_loaded('tidy')) {
      $config = array(
        'merge-divs'       => FALSE,
        'merge-spans'      => FALSE,
        'join-styles'      => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap'             => 0,
        'tidy-mark'        => FALSE,
        'escape-cdata'     => TRUE,
        'word-2000'        => TRUE,
      );
      // Default tidy encoding is UTF8. Fall back to it when the setting is
      // missing or empty instead of handing tidy an undefined value.
      $encoding = !empty($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string($raw, $config, $encoding);
    }

    // Nothing to parse: either the fetcher returned no content or tidy failed
    // and returned FALSE. Fail early with the usual exception, before error
    // handling is toggled, rather than relying on how loadHTML() treats empty
    // input, which differs between PHP versions.
    if ((string) $raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;
    $document->recover = TRUE;

    // Use our own error handling.
    $use = $this->errorStart();

    // The $options argument of loadHTML() is only available as of PHP 5.4.
    if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
      $options = LIBXML_NONET;
      // These flags are not defined by every libxml build, so add them only
      // when present.
      $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
      $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

      $success = $document->loadHTML($raw, $options);
    }
    else {
      $success = $document->loadHTML($raw);
    }

    $this->errorStop($use, $source_config['exp']['errors']);

    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    return $document;
  }

  /**
   * {@inheritdoc}
   *
   * Serializes a node with saveHTML() when that method accepts a node, and
   * with saveXML() otherwise.
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The markup for the node.
   */
  protected function getRaw(DOMNode $node) {
    if ($this->hasSaveHTML) {
      return $this->doc->saveHTML($node);
    }

    // LIBXML_NOEMPTYTAG keeps empty elements expanded (<div></div>) rather
    // than self-closed.
    return $this->doc->saveXML($node, LIBXML_NOEMPTYTAG);
  }

}