Projet

Général

Profil

Paste
Télécharger (2,21 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_xpathparser / FeedsXPathParserHTML.inc @ 0b4524f6

1 85ad3d82 Assos Assos
<?php
2
3
/**
4
 * @file
5 f066bdb5 Assos Assos
 * Contains FeedsXPathParserHTML.
6
 */
7
8
/**
 * XPath parsing for HTML.
 *
 * Builds a DOMDocument from the raw fetcher output, optionally repairing the
 * markup with the tidy extension first, and serializes individual nodes back
 * to markup.
 */
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Whether this version of PHP has a useable saveHTML() method.
   *
   * @var bool
   */
  protected $hasSaveHTML = FALSE;

  /**
   * {@inheritdoc}
   */
  public function __construct($id) {
    parent::__construct($id);

    // DOMDocument::saveHTML() cannot take $node as an argument prior to 5.3.6.
    if (version_compare(PHP_VERSION, '5.3.6', '>=')) {
      $this->hasSaveHTML = TRUE;
    }
  }

  /**
   * Parses the fetched HTML into a DOMDocument.
   *
   * @param array $source_config
   *   The source configuration. The 'exp' sub-array is read for the 'tidy',
   *   'tidy_encoding' and 'errors' settings.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose getRaw() output is parsed.
   *
   * @return DOMDocument
   *   The parsed document.
   *
   * @throws Exception
   *   Thrown when there is nothing to parse or the HTML cannot be loaded.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    $raw = $fetcher_result->getRaw();

    if (!empty($source_config['exp']['tidy']) && extension_loaded('tidy')) {
      $config = array(
        'merge-divs'       => FALSE,
        'merge-spans'      => FALSE,
        'join-styles'      => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap'             => 0,
        'tidy-mark'        => FALSE,
        'escape-cdata'     => TRUE,
        'word-2000'        => TRUE,
      );
      // Default tidy encoding is UTF8. Fall back to it explicitly so that a
      // missing or empty setting does not raise an undefined-index notice.
      $encoding = !empty($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string($raw, $config, $encoding);
    }

    // DOMDocument::loadHTML() rejects an empty source (a warning on older PHP
    // versions, a ValueError on PHP 8), and tidy_repair_string() may return
    // FALSE. Fail early with the exception callers already expect, before the
    // custom error handling below is started, so it is never left half-open.
    if (!is_string($raw) || $raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;
    $document->recover = TRUE;

    // Use our own error handling.
    $use = $this->errorStart();

    // The $options argument of loadHTML() is only available from PHP 5.4.0.
    if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
      // Never fetch external resources while parsing.
      $options = LIBXML_NONET;
      // These constants depend on the libxml version PHP was built against.
      $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
      $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

      $success = $document->loadHTML($raw, $options);
    }
    else {
      $success = $document->loadHTML($raw);
    }

    // Pass the 'errors' setting through unchanged, reading it defensively so a
    // missing key does not raise an undefined-index notice.
    $errors_setting = isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : NULL;
    $this->errorStop($use, $errors_setting);

    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    return $document;
  }

  /**
   * Returns the markup for a single node.
   *
   * @param DOMNode $node
   *   The node to serialize. Presumably it belongs to $this->doc, which is
   *   declared outside this class; confirm against the base class.
   *
   * @return string
   *   The result of saveHTML($node) when that is supported, otherwise of
   *   saveXML($node, LIBXML_NOEMPTYTAG).
   */
  protected function getRaw(DOMNode $node) {
    if ($this->hasSaveHTML) {
      return $this->doc->saveHTML($node);
    }

    // Older PHP: fall back to XML serialization, expanding empty elements
    // into explicit open/close tag pairs.
    return $this->doc->saveXML($node, LIBXML_NOEMPTYTAG);
  }

}