Projet

Général

Profil

Paste
Télécharger (1,76 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_xpathparser / FeedsXPathParserHTML.inc @ 70a4c29b

1
<?php
2

    
3
/**
4
 * @file
5
 * Contains FeedsXPathParserHTML.
6
 */
7

    
8
/**
9
 * XPath parsing for HTML.
10
 */
11
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * TRUE when DOMDocument::saveHTML() can be given a node on this PHP version.
   *
   * @var bool
   */
  protected $hasSaveHTML = FALSE;

  /**
   * Overrides parent::__construct().
   *
   * Runs the parent constructor, then records whether the running PHP version
   * is recent enough for DOMDocument::saveHTML() to accept a node argument.
   *
   * @param mixed $id
   *   Passed through unchanged to the parent constructor.
   */
  public function __construct($id) {
    parent::__construct($id);

    // Before PHP 5.3.6, DOMDocument::saveHTML() does not accept a node, so the
    // flag is only switched on from that version onwards.
    if (version_compare(PHP_VERSION, '5.3.6') >= 0) {
      $this->hasSaveHTML = TRUE;
    }
  }

  /**
   * Implements FeedsXPathParserBase::setup().
   *
   * Loads the fetched markup into a DOMDocument, optionally running it through
   * tidy first.
   *
   * @param array $source_config
   *   Source configuration. The keys read here are $source_config['exp']['tidy'],
   *   $source_config['exp']['tidy_encoding'] and
   *   $source_config['exp']['errors'].
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose raw content is parsed.
   *
   * @return DOMDocument
   *   The document built from the (optionally tidied) markup.
   *
   * @throws Exception
   *   When DOMDocument::loadHTML() reports failure.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    // Tidy is applied only when requested in the configuration and when the
    // extension is actually available.
    $use_tidy = !empty($source_config['exp']['tidy']) && extension_loaded('tidy');

    if (!$use_tidy) {
      $markup = $fetcher_result->getRaw();
    }
    else {
      $tidy_options = array(
        'merge-divs'       => FALSE,
        'merge-spans'      => FALSE,
        'join-styles'      => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap'             => 0,
        'tidy-mark'        => FALSE,
        'escape-cdata'     => TRUE,
        'word-2000'        => TRUE,
      );
      // The encoding comes from the source configuration; per the original
      // note, the default tidy encoding is UTF8.
      $tidy_encoding = $source_config['exp']['tidy_encoding'];
      $markup = tidy_repair_string(trim($fetcher_result->getRaw()), $tidy_options, $tidy_encoding);
    }

    $document = new DOMDocument();

    // Parse under our own error handling; errorStart()/errorStop() are defined
    // outside this class.
    $previous = $this->errorStart();
    $loaded = $document->loadHTML($markup);
    // The raw string is no longer needed once the document has been built.
    unset($markup);
    $this->errorStop($previous, $source_config['exp']['errors']);

    if ($loaded) {
      return $document;
    }

    throw new Exception(t('There was an error parsing the HTML document.'));
  }

  /**
   * Serializes a single node of the current document to a string.
   *
   * Uses DOMDocument::saveHTML() when it supports a node argument, otherwise
   * falls back to DOMDocument::saveXML() with LIBXML_NOEMPTYTAG.
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The markup produced by $this->doc for the node.
   */
  protected function getRaw(DOMNode $node) {
    if (!$this->hasSaveHTML) {
      return $this->doc->saveXML($node, LIBXML_NOEMPTYTAG);
    }

    return $this->doc->saveHTML($node);
  }
}