Révision 0b4524f6
Ajouté par Assos Assos il y a presque 9 ans
drupal7/sites/all/modules/feeds_xpathparser/FeedsXPathParserHTML.inc | ||
---|---|---|
10 | 10 |
*/ |
11 | 11 |
class FeedsXPathParserHTML extends FeedsXPathParserBase { |
12 | 12 |
|
13 |
/** |
|
14 |
* Whether this version of PHP has a useable saveHTML() method. |
|
15 |
* |
|
16 |
* @var bool |
|
17 |
*/ |
|
13 | 18 |
protected $hasSaveHTML = FALSE; |
14 | 19 |
|
15 | 20 |
/** |
16 |
* Overrides parent::__construct().
|
|
21 |
* {@inheritdoc}
|
|
17 | 22 |
*/ |
18 | 23 |
public function __construct($id) { |
19 | 24 |
parent::__construct($id); |
... | ... | |
25 | 30 |
} |
26 | 31 |
|
27 | 32 |
/** |
28 |
* Implements FeedsXPathParserBase::setup().
|
|
33 |
* {@inheritdoc}
|
|
29 | 34 |
*/ |
30 | 35 |
protected function setup($source_config, FeedsFetcherResult $fetcher_result) { |
31 |
|
|
32 | 36 |
if (!empty($source_config['exp']['tidy']) && extension_loaded('tidy')) { |
33 | 37 |
$config = array( |
34 | 38 |
'merge-divs' => FALSE, |
... | ... | |
42 | 46 |
); |
43 | 47 |
// Default tidy encoding is UTF8. |
44 | 48 |
$encoding = $source_config['exp']['tidy_encoding']; |
45 |
$raw = tidy_repair_string(trim($fetcher_result->getRaw()), $config, $encoding);
|
|
49 |
$raw = tidy_repair_string($fetcher_result->getRaw(), $config, $encoding);
|
|
46 | 50 |
} |
47 | 51 |
else { |
48 | 52 |
$raw = $fetcher_result->getRaw(); |
49 | 53 |
} |
50 |
$doc = new DOMDocument(); |
|
54 |
|
|
55 |
$document = new DOMDocument(); |
|
56 |
$document->strictErrorChecking = FALSE; |
|
57 |
$document->recover = TRUE; |
|
58 |
|
|
51 | 59 |
// Use our own error handling. |
52 | 60 |
$use = $this->errorStart(); |
53 |
$success = $doc->loadHTML($raw); |
|
54 |
unset($raw); |
|
61 |
|
|
62 |
if (version_compare(PHP_VERSION, '5.4.0', '>=')) { |
|
63 |
$options = LIBXML_NONET; |
|
64 |
$options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0; |
|
65 |
$options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0; |
|
66 |
|
|
67 |
$success = $document->loadHTML($raw, $options); |
|
68 |
} |
|
69 |
else { |
|
70 |
$success = $document->loadHTML($raw); |
|
71 |
} |
|
72 |
|
|
55 | 73 |
$this->errorStop($use, $source_config['exp']['errors']); |
74 |
|
|
56 | 75 |
if (!$success) { |
57 | 76 |
throw new Exception(t('There was an error parsing the HTML document.')); |
58 | 77 |
} |
59 |
return $doc; |
|
78 |
|
|
79 |
return $document; |
|
60 | 80 |
} |
61 | 81 |
|
82 |
/** |
|
83 |
* {@inheritdoc} |
|
84 |
*/ |
|
62 | 85 |
protected function getRaw(DOMNode $node) { |
63 | 86 |
if ($this->hasSaveHTML) { |
64 | 87 |
return $this->doc->saveHTML($node); |
... | ... | |
66 | 89 |
|
67 | 90 |
return $this->doc->saveXML($node, LIBXML_NOEMPTYTAG); |
68 | 91 |
} |
92 |
|
|
69 | 93 |
} |
Formats disponibles : Unified diff
Weekly update of contrib modules