Révision 7707c013
Ajouté par Assos Assos il y a presque 9 ans
drupal7/sites/all/modules/feeds_jsonpath_parser/FeedsJSONPathParser.inc | ||
---|---|---|
1 | 1 |
<?php |
2 |
// $Id: FeedsJSONPathParser.inc,v 1.1.2.4.2.4 2011/02/05 19:28:01 twistor Exp $ |
|
3 | 2 |
|
4 | 3 |
/** |
5 | 4 |
* @file |
6 |
* |
|
7 |
* Provides the Class for Feeds JSONPath Parser. |
|
5 |
* Contains FeedsJSONPathParser. |
|
8 | 6 |
*/ |
9 | 7 |
|
10 | 8 |
/** |
11 |
* Base class for the HTML and XML parsers.
|
|
9 |
* Parses JSON using JSONPath.
|
|
12 | 10 |
*/ |
13 | 11 |
class FeedsJSONPathParser extends FeedsParser { |
14 | 12 |
|
15 | 13 |
/** |
16 |
* Implementation of FeedsParser::parse(). |
|
14 |
* A regular expression that finds four byte UTF-8 chars. |
|
15 |
* |
|
16 |
* @var string |
|
17 |
*/ |
|
18 |
protected static $fourByteRegex = '/(?:\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})/s'; |
|
19 |
|
|
20 |
/** |
|
21 |
* The source fields to debug. |
|
22 |
* |
|
23 |
* @var array |
|
24 |
*/ |
|
25 |
protected $debug = array(); |
|
26 |
|
|
27 |
/** |
|
28 |
* Implements FeedsParser::parse(). |
|
17 | 29 |
*/ |
18 | 30 |
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) { |
19 |
$mappings = $source->importer->processor->config['mappings']; |
|
20 |
$mappings = $this->filterMappings($mappings); |
|
31 |
$mappings = $this->getOwnMappings(); |
|
21 | 32 |
$source_config = $source->getConfigFor($this); |
22 | 33 |
// Allow config inheritance. |
23 | 34 |
if (empty($source_config)) { |
... | ... | |
40 | 51 |
$array = json_decode($raw, TRUE); |
41 | 52 |
} |
42 | 53 |
|
43 |
if (is_array($array)) { |
|
44 |
require_once 'jsonpath-0.8.1.php'; |
|
45 |
|
|
46 |
$all_items = $this->jsonPath($array, $source_config['context']); |
|
47 |
$this->debug($all_items, 'context'); |
|
48 |
unset($array); |
|
49 |
|
|
50 |
foreach ($all_items as $item) { |
|
51 |
$parsed_item = $variables = array(); |
|
52 |
foreach ($source_config['sources'] as $source => $query) { |
|
53 |
$parsed = $this->parseSourceElement($item, $query, $source); |
|
54 |
// Avoid null values. |
|
55 |
if (isset($parsed)) { |
|
56 |
// Variable sunstitution can't handle arrays. |
|
57 |
if (!is_array($parsed)) { |
|
58 |
$variables['{' . $mappings[$source] . '}'] = $parsed; |
|
59 |
} |
|
60 |
else { |
|
61 |
$variables['{' . $mappings[$source] . '}'] = ''; |
|
62 |
} |
|
63 |
$parsed_item[$source] = $parsed; |
|
64 |
} |
|
54 |
if (!is_array($array)) { |
|
55 |
throw new Exception(t('There was an error decoding the JSON document.')); |
|
56 |
} |
|
57 |
require_once feeds_jsonpath_parser_library_path(); |
|
58 |
|
|
59 |
$all_items = $this->jsonPath($array, $source_config['context']); |
|
60 |
unset($array); |
|
61 |
|
|
62 |
// Batch. |
|
63 |
$state = $source->state(FEEDS_PARSE); |
|
64 |
if (!$state->total) { |
|
65 |
$state->total = count($all_items); |
|
66 |
} |
|
67 |
|
|
68 |
$start = (int) $state->pointer; |
|
69 |
$state->pointer = $start + $source->importer->getLimit(); |
|
70 |
$all_items = array_slice($all_items, $start, $source->importer->getLimit()); |
|
71 |
|
|
72 |
// Set progress state. |
|
73 |
$state->progress($state->total, $state->pointer); |
|
74 |
|
|
75 |
// Debug output. |
|
76 |
$this->debug($all_items, 'context'); |
|
77 |
|
|
78 |
foreach ($all_items as $item) { |
|
79 |
// Invoke a hook to check whether the item should be skipped. |
|
80 |
if ($this->invokeHook($item, $source) === TRUE) { |
|
81 |
continue; |
|
82 |
} |
|
83 |
|
|
84 |
$parsed_item = $variables = array(); |
|
85 |
foreach ($source_config['sources'] as $source_key => $query) { |
|
86 |
// Variable substitution. |
|
87 |
$query = strtr($query, $variables); |
|
88 |
$parsed = $this->parseSourceElement($item, $query, $source_key); |
|
89 |
|
|
90 |
$variables['{' . $mappings[$source_key] . '}'] = is_array($parsed) ? reset($parsed) : $parsed; |
|
91 |
|
|
92 |
// Avoid null values. |
|
93 |
if (isset($parsed)) { |
|
94 |
$parsed_item[$source_key] = $parsed; |
|
65 | 95 |
} |
96 |
} |
|
97 |
if (!empty($parsed_item)) { |
|
66 | 98 |
$result->items[] = $parsed_item; |
67 | 99 |
} |
68 | 100 |
} |
69 |
else { |
|
70 |
throw new Exception(t('There was an error decoding the JSON document.')); |
|
71 |
} |
|
72 | 101 |
return $result; |
73 | 102 |
} |
74 | 103 |
|
75 | 104 |
/** |
76 |
* Utilizes the jsonPath function from jsonpath-0.8.1.php |
|
105 |
* Utilizes the jsonPath function from jsonpath-0.8.1.php.
|
|
77 | 106 |
* |
78 | 107 |
* jsonPath returns false if the expression returns zero results and that will |
79 | 108 |
* mess up our for loops, so return an empty array instead. |
80 | 109 |
* |
81 |
* @todo |
|
82 |
* Firgure out error handling. |
|
83 |
* @param $array |
|
84 |
* The input array to parse |
|
85 |
* @$expression |
|
110 |
* @param array $array |
|
111 |
* The input array to parse. |
|
112 |
* @param string $expression |
|
86 | 113 |
* The JSONPath expression. |
114 |
* |
|
87 | 115 |
* @return array |
88 |
* Returns an array that is the output of jsonPath |
|
116 |
* Returns an array that is the output of jsonPath. |
|
117 |
* |
|
118 |
* @todo |
|
119 |
* Figure out error handling. |
|
89 | 120 |
*/ |
90 |
private function jsonPath($array, $expression) {
|
|
121 |
protected function jsonPath($array, $expression) {
|
|
91 | 122 |
$result = jsonPath($array, $expression); |
92 | 123 |
return ($result === FALSE) ? array() : $result; |
93 | 124 |
} |
... | ... | |
95 | 126 |
/** |
96 | 127 |
* Parses one item from the context array. |
97 | 128 |
* |
98 |
* @param $item |
|
99 |
* A PHP array.
|
|
100 |
* @param $query |
|
129 |
* @param array $item
|
|
130 |
* An array containing one item from the context.
|
|
131 |
* @param string $query
|
|
101 | 132 |
* A JSONPath query. |
133 |
* @param string $source |
|
134 |
* The source element that corresponds to the query. |
|
135 |
* |
|
102 | 136 |
* @return array |
103 | 137 |
* An array containing the results of the query. |
104 | 138 |
*/ |
... | ... | |
108 | 142 |
} |
109 | 143 |
$results = $this->jsonPath($item, $query); |
110 | 144 |
$this->debug($results, $source); |
111 |
unset($item); |
|
112 |
|
|
113 |
/** |
|
114 |
* If there is one result, return it directly. If there are no results, |
|
115 |
* return. Otherwise return the results. |
|
116 |
*/ |
|
117 |
if (count($results) === 1) { |
|
118 |
return $results[0]; |
|
119 |
} |
|
120 |
if (count($results) === 0) { |
|
145 |
|
|
146 |
$count = count($results); |
|
147 |
if ($count === 0) { |
|
121 | 148 |
return; |
122 | 149 |
} |
150 |
|
|
151 |
foreach ($results as $delta => $value) { |
|
152 |
if (is_string($value) && $value !== '') { |
|
153 |
$results[$delta] = !empty($this->config['convert_four_byte']) ? $this->convertFourBytes($value) : $this->stripFourBytes($value); |
|
154 |
} |
|
155 |
} |
|
156 |
|
|
157 |
if ($count === 1) { |
|
158 |
return reset($results); |
|
159 |
} |
|
160 |
|
|
123 | 161 |
return $results; |
124 | 162 |
} |
125 | 163 |
|
... | ... | |
132 | 170 |
if (empty($source_config)) { |
133 | 171 |
$source_config = $this->config; |
134 | 172 |
} |
173 |
|
|
174 |
if (isset($source_config['allow_override']) && |
|
175 |
!$source_config['allow_override'] && |
|
176 |
empty($source_config['config'])) { |
|
177 |
return; |
|
178 |
} |
|
179 |
|
|
135 | 180 |
// Add extensions that might get imported. |
136 | 181 |
$fetcher = feeds_importer($this->id)->fetcher; |
137 | 182 |
if (isset($fetcher->config['allowed_extensions'])) { |
... | ... | |
145 | 190 |
foreach ($mappings_ as $mapping) { |
146 | 191 |
if (strpos($mapping['source'], 'jsonpath_parser:') === 0) { |
147 | 192 |
$mappings[$mapping['source']] = $mapping['target']; |
148 |
if ($mapping['unique']) {
|
|
193 |
if (!empty($mapping['unique'])) {
|
|
149 | 194 |
$uniques[] = $mapping['target']; |
150 | 195 |
} |
151 | 196 |
} |
... | ... | |
158 | 203 |
'#tree' => TRUE, |
159 | 204 |
); |
160 | 205 |
if (empty($mappings)) { |
161 |
$form['jsonpath']['error_message']['#markup'] = t('FeedsJSONPathParser: No mappings were defined.'); |
|
206 |
// Detect if Feeds menu structure has changed. This will take a while to |
|
207 |
// be released, but since I run dev it needs to work. |
|
208 |
$feeds_menu = feeds_ui_menu(); |
|
209 |
if (isset($feeds_menu['admin/structure/feeds/list'])) { |
|
210 |
$feeds_base = 'admin/structure/feeds/edit/'; |
|
211 |
} |
|
212 |
else { |
|
213 |
$feeds_base = 'admin/structure/feeds/'; |
|
214 |
} |
|
215 |
$form['jsonpath']['error_message']['#markup'] = '<div class="help">' . t('No JSONPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />'; |
|
162 | 216 |
return $form; |
163 | 217 |
} |
164 | 218 |
$form['jsonpath']['context'] = array( |
... | ... | |
194 | 248 |
'#size' => 80, |
195 | 249 |
); |
196 | 250 |
if (!empty($variables)) { |
197 |
$form['jsonpath']['sources'][$source]['#description'] .= '<br>' . t('The variables '. implode(', ', $variables). ' are availliable for replacement.'); |
|
251 |
$variable_text = format_plural(count($variables), |
|
252 |
t('The variable %v is available for replacement.', array('%v' => implode(', ', $variables))), |
|
253 |
t('The variables %v are available for replacement.', array('%v' => implode(', ', $variables))) |
|
254 |
); |
|
255 |
$form['jsonpath']['sources'][$source]['#description'] .= '<br />' . $variable_text; |
|
198 | 256 |
} |
199 | 257 |
$variables[] = '{' . $target . '}'; |
200 | 258 |
} |
... | ... | |
217 | 275 |
* Override parent::configForm(). |
218 | 276 |
*/ |
219 | 277 |
public function configForm(&$form_state) { |
220 |
$form = $this->sourceForm($this->config); |
|
278 |
$config = $this->getConfig(); |
|
279 |
$config['config'] = TRUE; |
|
280 |
$form = $this->sourceForm($config); |
|
221 | 281 |
$form['jsonpath']['context']['#required'] = FALSE; |
222 | 282 |
$form['jsonpath']['#collapsed'] = FALSE; |
283 |
$form['jsonpath']['allow_override'] = array( |
|
284 |
'#type' => 'checkbox', |
|
285 |
'#title' => t('Allow source configuration override'), |
|
286 |
'#description' => t('This setting allows feed nodes to specify their own JSONPath values for the context and sources.'), |
|
287 |
'#default_value' => $config['allow_override'], |
|
288 |
); |
|
289 |
$form['jsonpath']['convert_four_byte'] = array( |
|
290 |
'#type' => 'checkbox', |
|
291 |
'#title' => t('Convert four byte characters'), |
|
292 |
'#description' => t('Coverts four byte UTF-8 characters to their HTML entity. By default, four byte characters will be stripped.'), |
|
293 |
'#default_value' => !empty($config['convert_four_byte']), |
|
294 |
); |
|
295 |
|
|
223 | 296 |
return $form; |
224 | 297 |
} |
225 | 298 |
|
226 | 299 |
/** |
227 |
* Override parent::getMappingSources(). |
|
228 |
*/ |
|
300 |
* Override parent::getMappingSources().
|
|
301 |
*/
|
|
229 | 302 |
public function getMappingSources() { |
230 | 303 |
$mappings = $this->filterMappings(feeds_importer($this->id)->processor->config['mappings']); |
231 | 304 |
$next = 0; |
232 | 305 |
if (!empty($mappings)) { |
233 |
$last_mapping = end(array_keys($mappings)); |
|
234 |
$next = explode(':', $last_mapping); |
|
235 |
$next = $next[1] + 1; |
|
306 |
$keys = array_keys($mappings); |
|
307 |
|
|
308 |
$nums = array(); |
|
309 |
foreach ($keys as $key) { |
|
310 |
list(, $num) = explode(':', $key); |
|
311 |
$nums[] = $num; |
|
312 |
} |
|
313 |
|
|
314 |
$max = max($nums); |
|
315 |
$next = ++$max; |
|
236 | 316 |
} |
237 | 317 |
return array( |
238 | 318 |
'jsonpath_parser:' . $next => array( |
239 | 319 |
'name' => t('JSONPath Expression'), |
240 |
'description' => t('Allows you to configure n JSONPath expression that will populate this field.'),
|
|
320 |
'description' => t('Allows you to configure a JSONPath expression that will populate this field.'),
|
|
241 | 321 |
), |
242 | 322 |
) + parent::getMappingSources(); |
243 | 323 |
} |
... | ... | |
254 | 334 |
'context' => '', |
255 | 335 |
'sources' => array(), |
256 | 336 |
'debug' => array(), |
337 |
'allow_override' => FALSE, |
|
338 |
'convert_four_byte' => FALSE, |
|
257 | 339 |
); |
258 | 340 |
} |
259 | 341 |
|
... | ... | |
263 | 345 |
* If the values of this source are the same as the base config we set them to |
264 | 346 |
* blank to that the values will be inherited from the importer defaults. |
265 | 347 |
* |
266 |
* @param &$values
|
|
348 |
* @param array $values
|
|
267 | 349 |
* The values from the form to validate, passed by reference. |
268 | 350 |
*/ |
269 | 351 |
public function sourceFormValidate(&$values) { |
352 |
$config = $this->getConfig(); |
|
270 | 353 |
$values = $values['jsonpath']; |
271 |
asort($values); |
|
272 |
asort($this->config); |
|
273 |
if ($values === $this->config) { |
|
354 |
$allow_override = $config['allow_override']; |
|
355 |
unset($config['allow_override']); |
|
356 |
unset($config['convert_four_byte']); |
|
357 |
ksort($values); |
|
358 |
ksort($config); |
|
359 |
if ($values === $config || !$allow_override) { |
|
274 | 360 |
$values = array(); |
275 | 361 |
return; |
276 | 362 |
} |
... | ... | |
284 | 370 |
if (isset($values['jsonpath'])) { |
285 | 371 |
$values = $values['jsonpath']; |
286 | 372 |
} |
287 |
$values['context'] = trim($values['context']); |
|
288 |
foreach ($values['sources'] as &$source) { |
|
289 |
$source = trim($source); |
|
373 |
|
|
374 |
$values['context'] = isset($values['context']) ? trim($values['context']) : ''; |
|
375 |
if (!empty($values['sources'])) { |
|
376 |
foreach ($values['sources'] as &$source) { |
|
377 |
$source = trim($source); |
|
378 |
} |
|
290 | 379 |
} |
291 | 380 |
} |
292 | 381 |
|
382 |
/**
 * Returns the processor mappings that are handled by this parser.
 *
 * Loads the importer identified by $this->id, reads the processor mappings
 * from its configuration and hands them to filterMappings().
 *
 * @return array
 *   An array of mappings keyed source => target.
 */
protected function getOwnMappings() {
  $importer = feeds_importer($this->id);
  $config = $importer->getConfig();
  $processor_mappings = $config['processor']['config']['mappings'];

  return $this->filterMappings($processor_mappings);
}
|
392 |
|
|
293 | 393 |
/** |
294 | 394 |
* Filters mappings, returning the ones that belong to us. |
395 |
* |
|
396 |
* @param array $mappings |
|
397 |
* A mapping array from a processor. |
|
398 |
* |
|
399 |
* @return array |
|
400 |
* An array of mappings keyed source => target. |
|
295 | 401 |
*/ |
296 |
private function filterMappings($mappings) {
|
|
402 |
protected function filterMappings($mappings) {
|
|
297 | 403 |
$our_mappings = array(); |
298 | 404 |
foreach ($mappings as $mapping) { |
299 | 405 |
if (strpos($mapping['source'], 'jsonpath_parser:') === 0) { |
... | ... | |
303 | 409 |
return $our_mappings; |
304 | 410 |
} |
305 | 411 |
|
306 |
private function debug($item, $source) {
|
|
412 |
protected function debug($item, $source) {
|
|
307 | 413 |
if (in_array($source, $this->debug)) { |
308 | 414 |
$o = '<ul>'; |
309 | 415 |
foreach ($item as $i) { |
... | ... | |
313 | 419 |
drupal_set_message($source . ':' . $o); |
314 | 420 |
} |
315 | 421 |
} |
422 |
|
|
423 |
/**
 * Invokes hook_feeds_jsonpath_parser_filter() on every implementing module.
 *
 * Each implementation is called as MODULE_feeds_jsonpath_parser_filter() with
 * the item and the feed source. Invocation stops at the first implementation
 * that returns exactly TRUE.
 *
 * @param array &$item
 *   The item being parsed. It is passed on by reference, so implementations
 *   that declare their parameter by reference can alter it.
 * @param FeedsSource $source
 *   The feed source.
 *
 * @return true|null
 *   TRUE if an implementation asked for the item to be skipped, NULL
 *   otherwise.
 */
protected function invokeHook(array &$item, FeedsSource $source) {
  $implementations = module_implements('feeds_jsonpath_parser_filter');

  foreach ($implementations as $module) {
    $callback = $module . '_feeds_jsonpath_parser_filter';
    $skip = $callback($item, $source);

    // Only a strict TRUE counts as a request to skip the item.
    if ($skip === TRUE) {
      return TRUE;
    }
  }

  return NULL;
}
|
441 |
|
|
442 |
/**
 * Removes every four byte UTF-8 sequence from a string.
 *
 * Matching is done with the class-level $fourByteRegex pattern.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The string with four byte characters removed.
 */
public static function stripFourBytes($string) {
  $pattern = self::$fourByteRegex;
  $stripped = preg_replace($pattern, '', $string);

  return $stripped;
}
|
454 |
|
|
455 |
/**
 * Converts four byte UTF-8 sequences to numeric HTML entities.
 *
 * Every match of the class-level $fourByteRegex pattern is handed to
 * FeedsJSONPathParser::doFourByteReplace(), whose return value replaces the
 * matched bytes.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The string with four byte characters converted.
 */
public static function convertFourBytes($string) {
  $pattern = self::$fourByteRegex;
  $callback = array('FeedsJSONPathParser', 'doFourByteReplace');

  return preg_replace_callback($pattern, $callback, $string);
}
|
467 |
|
|
468 |
/**
 * Replacement callback used by FeedsJSONPathParser::convertFourBytes().
 *
 * Decodes one four byte UTF-8 sequence into its code point and formats it as
 * a decimal HTML character reference, e.g. "&#128512;".
 *
 * @param array $matches
 *   The regular expression matches; element 0 holds the matched bytes.
 *
 * @return string
 *   A four byte unicode character converted to its HTML representation.
 */
public static function doFourByteReplace(array $matches) {
  $sequence = $matches[0];

  // Marker subtracted from each byte: 0xF0 for the lead byte, 0x80 for each
  // of the three continuation bytes.
  $markers = array(0xF0, 0x80, 0x80, 0x80);

  $codepoint = 0;
  foreach ($markers as $index => $marker) {
    // Payload bits are shifted left by 18, 12, 6 and 0 respectively.
    $shift = 6 * (3 - $index);
    $codepoint += (ord($sequence[$index]) - $marker) << $shift;
  }

  return '&#' . $codepoint . ';';
}
|
488 |
|
|
316 | 489 |
} |
Formats disponibles : Unified diff
Update Feeds JSONPath Parser 7.x-1.0-beta2 -> 7.x-1.0