1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* @file
|
5
|
* Contains FeedsJSONPathParser.
|
6
|
*/
|
7
|
|
8
|
/**
|
9
|
* Parses JSON using JSONPath.
|
10
|
*/
|
11
|
class FeedsJSONPathParser extends FeedsParser {
|
12
|
|
13
|
/**
 * Pattern matching a single four byte UTF-8 sequence.
 *
 * The alternatives cover the lead bytes 0xF0, 0xF1-0xF3 and 0xF4 together
 * with the continuation bytes that may follow each of them.
 *
 * @var string
 */
protected static $fourByteRegex = '/(?:\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})/s';
|
19
|
|
20
|
/**
 * Keys of the sources whose query results should be printed for debugging.
 *
 * Filled by parse() from the enabled debug options of the configuration.
 *
 * @var array
 */
protected $debug = array();
|
26
|
|
27
|
/**
 * Implements FeedsParser::parse().
 *
 * Decodes the fetched document as JSON (falling back to the JSON lines
 * format), selects the context items with the configured JSONPath expression
 * and runs every configured source query against each item of the current
 * batch.
 *
 * @param FeedsSource $source
 *   The feed source being imported.
 * @param FeedsFetcherResult $fetcher_result
 *   The fetcher result holding the raw document.
 *
 * @return FeedsParserResult
 *   The items parsed for the current batch.
 *
 * @throws Exception
 *   Thrown when the document cannot be decoded into an array.
 */
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
  $mappings = $this->getOwnMappings();
  $source_config = $source->getConfigFor($this);
  // Allow config inheritance.
  if (empty($source_config)) {
    $source_config = $this->config;
  }

  // configDefaults() ships an empty 'debug' array, so the 'options' key is
  // not guaranteed to exist.
  $debug_options = array();
  if (isset($source_config['debug']['options']) && is_array($source_config['debug']['options'])) {
    $debug_options = $source_config['debug']['options'];
  }
  $this->debug = array_keys(array_filter($debug_options));

  $context = isset($source_config['context']) ? $source_config['context'] : '';
  $queries = array();
  if (isset($source_config['sources']) && is_array($source_config['sources'])) {
    $queries = $source_config['sources'];
  }

  $raw = trim($fetcher_result->getRaw());
  $result = new FeedsParserResult();
  // Set link so we can set the result link attribute.
  $fetcher_config = $source->getConfigFor($source->importer->fetcher);
  if (isset($fetcher_config['source'])) {
    $result->link = $fetcher_config['source'];
  }

  $array = json_decode($raw, TRUE);

  // Support JSON lines format: join the consecutive objects into one list.
  if (!is_array($array)) {
    $raw = preg_replace('/}\s*{/', '},{', $raw);
    $raw = '[' . $raw . ']';
    $array = json_decode($raw, TRUE);
  }

  if (!is_array($array)) {
    throw new Exception(t('There was an error decoding the JSON document.'));
  }
  require_once feeds_jsonpath_parser_library_path();

  $all_items = $this->jsonPath($array, $context);
  unset($array);

  // Batch.
  $state = $source->state(FEEDS_PARSE);
  if (!$state->total) {
    $state->total = count($all_items);
  }

  $limit = $source->importer->getLimit();
  $start = (int) $state->pointer;
  $state->pointer = $start + $limit;
  $all_items = array_slice($all_items, $start, $limit);

  // Set progress state.
  $state->progress($state->total, $state->pointer);

  // Debug output.
  $this->debug($all_items, 'context');

  foreach ($all_items as $item) {
    // Invoke a hook to check whether the item should be skipped.
    if ($this->invokeHook($item, $source) === TRUE) {
      continue;
    }

    $parsed_item = $variables = array();
    foreach ($queries as $source_key => $query) {
      // Variable substitution with the values of the preceding sources.
      $query = strtr($query, $variables);
      $parsed = $this->parseSourceElement($item, $query, $source_key);

      // A query may outlive its mapping; only mapped sources become
      // variables, and only scalar values can be substituted into a query.
      if (isset($mappings[$source_key])) {
        $value = is_array($parsed) ? reset($parsed) : $parsed;
        $variables['{' . $mappings[$source_key] . '}'] = is_scalar($value) ? (string) $value : '';
      }

      // Avoid null values.
      if (isset($parsed)) {
        $parsed_item[$source_key] = $parsed;
      }
    }
    if (!empty($parsed_item)) {
      $result->items[] = $parsed_item;
    }
  }
  return $result;
}
|
103
|
|
104
|
/**
 * Runs a JSONPath expression through the library's jsonPath() function.
 *
 * The library answers FALSE when an expression matches nothing. That would
 * break the loops consuming the result, so FALSE is turned into an empty
 * array here.
 *
 * @param array $array
 *   The input array to query.
 * @param string $expression
 *   The JSONPath expression.
 *
 * @return array
 *   The output of jsonPath(), or an empty array when it returned FALSE.
 *
 * @todo
 *   Figure out error handling.
 */
protected function jsonPath($array, $expression) {
  $matches = jsonPath($array, $expression);
  if ($matches === FALSE) {
    return array();
  }
  return $matches;
}
|
125
|
|
126
|
/**
 * Runs one source query against a single context item.
 *
 * Non-empty string results have their four byte UTF-8 characters either
 * converted or stripped, depending on the 'convert_four_byte' setting.
 *
 * @param array $item
 *   One item from the context.
 * @param string $query
 *   A JSONPath query.
 * @param string $source
 *   The source element that corresponds to the query.
 *
 * @return mixed
 *   NULL when the query is empty or matches nothing, the single value when
 *   there is exactly one match, otherwise the array of matches.
 */
protected function parseSourceElement($item, $query, $source) {
  if (empty($query)) {
    return NULL;
  }

  $matches = $this->jsonPath($item, $query);
  $this->debug($matches, $source);

  $total = count($matches);
  if ($total === 0) {
    return NULL;
  }

  $convert = !empty($this->config['convert_four_byte']);
  foreach ($matches as $index => $match) {
    if (!is_string($match) || $match === '') {
      continue;
    }
    $matches[$index] = $convert ? $this->convertFourBytes($match) : $this->stripFourBytes($match);
  }

  return ($total === 1) ? reset($matches) : $matches;
}
|
163
|
|
164
|
/**
 * Builds the JSONPath settings form.
 *
 * The form offers a context query, one query field per JSONPath mapping and
 * a set of debug checkboxes. configForm() reuses it for the importer
 * defaults.
 *
 * @param array $source_config
 *   The configuration to take default values from. When empty, the parser
 *   configuration is used instead.
 *
 * @return array|null
 *   The form array, or NULL when 'allow_override' is disabled and the form is
 *   not being built on behalf of configForm().
 */
public function sourceForm($source_config) {
  $form = array();

  if (empty($source_config)) {
    $source_config = $this->config;
  }

  if (isset($source_config['allow_override']) &&
      !$source_config['allow_override'] &&
      empty($source_config['config'])) {
    return;
  }

  $importer = feeds_importer($this->id);

  // Add extensions that might get imported.
  $fetcher = $importer->fetcher;
  if (isset($fetcher->config['allowed_extensions'])) {
    if (strpos($fetcher->config['allowed_extensions'], 'json') === FALSE) {
      $fetcher->config['allowed_extensions'] .= ' json';
    }
  }

  // Collect our own mappings and remember which targets are unique.
  $uniques = $mappings = array();
  foreach ($importer->processor->config['mappings'] as $mapping) {
    if (strpos($mapping['source'], 'jsonpath_parser:') === 0) {
      $mappings[$mapping['source']] = $mapping['target'];
      if (!empty($mapping['unique'])) {
        $uniques[] = $mapping['target'];
      }
    }
  }

  $form['jsonpath'] = array(
    '#type' => 'fieldset',
    '#title' => t('JSONPath Parser Settings'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
    '#tree' => TRUE,
  );

  if (empty($mappings)) {
    // Detect if Feeds menu structure has changed. feeds_ui_menu() only exists
    // while the Feeds UI module is enabled, so do not call it blindly.
    $feeds_menu = function_exists('feeds_ui_menu') ? feeds_ui_menu() : array();
    if (isset($feeds_menu['admin/structure/feeds/list'])) {
      $feeds_base = 'admin/structure/feeds/edit/';
    }
    else {
      $feeds_base = 'admin/structure/feeds/';
    }
    $form['jsonpath']['error_message']['#markup'] = '<div class="help">' . t('No JSONPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
    return $form;
  }

  $form['jsonpath']['context'] = array(
    '#type' => 'textfield',
    '#title' => t('Context'),
    '#required' => TRUE,
    '#description' => t('This is the base query, all other queries will execute in this context.'),
    '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
    '#maxlength' => 1024,
    '#size' => 80,
  );
  $form['jsonpath']['sources'] = array(
    '#type' => 'fieldset',
  );

  if (!empty($uniques)) {
    // format_plural() translates its arguments itself, so it receives the
    // untranslated strings plus the replacements for both variants.
    $unique_list = implode(', ', $uniques);
    $items = array(
      format_plural(count($uniques),
        'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
        'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
        array('!column' => $unique_list, '!columns' => $unique_list)),
    );
    $form['jsonpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
  }

  // Each source may use the targets of the sources listed before it as
  // replacement variables.
  $variables = array();
  foreach ($mappings as $source => $target) {
    $form['jsonpath']['sources'][$source] = array(
      '#type' => 'textfield',
      '#title' => $target,
      '#description' => t('The JSONPath expression to execute.'),
      '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    if (!empty($variables)) {
      $variable_text = format_plural(count($variables),
        'The variable %v is available for replacement.',
        'The variables %v are available for replacement.',
        array('%v' => implode(', ', $variables))
      );
      $form['jsonpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
    }
    $variables[] = '{' . $target . '}';
  }

  $form['jsonpath']['debug'] = array(
    '#type' => 'fieldset',
    '#title' => t('Debug'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  $form['jsonpath']['debug']['options'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Debug query'),
    '#options' => array_merge(array('context' => 'context'), $mappings),
    '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
  );
  return $form;
}
|
273
|
|
274
|
/**
 * Overrides parent::configForm().
 *
 * Reuses sourceForm() for the importer level defaults and adds the settings
 * that only exist on the importer: the override switch and the four byte
 * character handling.
 *
 * @param array $form_state
 *   The form state, passed by reference.
 *
 * @return array
 *   The configuration form.
 */
public function configForm(&$form_state) {
  $config = $this->getConfig();
  // Flag the call so sourceForm() builds the form even when overriding is
  // disabled.
  $config['config'] = TRUE;
  $form = $this->sourceForm($config);

  // sourceForm() leaves the context element out while no JSONPath mappings
  // exist; do not create a stray element in that case.
  if (isset($form['jsonpath']['context'])) {
    $form['jsonpath']['context']['#required'] = FALSE;
  }
  $form['jsonpath']['#collapsed'] = FALSE;
  $form['jsonpath']['allow_override'] = array(
    '#type' => 'checkbox',
    '#title' => t('Allow source configuration override'),
    '#description' => t('This setting allows feed nodes to specify their own JSONPath values for the context and sources.'),
    '#default_value' => $config['allow_override'],
  );
  $form['jsonpath']['convert_four_byte'] = array(
    '#type' => 'checkbox',
    '#title' => t('Convert four byte characters'),
    '#description' => t('Converts four byte UTF-8 characters to their HTML entity. By default, four byte characters will be stripped.'),
    '#default_value' => !empty($config['convert_four_byte']),
  );

  return $form;
}
|
298
|
|
299
|
/**
 * Overrides parent::getMappingSources().
 *
 * Offers one new JSONPath source whose key is numbered one above the highest
 * number used by the existing JSONPath mappings, starting at zero.
 *
 * @return array
 *   The new JSONPath source followed by the sources of the parent class.
 */
public function getMappingSources() {
  $processor_mappings = feeds_importer($this->id)->processor->config['mappings'];
  $own = $this->filterMappings($processor_mappings);

  $next = 0;
  if (!empty($own)) {
    // Keys look like 'jsonpath_parser:N'; gather every N.
    $suffixes = array();
    foreach (array_keys($own) as $source_key) {
      $parts = explode(':', $source_key);
      $suffixes[] = $parts[1];
    }

    $highest = max($suffixes);
    $next = ++$highest;
  }

  $sources = array(
    'jsonpath_parser:' . $next => array(
      'name' => t('JSONPath Expression'),
      'description' => t('Allows you to configure a JSONPath expression that will populate this field.'),
    ),
  );
  return $sources + parent::getMappingSources();
}
|
324
|
|
325
|
/**
 * Provides the default source configuration.
 *
 * @return array
 *   Always an empty array.
 */
public function sourceDefaults() {
  $defaults = array();
  return $defaults;
}
|
328
|
|
329
|
/**
 * Provides the default parser configuration.
 *
 * @return array
 *   The defaults for the context query, the source queries, the debug
 *   settings, the override switch and the four byte conversion switch.
 */
public function configDefaults() {
  $defaults = array();
  $defaults['context'] = '';
  $defaults['sources'] = array();
  $defaults['debug'] = array();
  $defaults['allow_override'] = FALSE;
  $defaults['convert_four_byte'] = FALSE;
  return $defaults;
}
|
341
|
|
342
|
/**
 * Overrides parent::sourceFormValidate().
 *
 * If the values of this source are the same as the base config we set them to
 * blank so that the values will be inherited from the importer defaults. The
 * same happens when overriding is disabled or no JSONPath values were
 * submitted.
 *
 * @param array $values
 *   The values from the form to validate, passed by reference.
 */
public function sourceFormValidate(&$values) {
  $config = $this->getConfig();
  $allow_override = !empty($config['allow_override']);

  // sourceForm() renders nothing while overriding is disabled, so the
  // 'jsonpath' values may be missing entirely.
  if (!$allow_override || !isset($values['jsonpath']) || !is_array($values['jsonpath'])) {
    $values = array();
    return;
  }

  $values = $values['jsonpath'];
  // These two settings only exist on the importer and never in the source
  // values, so leave them out of the comparison.
  unset($config['allow_override'], $config['convert_four_byte']);
  ksort($values);
  ksort($config);
  if ($values === $config) {
    $values = array();
    return;
  }
  $this->configFormValidate($values);
}
|
365
|
|
366
|
/**
 * Overrides parent::configFormValidate().
 *
 * Unwraps the 'jsonpath' sub-array when present and trims the context query
 * and every source query.
 *
 * @param array $values
 *   The values from the form to validate, passed by reference.
 */
public function configFormValidate(&$values) {
  if (isset($values['jsonpath'])) {
    $values = $values['jsonpath'];
  }

  $values['context'] = isset($values['context']) ? trim($values['context']) : '';
  if (!empty($values['sources'])) {
    // Write back by key instead of iterating by reference, which would leave
    // a dangling reference to the last element.
    foreach ($values['sources'] as $key => $query) {
      $values['sources'][$key] = trim($query);
    }
  }
}
|
381
|
|
382
|
/**
 * Collects the processor mappings that use a JSONPath source.
 *
 * @return array
 *   An array of mappings keyed source => target.
 */
protected function getOwnMappings() {
  $importer = feeds_importer($this->id);
  $config = $importer->getConfig();
  $processor_mappings = $config['processor']['config']['mappings'];
  return $this->filterMappings($processor_mappings);
}
|
392
|
|
393
|
/**
 * Picks the mappings whose source starts with 'jsonpath_parser:'.
 *
 * @param array $mappings
 *   A mapping array from a processor.
 *
 * @return array
 *   An array of mappings keyed source => target.
 */
protected function filterMappings($mappings) {
  $prefix = 'jsonpath_parser:';
  $own = array();
  foreach ($mappings as $mapping) {
    if (strpos($mapping['source'], $prefix) !== 0) {
      continue;
    }
    $own[$mapping['source']] = $mapping['target'];
  }
  return $own;
}
|
411
|
|
412
|
/**
 * Shows the values found for a source when debugging is enabled for it.
 *
 * @param array $item
 *   The values to print, one list entry each.
 * @param string $source
 *   The source key; nothing is printed unless it is listed in $this->debug.
 */
protected function debug($item, $source) {
  if (!in_array($source, $this->debug)) {
    return;
  }

  $rows = '';
  foreach ($item as $value) {
    $rows .= '<li>' . check_plain(var_export($value, TRUE)) . '</li>';
  }
  drupal_set_message($source . ':' . '<ul>' . $rows . '</ul>');
}
|
422
|
|
423
|
/**
 * Asks the implementations of our filter hook whether to skip an item.
 *
 * Implementations are called in turn until one of them returns TRUE.
 *
 * @param array &$item
 *   The item, handed to each implementation.
 * @param FeedsSource $source
 *   The feed source.
 *
 * @return true|null
 *   TRUE if the item should be skipped, NULL otherwise.
 */
protected function invokeHook(array &$item, FeedsSource $source) {
  $hook = 'feeds_jsonpath_parser_filter';
  foreach (module_implements($hook) as $module) {
    $callback = $module . '_' . $hook;
    $skip = $callback($item, $source);
    if ($skip === TRUE) {
      return TRUE;
    }
  }
  return NULL;
}
|
441
|
|
442
|
/**
 * Removes every four byte UTF-8 character from a string.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The input with all four byte characters deleted.
 */
public static function stripFourBytes($string) {
  $pattern = self::$fourByteRegex;
  $stripped = preg_replace($pattern, '', $string);
  return $stripped;
}
|
454
|
|
455
|
/**
 * Swaps each four byte UTF-8 character for its numeric HTML entity.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The input with all four byte characters converted.
 */
public static function convertFourBytes($string) {
  $callback = array('FeedsJSONPathParser', 'doFourByteReplace');
  return preg_replace_callback(self::$fourByteRegex, $callback, $string);
}
|
467
|
|
468
|
/**
 * Callback for FeedsJSONPathParser::convertFourBytes().
 *
 * @param array $matches
 *   The regular expression matches; element 0 is the four byte character.
 *
 * @return string
 *   The character written as a decimal numeric HTML entity.
 */
public static function doFourByteReplace(array $matches) {
  $bytes = $matches[0];

  // Drop the marker bits of each byte and shift what is left into place.
  $lead = (ord($bytes[0]) - 0xF0) << 18;
  $second = (ord($bytes[1]) - 0x80) << 12;
  $third = (ord($bytes[2]) - 0x80) << 6;
  $last = ord($bytes[3]) - 0x80;

  return '&#' . ($lead + $second + $third + $last) . ';';
}
|
488
|
|
489
|
}
|