Projet

Général

Profil

Paste
Télécharger (14,7 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / feeds_jsonpath_parser / FeedsJSONPathParser.inc @ 7707c013

1
<?php
2

    
3
/**
4
 * @file
5
 * Contains FeedsJSONPathParser.
6
 */
7

    
8
/**
 * Parses JSON using JSONPath.
 *
 * The fetched document is decoded, a "context" JSONPath expression selects the
 * list of items, and one JSONPath expression per mapping source extracts the
 * values of each item.
 */
class FeedsJSONPathParser extends FeedsParser {

  /**
   * A regular expression that finds four byte UTF-8 chars.
   *
   * @var string
   */
  protected static $fourByteRegex = '/(?:\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})/s';

  /**
   * The source fields to debug.
   *
   * Holds the keys of the enabled debug checkboxes ('context' and/or mapping
   * source keys); filled in by parse().
   *
   * @var array
   */
  protected $debug = array();

  /**
   * Implements FeedsParser::parse().
   *
   * @param FeedsSource $source
   *   The feed source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result holding the raw JSON document.
   *
   * @return FeedsParserResult
   *   The parser result containing one entry per non-empty parsed item.
   *
   * @throws Exception
   *   Thrown when the raw document cannot be decoded into an array.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $mappings = $this->getOwnMappings();
    $source_config = $source->getConfigFor($this);
    // Allow config inheritance.
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    // The default 'debug' setting is an empty array, so the 'options' key is
    // not guaranteed to exist.
    $debug_options = array();
    if (isset($source_config['debug']['options']) && is_array($source_config['debug']['options'])) {
      $debug_options = $source_config['debug']['options'];
    }
    $this->debug = array_keys(array_filter($debug_options));

    $context = isset($source_config['context']) ? $source_config['context'] : '';
    $sources = array();
    if (isset($source_config['sources']) && is_array($source_config['sources'])) {
      $sources = $source_config['sources'];
    }

    $raw = trim($fetcher_result->getRaw());
    $result = new FeedsParserResult();
    // Set link so we can set the result link attribute.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    if (isset($fetcher_config['source'])) {
      $result->link = $fetcher_config['source'];
    }

    $array = json_decode($raw, TRUE);

    // Support JSON lines format: when the document is not valid JSON as a
    // whole, join adjacent objects with commas and wrap them in a list.
    if (!is_array($array)) {
      $raw = preg_replace('/}\s*{/', '},{', $raw);
      $raw = '[' . $raw . ']';
      $array = json_decode($raw, TRUE);
    }

    if (!is_array($array)) {
      throw new Exception(t('There was an error decoding the JSON document.'));
    }
    require_once feeds_jsonpath_parser_library_path();

    $all_items = $this->jsonPath($array, $context);
    unset($array);

    // Batch.
    $state = $source->state(FEEDS_PARSE);
    if (!$state->total) {
      $state->total = count($all_items);
    }

    $limit = $source->importer->getLimit();
    $start = (int) $state->pointer;
    $state->pointer = $start + $limit;
    $all_items = array_slice($all_items, $start, $limit);

    // Set progress state.
    $state->progress($state->total, $state->pointer);

    // Debug output.
    $this->debug($all_items, 'context');

    foreach ($all_items as $item) {
      // Invoke a hook to check whether the item should be skipped.
      if ($this->invokeHook($item, $source) === TRUE) {
        continue;
      }

      $parsed_item = $variables = array();
      foreach ($sources as $source_key => $query) {
        // Variable substitution.
        $query = strtr($query, $variables);
        $parsed = $this->parseSourceElement($item, $query, $source_key);

        // Expose the first value of this source as a {target} variable for
        // the queries that follow. Sources whose mapping no longer exists and
        // values that cannot be represented as a string are not exposed.
        $value = is_array($parsed) ? reset($parsed) : $parsed;
        if (isset($mappings[$source_key]) && ($value === NULL || is_scalar($value))) {
          $variables['{' . $mappings[$source_key] . '}'] = (string) $value;
        }

        // Avoid null values.
        if (isset($parsed)) {
          $parsed_item[$source_key] = $parsed;
        }
      }
      if (!empty($parsed_item)) {
        $result->items[] = $parsed_item;
      }
    }
    return $result;
  }

  /**
   * Utilizes the jsonPath function from jsonpath-0.8.1.php.
   *
   * jsonPath returns false if the expression returns zero results and that will
   * mess up our for loops, so return an empty array instead.
   *
   * @param array $array
   *   The input array to parse.
   * @param string $expression
   *   The JSONPath expression.
   *
   * @return array
   *   Returns an array that is the output of jsonPath.
   *
   * @todo
   *   Figure out error handling.
   */
  protected function jsonPath($array, $expression) {
    $result = jsonPath($array, $expression);
    return ($result === FALSE) ? array() : $result;
  }

  /**
   * Parses one item from the context array.
   *
   * String results have their four byte UTF-8 characters either converted or
   * stripped, depending on the 'convert_four_byte' setting.
   *
   * @param array $item
   *   An array containing one item from the context.
   * @param string $query
   *   A JSONPath query.
   * @param string $source
   *   The source element that corresponds to the query.
   *
   * @return mixed
   *   NULL when the query is empty or matches nothing, the single matched
   *   value when there is exactly one match, otherwise the array of matches.
   */
  protected function parseSourceElement($item, $query, $source) {
    if (empty($query)) {
      return NULL;
    }
    $results = $this->jsonPath($item, $query);
    $this->debug($results, $source);

    $count = count($results);
    if ($count === 0) {
      return NULL;
    }

    $convert = !empty($this->config['convert_four_byte']);
    foreach ($results as $delta => $value) {
      if (is_string($value) && $value !== '') {
        $results[$delta] = $convert ? $this->convertFourBytes($value) : $this->stripFourBytes($value);
      }
    }

    if ($count === 1) {
      return reset($results);
    }

    return $results;
  }

  /**
   * Builds the source form.
   *
   * @param array $source_config
   *   The configuration of the source; when empty the importer configuration
   *   is used instead.
   *
   * @return array|null
   *   The form array, or NULL when overriding is disabled and the form is not
   *   being built for the importer configuration.
   */
  public function sourceForm($source_config) {
    $form = array();

    if (empty($source_config)) {
      $source_config = $this->config;
    }

    if (isset($source_config['allow_override']) &&
        !$source_config['allow_override'] &&
        empty($source_config['config'])) {
      return NULL;
    }

    // Add extensions that might get imported.
    $fetcher = feeds_importer($this->id)->fetcher;
    if (isset($fetcher->config['allowed_extensions'])) {
      if (strpos($fetcher->config['allowed_extensions'], 'json') === FALSE) {
        $fetcher->config['allowed_extensions'] .= ' json';
      }
    }
    $mappings_ = feeds_importer($this->id)->processor->config['mappings'];
    $uniques = $mappings = array();

    foreach ($mappings_ as $mapping) {
      if (strpos($mapping['source'], 'jsonpath_parser:') === 0) {
        $mappings[$mapping['source']] = $mapping['target'];
        if (!empty($mapping['unique'])) {
          $uniques[] = $mapping['target'];
        }
      }
    }
    $form['jsonpath'] = array(
      '#type' => 'fieldset',
      '#title' => t('JSONPath Parser Settings'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#tree' => TRUE,
    );
    if (empty($mappings)) {
      // Detect if Feeds menu structure has changed. This will take a while to
      // be released, but since I run dev it needs to work. feeds_ui_menu() is
      // only called when it is defined, so the form does not fatal without it.
      $feeds_menu = function_exists('feeds_ui_menu') ? feeds_ui_menu() : array();
      if (isset($feeds_menu['admin/structure/feeds/list'])) {
        $feeds_base = 'admin/structure/feeds/edit/';
      }
      else {
        $feeds_base = 'admin/structure/feeds/';
      }
      $form['jsonpath']['error_message']['#markup'] = '<div class="help">' . t('No JSONPath mappings are defined. Define mappings !link.', array('!link' => l(t('here'), $feeds_base . $this->id . '/mapping'))) . '</div><br />';
      return $form;
    }
    $form['jsonpath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('This is the base query, all other queries will execute in this context.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    $form['jsonpath']['sources'] = array(
      '#type' => 'fieldset',
    );
    if (!empty($uniques)) {
      // format_plural() translates the strings itself, so it receives the raw
      // strings plus the replacement arguments instead of t() output.
      $unique_list = implode(', ', $uniques);
      $items = array(
        format_plural(count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array('!column' => $unique_list, '!columns' => $unique_list)
        ),
      );
      $form['jsonpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array('items' => $items)) . '</div>';
    }
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['jsonpath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The JSONPath expression to execute.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
        '#size' => 80,
      );
      // Only the targets of the preceding sources are usable as variables.
      if (!empty($variables)) {
        $variable_text = format_plural(count($variables),
          'The variable %v is available for replacement.',
          'The variables %v are available for replacement.',
          array('%v' => implode(', ', $variables))
        );
        $form['jsonpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
      }
      $variables[] = '{' . $target . '}';
    }
    $form['jsonpath']['debug'] = array(
      '#type' => 'fieldset',
      '#title' => t('Debug'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['jsonpath']['debug']['options'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array('context' => 'context'), $mappings),
      '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
    );
    return $form;
  }

  /**
   * Override parent::configForm().
   *
   * Reuses the source form and adds the importer-only settings.
   */
  public function configForm(&$form_state) {
    $config = $this->getConfig();
    // Mark the configuration as coming from the importer so that sourceForm()
    // builds the form even when overriding is disabled.
    $config['config'] = TRUE;
    $form = $this->sourceForm($config);
    // The context element is absent when no mappings are defined.
    if (isset($form['jsonpath']['context'])) {
      $form['jsonpath']['context']['#required'] = FALSE;
    }
    $form['jsonpath']['#collapsed'] = FALSE;
    $form['jsonpath']['allow_override'] = array(
      '#type' => 'checkbox',
      '#title' => t('Allow source configuration override'),
      '#description' => t('This setting allows feed nodes to specify their own JSONPath values for the context and sources.'),
      '#default_value' => !empty($config['allow_override']),
    );
    $form['jsonpath']['convert_four_byte'] = array(
      '#type' => 'checkbox',
      '#title' => t('Convert four byte characters'),
      '#description' => t('Converts four byte UTF-8 characters to their HTML entity. By default, four byte characters will be stripped.'),
      '#default_value' => !empty($config['convert_four_byte']),
    );

    return $form;
  }

  /**
   * Override parent::getMappingSources().
   *
   * Offers one new 'jsonpath_parser:N' source, where N is one higher than the
   * highest number already used by this parser's mappings.
   */
  public function getMappingSources() {
    $mappings = $this->filterMappings(feeds_importer($this->id)->processor->config['mappings']);
    $next = 0;
    if (!empty($mappings)) {
      $nums = array();
      foreach (array_keys($mappings) as $key) {
        list(, $num) = explode(':', $key);
        $nums[] = (int) $num;
      }
      $next = max($nums) + 1;
    }
    return array(
      'jsonpath_parser:' . $next => array(
        'name' => t('JSONPath Expression'),
        'description' => t('Allows you to configure a JSONPath expression that will populate this field.'),
      ),
    ) + parent::getMappingSources();
  }

  /**
   * Define source defaults.
   *
   * @return array
   *   An empty array.
   */
  public function sourceDefaults() {
    return array();
  }

  /**
   * Define defaults.
   *
   * @return array
   *   The default importer configuration of this parser.
   */
  public function configDefaults() {
    return array(
      'context' => '',
      'sources' => array(),
      'debug' => array(),
      'allow_override' => FALSE,
      'convert_four_byte' => FALSE,
    );
  }

  /**
   * Override parent::sourceFormValidate().
   *
   * If the values of this source are the same as the base config we set them to
   * blank so that the values will be inherited from the importer defaults.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference.
   */
  public function sourceFormValidate(&$values) {
    $config = $this->getConfig();
    // sourceForm() may have returned nothing, in which case there are no
    // 'jsonpath' values to read.
    $values = isset($values['jsonpath']) ? $values['jsonpath'] : array();
    $allow_override = !empty($config['allow_override']);
    unset($config['allow_override']);
    unset($config['convert_four_byte']);
    ksort($values);
    ksort($config);
    if ($values === $config || !$allow_override) {
      $values = array();
      return;
    }
    $this->configFormValidate($values);
  }

  /**
   * Override parent::configFormValidate().
   *
   * Trims the context and every source expression.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference.
   */
  public function configFormValidate(&$values) {
    if (isset($values['jsonpath'])) {
      $values = $values['jsonpath'];
    }

    $values['context'] = isset($values['context']) ? trim($values['context']) : '';
    if (!empty($values['sources'])) {
      foreach ($values['sources'] as &$source) {
        $source = trim($source);
      }
      // Break the reference so later writes cannot alter the last element.
      unset($source);
    }
  }

  /**
   * Gets the mappings that belong to this parser.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function getOwnMappings() {
    $importer_config = feeds_importer($this->id)->getConfig();
    return $this->filterMappings($importer_config['processor']['config']['mappings']);
  }

  /**
   * Filters mappings, returning the ones that belong to us.
   *
   * @param array $mappings
   *   A mapping array from a processor.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function filterMappings($mappings) {
    $our_mappings = array();
    foreach ($mappings as $mapping) {
      if (strpos($mapping['source'], 'jsonpath_parser:') === 0) {
        $our_mappings[$mapping['source']] = $mapping['target'];
      }
    }
    return $our_mappings;
  }

  /**
   * Prints the results of a query as a message when debugging is enabled.
   *
   * @param array $item
   *   The values to print, one list entry per value.
   * @param string $source
   *   The source key the values belong to, or 'context'.
   */
  protected function debug($item, $source) {
    if (!in_array($source, $this->debug, TRUE)) {
      return;
    }
    $o = '<ul>';
    foreach ($item as $i) {
      $o .= '<li>' . check_plain(var_export($i, TRUE)) . '</li>';
    }
    $o .= '</ul>';
    drupal_set_message(check_plain($source) . ':' . $o);
  }

  /**
   * Calls our filter hook.
   *
   * @param array &$item
   *   The item to alter.
   * @param FeedsSource $source
   *   The feed source.
   *
   * @return true|null
   *   Returns true if the item should be skipped.
   */
  protected function invokeHook(array &$item, FeedsSource $source) {
    foreach (module_implements('feeds_jsonpath_parser_filter') as $module) {
      $function = $module . '_feeds_jsonpath_parser_filter';
      if ($function($item, $source) === TRUE) {
        return TRUE;
      }
    }
    return NULL;
  }

  /**
   * Strips four byte characters from a string.
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   The string with four byte characters removed.
   */
  public static function stripFourBytes($string) {
    return preg_replace(self::$fourByteRegex, '', $string);
  }

  /**
   * Replaces four byte characters with their HTML unicode codepoint.
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   The string with four byte characters converted.
   */
  public static function convertFourBytes($string) {
    return preg_replace_callback(self::$fourByteRegex, array('FeedsJSONPathParser', 'doFourByteReplace'), $string);
  }

  /**
   * Callback for FeedsJSONPathParser::convertFourBytes().
   *
   * @param array $matches
   *   The regular expression matches.
   *
   * @return string
   *   A four byte unicode character converted to its HTML representation.
   */
  public static function doFourByteReplace(array $matches) {
    $char = $matches[0];

    // Calculate the codepoint of the character: the lead byte contributes the
    // bits above 0xF0 and each following byte the bits above 0x80.
    $codepoint = (ord($char[0]) - 0xF0) << 18;
    $codepoint += (ord($char[1]) - 0x80) << 12;
    $codepoint += (ord($char[2]) - 0x80) << 6;
    $codepoint += ord($char[3]) - 0x80;

    return '&#' . $codepoint . ';';
  }

}