Project

General

Profile

Paste
Download (17.1 KB) Statistics
| Branch: | Revision:

root / drupal7 / modules / search / search.extender.inc @ 582db59d

1
<?php
2

    
3
/**
4
 * @file
5
 * Search query extender and helper functions.
6
 */
7

    
8
/**
9
 * Do a query on the full-text search index for a word or words.
10
 *
11
 * This function is normally only called by each module that supports the
12
 * indexed search (and thus, implements hook_update_index()).
13
 *
14
 * Results are retrieved in two logical passes. However, the two passes are
15
 * joined together into a single query. And in the case of most simple
16
 * queries the second pass is not even used.
17
 *
18
 * The first pass selects a set of all possible matches, which has the benefit
19
 * of also providing the exact result set for simple "AND" or "OR" searches.
20
 *
21
 * The second portion of the query further refines this set by verifying
22
 * advanced text conditions (such as negative or phrase matches).
23
 *
24
 * The used query object has the tag 'search_$module' and can be further
25
 * extended with hook_query_alter().
26
 */
27
class SearchQuery extends SelectQueryExtender {
28
  /**
29
   * The search query that is used for searching.
30
   *
31
   * @var string
32
   */
33
  protected $searchExpression;
34

    
35
  /**
36
   * Type of search (search module).
37
   *
38
   * This maps to the value of the type column in search_index, and is equal
39
   * to the machine-readable name of the module that implements
40
   * hook_search_info().
41
   *
42
   * @var string
43
   */
44
  protected $type;
45

    
46
  /**
47
   * Positive and negative search keys.
48
   *
49
   * @var array
50
   */
51
  protected $keys = array('positive' => array(), 'negative' => array());
52

    
53
  /**
54
   * Indicates whether the first pass query requires complex conditions (LIKE).
55
   *
56
   * @var boolean.
57
   */
58
  protected $simple = TRUE;
59

    
60
  /**
61
   * Conditions that are used for exact searches.
62
   *
63
   * This is always used for the second pass query but not for the first pass,
64
   * unless $this->simple is FALSE.
65
   *
66
   * @var DatabaseCondition
67
   */
68
  protected $conditions;
69

    
70
  /**
71
   * Indicates how many matches for a search query are necessary.
72
   *
73
   * @var int
74
   */
75
  protected $matches = 0;
76

    
77
  /**
78
   * Array of search words.
79
   *
80
   * These words have to match against {search_index}.word.
81
   *
82
   * @var array
83
   */
84
  protected $words = array();
85

    
86
  /**
87
   * Multiplier for the normalized search score.
88
   *
89
   * This value is calculated by the first pass query and multiplied with the
90
   * actual score of a specific word to make sure that the resulting calculated
91
   * score is between 0 and 1.
92
   *
93
   * @var float
94
   */
95
  protected $normalize;
96

    
97
  /**
98
   * Indicates whether the first pass query has been executed.
99
   *
100
   * @var boolean
101
   */
102
  protected $executedFirstPass = FALSE;
103

    
104
  /**
105
   * Stores score expressions.
106
   *
107
   * @var array
108
   *
109
   * @see addScore()
110
   */
111
  protected $scores = array();
112

    
113
  /**
114
   * Stores arguments for score expressions.
115
   *
116
   * @var array
117
   */
118
  protected $scoresArguments = array();
119

    
120
  /**
121
   * Stores multipliers for score expressions.
122
   *
123
   * @var array
124
   */
125
  protected $multiply = array();
126

    
127
  /**
128
   * Whether or not search expressions were ignored.
129
   *
130
   * The maximum number of AND/OR combinations exceeded can be configured to
131
   * avoid Denial-of-Service attacks. Expressions beyond the limit are ignored.
132
   *
133
   * @var boolean
134
   */
135
  protected $expressionsIgnored = FALSE;
136

    
137
  /**
138
   * Sets up the search query expression.
139
   *
140
   * @param $query
141
   *   A search query string, which can contain options.
142
   * @param $module
143
   *   The search module. This maps to {search_index}.type in the database.
144
   *
145
   * @return
146
   *   The SearchQuery object.
147
   */
148
  public function searchExpression($expression, $module) {
149
    $this->searchExpression = $expression;
150
    $this->type = $module;
151

    
152
    // Add a search_* tag. This needs to be added before any preExecute methods
153
    // for decorated queries are called, as $this->prepared will be set to TRUE
154
    // and tags added in the execute method will never get used. For example,
155
    // if $query is extended by 'SearchQuery' then 'PagerDefault', the
156
    // search-specific tag will be added too late (when preExecute() has
157
    // already been called from the PagerDefault extender), and as a
158
    // consequence will not be available to hook_query_alter() implementations,
159
    // nor will the correct hook_query_TAG_alter() implementations get invoked.
160
    // See node_search_execute().
161
    $this->addTag('search_' . $module);
162

    
163
    return $this;
164
  }
165

    
166
  /**
167
   * Applies a search option and removes it from the search query string.
168
   *
169
   * These options are in the form option:value,value2,value3.
170
   *
171
   * @param $option
172
   *   Name of the option.
173
   * @param $column
174
   *   Name of the database column to which the value should be applied.
175
   *
176
   * @return
177
   *   TRUE if a value for that option was found, FALSE if not.
178
   */
179
  public function setOption($option, $column) {
180
    if ($values = search_expression_extract($this->searchExpression, $option)) {
181
      $or = db_or();
182
      foreach (explode(',', $values) as $value) {
183
        $or->condition($column, $value);
184
      }
185
      $this->condition($or);
186
      $this->searchExpression = search_expression_insert($this->searchExpression, $option);
187
      return TRUE;
188
    }
189
    return FALSE;
190
  }
191

    
192
  /**
193
   * Parses the search query into SQL conditions.
194
   *
195
   * We build two queries that match the dataset bodies.
196
   */
197
  protected function parseSearchExpression() {
198
    // Matchs words optionally prefixed by a dash. A word in this case is
199
    // something between two spaces, optionally quoted.
200
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' .  $this->searchExpression , $keywords, PREG_SET_ORDER);
201

    
202
    if (count($keywords) ==  0) {
203
      return;
204
    }
205

    
206
    // Classify tokens.
207
    $or = FALSE;
208
    $warning = '';
209
    $limit_combinations = variable_get('search_and_or_limit', 7);
210
    // The first search expression does not count as AND.
211
    $and_count = -1;
212
    $or_count = 0;
213
    foreach ($keywords as $match) {
214
      if ($or_count && $and_count + $or_count >= $limit_combinations) {
215
        // Ignore all further search expressions to prevent Denial-of-Service
216
        // attacks using a high number of AND/OR combinations.
217
        $this->expressionsIgnored = TRUE;
218
        break;
219
      }
220
      $phrase = FALSE;
221
      // Strip off phrase quotes.
222
      if ($match[2]{0} == '"') {
223
        $match[2] = substr($match[2], 1, -1);
224
        $phrase = TRUE;
225
        $this->simple = FALSE;
226
      }
227
      // Simplify keyword according to indexing rules and external
228
      // preprocessors. Use same process as during search indexing, so it
229
      // will match search index.
230
      $words = search_simplify($match[2]);
231
      // Re-explode in case simplification added more words, except when
232
      // matching a phrase.
233
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
234
      // Negative matches.
235
      if ($match[1] == '-') {
236
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
237
      }
238
      // OR operator: instead of a single keyword, we store an array of all
239
      // OR'd keywords.
240
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
241
        $last = array_pop($this->keys['positive']);
242
        // Starting a new OR?
243
        if (!is_array($last)) {
244
          $last = array($last);
245
        }
246
        $this->keys['positive'][] = $last;
247
        $or = TRUE;
248
        $or_count++;
249
        continue;
250
      }
251
      // AND operator: implied, so just ignore it.
252
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
253
        $warning = $match[2];
254
        continue;
255
      }
256

    
257
      // Plain keyword.
258
      else {
259
        if ($match[2] == 'or') {
260
          $warning = $match[2];
261
        }
262
        if ($or) {
263
          // Add to last element (which is an array).
264
          $this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words);
265
        }
266
        else {
267
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
268
          $and_count++;
269
        }
270
      }
271
      $or = FALSE;
272
    }
273

    
274
    // Convert keywords into SQL statements.
275
    $this->conditions = db_and();
276
    $simple_and = FALSE;
277
    $simple_or = FALSE;
278
    // Positive matches.
279
    foreach ($this->keys['positive'] as $key) {
280
      // Group of ORed terms.
281
      if (is_array($key) && count($key)) {
282
        $simple_or = TRUE;
283
        $any = FALSE;
284
        $queryor = db_or();
285
        foreach ($key as $or) {
286
          list($num_new_scores) = $this->parseWord($or);
287
          $any |= $num_new_scores;
288
          $queryor->condition('d.data', "% $or %", 'LIKE');
289
        }
290
        if (count($queryor)) {
291
          $this->conditions->condition($queryor);
292
          // A group of OR keywords only needs to match once.
293
          $this->matches += ($any > 0);
294
        }
295
      }
296
      // Single ANDed term.
297
      else {
298
        $simple_and = TRUE;
299
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
300
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
301
        if (!$num_valid_words) {
302
          $this->simple = FALSE;
303
        }
304
        // Each AND keyword needs to match at least once.
305
        $this->matches += $num_new_scores;
306
      }
307
    }
308
    if ($simple_and && $simple_or) {
309
      $this->simple = FALSE;
310
    }
311
    // Negative matches.
312
    foreach ($this->keys['negative'] as $key) {
313
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
314
      $this->simple = FALSE;
315
    }
316

    
317
    if ($warning == 'or') {
318
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
319
    }
320
  }
321

    
322
  /**
323
   * Helper function for parseQuery().
324
   */
325
  protected function parseWord($word) {
326
    $num_new_scores = 0;
327
    $num_valid_words = 0;
328
    // Determine the scorewords of this word/phrase.
329
    $split = explode(' ', $word);
330
    foreach ($split as $s) {
331
      $num = is_numeric($s);
332
      if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
333
        if (!isset($this->words[$s])) {
334
          $this->words[$s] = $s;
335
          $num_new_scores++;
336
        }
337
        $num_valid_words++;
338
      }
339
    }
340
    // Return matching snippet and number of added words.
341
    return array($num_new_scores, $num_valid_words);
342
  }
343

    
344
  /**
345
   * Executes the first pass query.
346
   *
347
   * This can either be done explicitly, so that additional scores and
348
   * conditions can be applied to the second pass query, or implicitly by
349
   * addScore() or execute().
350
   *
351
   * @return
352
   *   TRUE if search items exist, FALSE if not.
353
   */
354
  public function executeFirstPass() {
355
    $this->parseSearchExpression();
356

    
357
    if (count($this->words) == 0) {
358
      form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
359
      return FALSE;
360
    }
361
    if ($this->expressionsIgnored) {
362
      drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => variable_get('search_and_or_limit', 7))), 'warning');
363
    }
364
    $this->executedFirstPass = TRUE;
365

    
366
    if (!empty($this->words)) {
367
      $or = db_or();
368
      foreach ($this->words as $word) {
369
        $or->condition('i.word', $word);
370
      }
371
      $this->condition($or);
372
    }
373
    // Build query for keyword normalization.
374
    $this->join('search_total', 't', 'i.word = t.word');
375
    $this
376
      ->condition('i.type', $this->type)
377
      ->groupBy('i.type')
378
      ->groupBy('i.sid')
379
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));
380

    
381
    // Clone the query object to do the firstPass query;
382
    $first = clone $this->query;
383

    
384
    // For complex search queries, add the LIKE conditions to the first pass query.
385
    if (!$this->simple) {
386
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
387
      $first->condition($this->conditions);
388
    }
389

    
390
    // Calculate maximum keyword relevance, to normalize it.
391
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
392
    $this->normalize = $first
393
      ->range(0, 1)
394
      ->orderBy('calculated_score', 'DESC')
395
      ->execute()
396
      ->fetchField();
397

    
398
    if ($this->normalize) {
399
      return TRUE;
400
    }
401
    return FALSE;
402
  }
403

    
404
  /**
405
   * Adds a custom score expression to the search query.
406
   *
407
   * Score expressions are used to order search results. If no calls to
408
   * addScore() have taken place, a default keyword relevance score will be
409
   * used. However, if at least one call to addScore() has taken place, the
410
   * keyword relevance score is not automatically added.
411
   *
412
   * Note that you must use this method to add ordering to your searches, and
413
   * not call orderBy() directly, when using the SearchQuery extender. This is
414
   * because of the two-pass system the SearchQuery class uses to normalize
415
   * scores.
416
   *
417
   * @param $score
418
   *   The score expression, which should evaluate to a number between 0 and 1.
419
   *   The string 'i.relevance' in a score expression will be replaced by a
420
   *   measure of keyword relevance between 0 and 1.
421
   * @param $arguments
422
   *   Query arguments needed to provide values to the score expression.
423
   * @param $multiply
424
   *   If set, the score is multiplied with this value. However, all scores
425
   *   with multipliers are then divided by the total of all multipliers, so
426
   *   that overall, the normalization is maintained.
427
   *
428
   * @return object
429
   *   The updated query object.
430
   */
431
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
432
    if ($multiply) {
433
      $i = count($this->multiply);
434
      // Modify the score expression so it is multiplied by the multiplier,
435
      // with a divisor to renormalize.
436
      $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
437
      // Add an argument for the multiplier. The :total_$i argument is taken
438
      // care of in the execute() method, which is when the total divisor is
439
      // calculated.
440
      $arguments[':multiply_' . $i] = $multiply;
441
      $this->multiply[] = $multiply;
442
    }
443

    
444
    $this->scores[] = $score;
445
    $this->scoresArguments += $arguments;
446

    
447
    return $this;
448
  }
449

    
450
  /**
451
   * Executes the search.
452
   *
453
   * If not already done, this executes the first pass query. Then the complex
454
   * conditions are applied to the query including score expressions and
455
   * ordering.
456
   *
457
   * @return
458
   *   FALSE if the first pass query returned no results, and a database result
459
   *   set if there were results.
460
   */
461
  public function execute()
462
  {
463
    if (!$this->executedFirstPass) {
464
      $this->executeFirstPass();
465
    }
466
    if (!$this->normalize) {
467
      return new DatabaseStatementEmpty();
468
    }
469

    
470
    // Add conditions to query.
471
    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
472
    $this->condition($this->conditions);
473

    
474
    if (empty($this->scores)) {
475
      // Add default score.
476
      $this->addScore('i.relevance');
477
    }
478

    
479
    if (count($this->multiply)) {
480
      // Re-normalize scores with multipliers by dividing by the total of all
481
      // multipliers. The expressions were altered in addScore(), so here just
482
      // add the arguments for the total.
483
      $i = 0;
484
      $sum = array_sum($this->multiply);
485
      foreach ($this->multiply as $total) {
486
        $this->scoresArguments[':total_' . $i] = $sum;
487
        $i++;
488
      }
489
    }
490

    
491
    // Replace the pseudo-expression 'i.relevance' with a measure of keyword
492
    // relevance in all score expressions, using string replacement. Careful
493
    // though! If you just print out a float, some locales use ',' as the
494
    // decimal separator in PHP, while SQL always uses '.'. So, make sure to
495
    // set the number format correctly.
496
    $relevance = number_format((1.0 / $this->normalize), 10, '.', '');
497
    $this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores);
498

    
499
    // Add all scores together to form a query field.
500
    $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);
501

    
502
    // If an order has not yet been set for this query, add a default order
503
    // that sorts by the calculated sum of scores.
504
    if (count($this->getOrderBy()) == 0) {
505
      $this->orderBy('calculated_score', 'DESC');
506
    }
507

    
508
    // Add useful metadata.
509
    $this
510
      ->addMetaData('normalize', $this->normalize)
511
      ->fields('i', array('type', 'sid'));
512

    
513
    return $this->query->execute();
514
  }
515

    
516
  /**
517
   * Builds the default count query for SearchQuery.
518
   *
519
   * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
520
   * also add the same conditions as execute() because countQuery() is called
521
   * first.
522
   */
523
  public function countQuery() {
524
    // Clone the inner query.
525
    $inner = clone $this->query;
526

    
527
    // Add conditions to query.
528
    $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
529
    $inner->condition($this->conditions);
530

    
531
    // Remove existing fields and expressions, they are not needed for a count
532
    // query.
533
    $fields =& $inner->getFields();
534
    $fields = array();
535
    $expressions =& $inner->getExpressions();
536
    $expressions = array();
537

    
538
    // Add the sid as the only field and count them as a subquery.
539
    $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'slave'));
540

    
541
    // Add the COUNT() expression.
542
    $count->addExpression('COUNT(*)');
543

    
544
    return $count;
545
  }
546
}