1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* @file
|
5
|
* Search query extender and helper functions.
|
6
|
*/
|
7
|
|
8
|
/**
|
9
|
* Do a query on the full-text search index for a word or words.
|
10
|
*
|
11
|
* This function is normally only called by each module that supports the
|
12
|
* indexed search (and thus, implements hook_update_index()).
|
13
|
*
|
14
|
* Results are retrieved in two logical passes. However, the two passes are
|
15
|
* joined together into a single query. And in the case of most simple
|
16
|
* queries the second pass is not even used.
|
17
|
*
|
18
|
* The first pass selects a set of all possible matches, which has the benefit
|
19
|
* of also providing the exact result set for simple "AND" or "OR" searches.
|
20
|
*
|
21
|
* The second portion of the query further refines this set by verifying
|
22
|
* advanced text conditions (such as negative or phrase matches).
|
23
|
*
|
24
|
* The used query object has the tag 'search_$module' and can be further
|
25
|
* extended with hook_query_alter().
|
26
|
*/
|
27
|
class SearchQuery extends SelectQueryExtender {
|
28
|
/**
|
29
|
* The search query that is used for searching.
|
30
|
*
|
31
|
* @var string
|
32
|
*/
|
33
|
protected $searchExpression;
|
34
|
|
35
|
/**
|
36
|
* Type of search (search module).
|
37
|
*
|
38
|
* This maps to the value of the type column in search_index, and is equal
|
39
|
* to the machine-readable name of the module that implements
|
40
|
* hook_search_info().
|
41
|
*
|
42
|
* @var string
|
43
|
*/
|
44
|
protected $type;
|
45
|
|
46
|
/**
|
47
|
* Positive and negative search keys.
|
48
|
*
|
49
|
* @var array
|
50
|
*/
|
51
|
protected $keys = array('positive' => array(), 'negative' => array());
|
52
|
|
53
|
/**
|
54
|
* Indicates whether the first pass query requires complex conditions (LIKE).
|
55
|
*
|
56
|
* @var boolean.
|
57
|
*/
|
58
|
protected $simple = TRUE;
|
59
|
|
60
|
/**
|
61
|
* Conditions that are used for exact searches.
|
62
|
*
|
63
|
* This is always used for the second pass query but not for the first pass,
|
64
|
* unless $this->simple is FALSE.
|
65
|
*
|
66
|
* @var DatabaseCondition
|
67
|
*/
|
68
|
protected $conditions;
|
69
|
|
70
|
/**
|
71
|
* Indicates how many matches for a search query are necessary.
|
72
|
*
|
73
|
* @var int
|
74
|
*/
|
75
|
protected $matches = 0;
|
76
|
|
77
|
/**
|
78
|
* Array of search words.
|
79
|
*
|
80
|
* These words have to match against {search_index}.word.
|
81
|
*
|
82
|
* @var array
|
83
|
*/
|
84
|
protected $words = array();
|
85
|
|
86
|
/**
|
87
|
* Multiplier for the normalized search score.
|
88
|
*
|
89
|
* This value is calculated by the first pass query and multiplied with the
|
90
|
* actual score of a specific word to make sure that the resulting calculated
|
91
|
* score is between 0 and 1.
|
92
|
*
|
93
|
* @var float
|
94
|
*/
|
95
|
protected $normalize;
|
96
|
|
97
|
/**
|
98
|
* Indicates whether the first pass query has been executed.
|
99
|
*
|
100
|
* @var boolean
|
101
|
*/
|
102
|
protected $executedFirstPass = FALSE;
|
103
|
|
104
|
/**
|
105
|
* Stores score expressions.
|
106
|
*
|
107
|
* @var array
|
108
|
*
|
109
|
* @see addScore()
|
110
|
*/
|
111
|
protected $scores = array();
|
112
|
|
113
|
/**
|
114
|
* Stores arguments for score expressions.
|
115
|
*
|
116
|
* @var array
|
117
|
*/
|
118
|
protected $scoresArguments = array();
|
119
|
|
120
|
/**
|
121
|
* Stores multipliers for score expressions.
|
122
|
*
|
123
|
* @var array
|
124
|
*/
|
125
|
protected $multiply = array();
|
126
|
|
127
|
/**
|
128
|
* Whether or not search expressions were ignored.
|
129
|
*
|
130
|
* The maximum number of AND/OR combinations exceeded can be configured to
|
131
|
* avoid Denial-of-Service attacks. Expressions beyond the limit are ignored.
|
132
|
*
|
133
|
* @var boolean
|
134
|
*/
|
135
|
protected $expressionsIgnored = FALSE;
|
136
|
|
137
|
/**
|
138
|
* Sets up the search query expression.
|
139
|
*
|
140
|
* @param $query
|
141
|
* A search query string, which can contain options.
|
142
|
* @param $module
|
143
|
* The search module. This maps to {search_index}.type in the database.
|
144
|
*
|
145
|
* @return
|
146
|
* The SearchQuery object.
|
147
|
*/
|
148
|
public function searchExpression($expression, $module) {
|
149
|
$this->searchExpression = $expression;
|
150
|
$this->type = $module;
|
151
|
|
152
|
// Add a search_* tag. This needs to be added before any preExecute methods
|
153
|
// for decorated queries are called, as $this->prepared will be set to TRUE
|
154
|
// and tags added in the execute method will never get used. For example,
|
155
|
// if $query is extended by 'SearchQuery' then 'PagerDefault', the
|
156
|
// search-specific tag will be added too late (when preExecute() has
|
157
|
// already been called from the PagerDefault extender), and as a
|
158
|
// consequence will not be available to hook_query_alter() implementations,
|
159
|
// nor will the correct hook_query_TAG_alter() implementations get invoked.
|
160
|
// See node_search_execute().
|
161
|
$this->addTag('search_' . $module);
|
162
|
|
163
|
return $this;
|
164
|
}
|
165
|
|
166
|
/**
|
167
|
* Applies a search option and removes it from the search query string.
|
168
|
*
|
169
|
* These options are in the form option:value,value2,value3.
|
170
|
*
|
171
|
* @param $option
|
172
|
* Name of the option.
|
173
|
* @param $column
|
174
|
* Name of the database column to which the value should be applied.
|
175
|
*
|
176
|
* @return
|
177
|
* TRUE if a value for that option was found, FALSE if not.
|
178
|
*/
|
179
|
public function setOption($option, $column) {
|
180
|
if ($values = search_expression_extract($this->searchExpression, $option)) {
|
181
|
$or = db_or();
|
182
|
foreach (explode(',', $values) as $value) {
|
183
|
$or->condition($column, $value);
|
184
|
}
|
185
|
$this->condition($or);
|
186
|
$this->searchExpression = search_expression_insert($this->searchExpression, $option);
|
187
|
return TRUE;
|
188
|
}
|
189
|
return FALSE;
|
190
|
}
|
191
|
|
192
|
/**
|
193
|
* Parses the search query into SQL conditions.
|
194
|
*
|
195
|
* We build two queries that match the dataset bodies.
|
196
|
*/
|
197
|
protected function parseSearchExpression() {
|
198
|
// Matchs words optionally prefixed by a dash. A word in this case is
|
199
|
// something between two spaces, optionally quoted.
|
200
|
preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression , $keywords, PREG_SET_ORDER);
|
201
|
|
202
|
if (count($keywords) == 0) {
|
203
|
return;
|
204
|
}
|
205
|
|
206
|
// Classify tokens.
|
207
|
$or = FALSE;
|
208
|
$warning = '';
|
209
|
$limit_combinations = variable_get('search_and_or_limit', 7);
|
210
|
// The first search expression does not count as AND.
|
211
|
$and_count = -1;
|
212
|
$or_count = 0;
|
213
|
foreach ($keywords as $match) {
|
214
|
if ($or_count && $and_count + $or_count >= $limit_combinations) {
|
215
|
// Ignore all further search expressions to prevent Denial-of-Service
|
216
|
// attacks using a high number of AND/OR combinations.
|
217
|
$this->expressionsIgnored = TRUE;
|
218
|
break;
|
219
|
}
|
220
|
$phrase = FALSE;
|
221
|
// Strip off phrase quotes.
|
222
|
if ($match[2]{0} == '"') {
|
223
|
$match[2] = substr($match[2], 1, -1);
|
224
|
$phrase = TRUE;
|
225
|
$this->simple = FALSE;
|
226
|
}
|
227
|
// Simplify keyword according to indexing rules and external
|
228
|
// preprocessors. Use same process as during search indexing, so it
|
229
|
// will match search index.
|
230
|
$words = search_simplify($match[2]);
|
231
|
// Re-explode in case simplification added more words, except when
|
232
|
// matching a phrase.
|
233
|
$words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
|
234
|
// Negative matches.
|
235
|
if ($match[1] == '-') {
|
236
|
$this->keys['negative'] = array_merge($this->keys['negative'], $words);
|
237
|
}
|
238
|
// OR operator: instead of a single keyword, we store an array of all
|
239
|
// OR'd keywords.
|
240
|
elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
|
241
|
$last = array_pop($this->keys['positive']);
|
242
|
// Starting a new OR?
|
243
|
if (!is_array($last)) {
|
244
|
$last = array($last);
|
245
|
}
|
246
|
$this->keys['positive'][] = $last;
|
247
|
$or = TRUE;
|
248
|
$or_count++;
|
249
|
continue;
|
250
|
}
|
251
|
// AND operator: implied, so just ignore it.
|
252
|
elseif ($match[2] == 'AND' || $match[2] == 'and') {
|
253
|
$warning = $match[2];
|
254
|
continue;
|
255
|
}
|
256
|
|
257
|
// Plain keyword.
|
258
|
else {
|
259
|
if ($match[2] == 'or') {
|
260
|
$warning = $match[2];
|
261
|
}
|
262
|
if ($or) {
|
263
|
// Add to last element (which is an array).
|
264
|
$this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words);
|
265
|
}
|
266
|
else {
|
267
|
$this->keys['positive'] = array_merge($this->keys['positive'], $words);
|
268
|
$and_count++;
|
269
|
}
|
270
|
}
|
271
|
$or = FALSE;
|
272
|
}
|
273
|
|
274
|
// Convert keywords into SQL statements.
|
275
|
$this->conditions = db_and();
|
276
|
$simple_and = FALSE;
|
277
|
$simple_or = FALSE;
|
278
|
// Positive matches.
|
279
|
foreach ($this->keys['positive'] as $key) {
|
280
|
// Group of ORed terms.
|
281
|
if (is_array($key) && count($key)) {
|
282
|
$simple_or = TRUE;
|
283
|
$any = FALSE;
|
284
|
$queryor = db_or();
|
285
|
foreach ($key as $or) {
|
286
|
list($num_new_scores) = $this->parseWord($or);
|
287
|
$any |= $num_new_scores;
|
288
|
$queryor->condition('d.data', "% $or %", 'LIKE');
|
289
|
}
|
290
|
if (count($queryor)) {
|
291
|
$this->conditions->condition($queryor);
|
292
|
// A group of OR keywords only needs to match once.
|
293
|
$this->matches += ($any > 0);
|
294
|
}
|
295
|
}
|
296
|
// Single ANDed term.
|
297
|
else {
|
298
|
$simple_and = TRUE;
|
299
|
list($num_new_scores, $num_valid_words) = $this->parseWord($key);
|
300
|
$this->conditions->condition('d.data', "% $key %", 'LIKE');
|
301
|
if (!$num_valid_words) {
|
302
|
$this->simple = FALSE;
|
303
|
}
|
304
|
// Each AND keyword needs to match at least once.
|
305
|
$this->matches += $num_new_scores;
|
306
|
}
|
307
|
}
|
308
|
if ($simple_and && $simple_or) {
|
309
|
$this->simple = FALSE;
|
310
|
}
|
311
|
// Negative matches.
|
312
|
foreach ($this->keys['negative'] as $key) {
|
313
|
$this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
|
314
|
$this->simple = FALSE;
|
315
|
}
|
316
|
|
317
|
if ($warning == 'or') {
|
318
|
drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
|
319
|
}
|
320
|
}
|
321
|
|
322
|
/**
|
323
|
* Helper function for parseQuery().
|
324
|
*/
|
325
|
protected function parseWord($word) {
|
326
|
$num_new_scores = 0;
|
327
|
$num_valid_words = 0;
|
328
|
// Determine the scorewords of this word/phrase.
|
329
|
$split = explode(' ', $word);
|
330
|
foreach ($split as $s) {
|
331
|
$num = is_numeric($s);
|
332
|
if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
|
333
|
if (!isset($this->words[$s])) {
|
334
|
$this->words[$s] = $s;
|
335
|
$num_new_scores++;
|
336
|
}
|
337
|
$num_valid_words++;
|
338
|
}
|
339
|
}
|
340
|
// Return matching snippet and number of added words.
|
341
|
return array($num_new_scores, $num_valid_words);
|
342
|
}
|
343
|
|
344
|
/**
|
345
|
* Executes the first pass query.
|
346
|
*
|
347
|
* This can either be done explicitly, so that additional scores and
|
348
|
* conditions can be applied to the second pass query, or implicitly by
|
349
|
* addScore() or execute().
|
350
|
*
|
351
|
* @return
|
352
|
* TRUE if search items exist, FALSE if not.
|
353
|
*/
|
354
|
public function executeFirstPass() {
|
355
|
$this->parseSearchExpression();
|
356
|
|
357
|
if (count($this->words) == 0) {
|
358
|
form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
|
359
|
return FALSE;
|
360
|
}
|
361
|
if ($this->expressionsIgnored) {
|
362
|
drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => variable_get('search_and_or_limit', 7))), 'warning');
|
363
|
}
|
364
|
$this->executedFirstPass = TRUE;
|
365
|
|
366
|
if (!empty($this->words)) {
|
367
|
$or = db_or();
|
368
|
foreach ($this->words as $word) {
|
369
|
$or->condition('i.word', $word);
|
370
|
}
|
371
|
$this->condition($or);
|
372
|
}
|
373
|
// Build query for keyword normalization.
|
374
|
$this->join('search_total', 't', 'i.word = t.word');
|
375
|
$this
|
376
|
->condition('i.type', $this->type)
|
377
|
->groupBy('i.type')
|
378
|
->groupBy('i.sid')
|
379
|
->having('COUNT(*) >= :matches', array(':matches' => $this->matches));
|
380
|
|
381
|
// Clone the query object to do the firstPass query;
|
382
|
$first = clone $this->query;
|
383
|
|
384
|
// For complex search queries, add the LIKE conditions to the first pass query.
|
385
|
if (!$this->simple) {
|
386
|
$first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
|
387
|
$first->condition($this->conditions);
|
388
|
}
|
389
|
|
390
|
// Calculate maximum keyword relevance, to normalize it.
|
391
|
$first->addExpression('SUM(i.score * t.count)', 'calculated_score');
|
392
|
$this->normalize = $first
|
393
|
->range(0, 1)
|
394
|
->orderBy('calculated_score', 'DESC')
|
395
|
->execute()
|
396
|
->fetchField();
|
397
|
|
398
|
if ($this->normalize) {
|
399
|
return TRUE;
|
400
|
}
|
401
|
return FALSE;
|
402
|
}
|
403
|
|
404
|
/**
|
405
|
* Adds a custom score expression to the search query.
|
406
|
*
|
407
|
* Score expressions are used to order search results. If no calls to
|
408
|
* addScore() have taken place, a default keyword relevance score will be
|
409
|
* used. However, if at least one call to addScore() has taken place, the
|
410
|
* keyword relevance score is not automatically added.
|
411
|
*
|
412
|
* Note that you must use this method to add ordering to your searches, and
|
413
|
* not call orderBy() directly, when using the SearchQuery extender. This is
|
414
|
* because of the two-pass system the SearchQuery class uses to normalize
|
415
|
* scores.
|
416
|
*
|
417
|
* @param $score
|
418
|
* The score expression, which should evaluate to a number between 0 and 1.
|
419
|
* The string 'i.relevance' in a score expression will be replaced by a
|
420
|
* measure of keyword relevance between 0 and 1.
|
421
|
* @param $arguments
|
422
|
* Query arguments needed to provide values to the score expression.
|
423
|
* @param $multiply
|
424
|
* If set, the score is multiplied with this value. However, all scores
|
425
|
* with multipliers are then divided by the total of all multipliers, so
|
426
|
* that overall, the normalization is maintained.
|
427
|
*
|
428
|
* @return object
|
429
|
* The updated query object.
|
430
|
*/
|
431
|
public function addScore($score, $arguments = array(), $multiply = FALSE) {
|
432
|
if ($multiply) {
|
433
|
$i = count($this->multiply);
|
434
|
// Modify the score expression so it is multiplied by the multiplier,
|
435
|
// with a divisor to renormalize.
|
436
|
$score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
|
437
|
// Add an argument for the multiplier. The :total_$i argument is taken
|
438
|
// care of in the execute() method, which is when the total divisor is
|
439
|
// calculated.
|
440
|
$arguments[':multiply_' . $i] = $multiply;
|
441
|
$this->multiply[] = $multiply;
|
442
|
}
|
443
|
|
444
|
$this->scores[] = $score;
|
445
|
$this->scoresArguments += $arguments;
|
446
|
|
447
|
return $this;
|
448
|
}
|
449
|
|
450
|
/**
|
451
|
* Executes the search.
|
452
|
*
|
453
|
* If not already done, this executes the first pass query. Then the complex
|
454
|
* conditions are applied to the query including score expressions and
|
455
|
* ordering.
|
456
|
*
|
457
|
* @return
|
458
|
* FALSE if the first pass query returned no results, and a database result
|
459
|
* set if there were results.
|
460
|
*/
|
461
|
public function execute()
|
462
|
{
|
463
|
if (!$this->executedFirstPass) {
|
464
|
$this->executeFirstPass();
|
465
|
}
|
466
|
if (!$this->normalize) {
|
467
|
return new DatabaseStatementEmpty();
|
468
|
}
|
469
|
|
470
|
// Add conditions to query.
|
471
|
$this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
|
472
|
$this->condition($this->conditions);
|
473
|
|
474
|
if (empty($this->scores)) {
|
475
|
// Add default score.
|
476
|
$this->addScore('i.relevance');
|
477
|
}
|
478
|
|
479
|
if (count($this->multiply)) {
|
480
|
// Re-normalize scores with multipliers by dividing by the total of all
|
481
|
// multipliers. The expressions were altered in addScore(), so here just
|
482
|
// add the arguments for the total.
|
483
|
$i = 0;
|
484
|
$sum = array_sum($this->multiply);
|
485
|
foreach ($this->multiply as $total) {
|
486
|
$this->scoresArguments[':total_' . $i] = $sum;
|
487
|
$i++;
|
488
|
}
|
489
|
}
|
490
|
|
491
|
// Replace the pseudo-expression 'i.relevance' with a measure of keyword
|
492
|
// relevance in all score expressions, using string replacement. Careful
|
493
|
// though! If you just print out a float, some locales use ',' as the
|
494
|
// decimal separator in PHP, while SQL always uses '.'. So, make sure to
|
495
|
// set the number format correctly.
|
496
|
$relevance = number_format((1.0 / $this->normalize), 10, '.', '');
|
497
|
$this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores);
|
498
|
|
499
|
// Add all scores together to form a query field.
|
500
|
$this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);
|
501
|
|
502
|
// If an order has not yet been set for this query, add a default order
|
503
|
// that sorts by the calculated sum of scores.
|
504
|
if (count($this->getOrderBy()) == 0) {
|
505
|
$this->orderBy('calculated_score', 'DESC');
|
506
|
}
|
507
|
|
508
|
// Add useful metadata.
|
509
|
$this
|
510
|
->addMetaData('normalize', $this->normalize)
|
511
|
->fields('i', array('type', 'sid'));
|
512
|
|
513
|
return $this->query->execute();
|
514
|
}
|
515
|
|
516
|
/**
|
517
|
* Builds the default count query for SearchQuery.
|
518
|
*
|
519
|
* Since SearchQuery always uses GROUP BY, we can default to a subquery. We
|
520
|
* also add the same conditions as execute() because countQuery() is called
|
521
|
* first.
|
522
|
*/
|
523
|
public function countQuery() {
|
524
|
// Clone the inner query.
|
525
|
$inner = clone $this->query;
|
526
|
|
527
|
// Add conditions to query.
|
528
|
$inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
|
529
|
$inner->condition($this->conditions);
|
530
|
|
531
|
// Remove existing fields and expressions, they are not needed for a count
|
532
|
// query.
|
533
|
$fields =& $inner->getFields();
|
534
|
$fields = array();
|
535
|
$expressions =& $inner->getExpressions();
|
536
|
$expressions = array();
|
537
|
|
538
|
// Add the sid as the only field and count them as a subquery.
|
539
|
$count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'slave'));
|
540
|
|
541
|
// Add the COUNT() expression.
|
542
|
$count->addExpression('COUNT(*)');
|
543
|
|
544
|
return $count;
|
545
|
}
|
546
|
}
|