1 |
85ad3d82
|
Assos Assos
|
<?php
|
2 |
|
|
// $Id $
|
3 |
|
|
|
4 |
|
|
/**
|
5 |
|
|
* @file
|
6 |
|
|
* Helper class to clean strings to make them URL safe and translatable.
|
7 |
|
|
*
|
8 |
|
|
* This was copied directly from pathauto and put here to be made available
|
9 |
|
|
* to all, because more things than just pathauto want URL safe strings.
|
10 |
|
|
*
|
11 |
|
|
* To use, simply:
|
12 |
|
|
* @code
|
13 |
|
|
* ctools_include('cleanstring');
|
14 |
|
|
* $output = ctools_cleanstring($string);
* @endcode
|
15 |
|
|
*
|
16 |
|
|
* You can add a variety of settings as an array in the second argument,
|
17 |
|
|
* including words to ignore, how to deal with punctuation, length
|
18 |
|
|
* limits, and more. See the function itself for options.
|
19 |
|
|
*/
|
20 |
|
|
|
21 |
|
|
/**
 * PCRE character-class fragment listing non-alphanumeric Unicode characters.
 *
 * Despite its name, this class enumerates the characters that are NOT letters
 * or numbers (it starts with the ASCII controls and punctuation
 * \x{0}-\x{2f}, \x{3a}-\x{40}, ...). It was copied from search.module's
 * PREG_CLASS_SEARCH_EXCLUDE, where it describes what is excluded from the
 * search index. Negating it, i.e. '[^' . CTOOLS_PREG_CLASS_ALNUM . ']',
 * therefore matches alphanumeric characters.
 *
 * The value is only the inside of a character class: wrap it in square
 * brackets and use the /u modifier, since it relies on \x{...} code points.
 *
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * The characters left out of this list (and so treated as alphanumeric)
 * belong to the following character classes:
 *   Lu  Letter, Uppercase
 *   Ll  Letter, Lowercase
 *   Lt  Letter, Titlecase
 *   Lo  Letter, Other
 *   Nd  Number, Decimal Digit
 *   No  Number, Other
 */
define('CTOOLS_PREG_CLASS_ALNUM',
'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-' .
'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-' .
'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}' .
'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-' .
'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-' .
'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-' .
'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}' .
'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-' .
'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}' .
'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-' .
'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}' .
'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-' .
'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}' .
'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}' .
'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}' .
'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}' .
'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-' .
'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-' .
'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}' .
'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}' .
'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}' .
'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}' .
'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}' .
'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}' .
'\x{a80b}\x{a823}-\x{a82b}\x{e000}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}' .
'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-' .
'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
|
64 |
|
|
|
65 |
|
|
/**
 * Clean up a string value provided by a module.
 *
 * Resulting string contains only alphanumerics and separators (when the
 * default 'reduce ascii' setting is left enabled).
 *
 * @param $string
 *   A string to clean.
 * @param $settings
 *   An optional array of settings to use.
 *   - 'clean id': Optional identifier. If set, the settings are first passed
 *     through drupal_alter('ctools_cleanstring_' . $settings['clean id']).
 *   - 'clean slash': If set, slashes will be removed. Defaults to TRUE,
 *     so you have to explicitly set this to FALSE to keep the slashes.
 *   - 'ignore words': Words that will be removed rather than made safe.
 *     Either an array of words or a comma-separated string of words.
 *     Matching is case-insensitive and on whole words only. Defaults to an
 *     empty array.
 *   - 'separator': Change spaces and untranslatable characters to
 *     this string. Defaults to '-'.
 *   - 'replacements': An array of direct replacements to be made that will
 *     be implemented via strtr(). Defaults to an empty array.
 *   - 'transliterate': If set to a non-empty value and the Transliteration
 *     module is enabled, the string is run through transliteration_get().
 *     The value itself is not otherwise used. Defaults to FALSE.
 *   - 'reduce ascii': If set to TRUE, every run of characters other than
 *     a-z, A-Z, 0-9 and '/' is replaced by the separator. Defaults to TRUE.
 *   - 'max length': If set to a number, reduce the resulting string to this
 *     maximum length. Defaults to no maximum length.
 *   - 'lower case': If set to TRUE, convert the result to lower case.
 *     Defaults to FALSE.
 *   These settings will be passed through drupal_alter.
 *
 * @return
 *   The cleaned string.
 */
function ctools_cleanstring($string, $settings = array()) {
  $settings += array(
    'clean slash' => TRUE,
    'ignore words' => array(),
    'separator' => '-',
    'replacements' => array(),
    'transliterate' => FALSE,
    'reduce ascii' => TRUE,
    'max length' => FALSE,
    'lower case' => FALSE,
  );

  // Allow modules to make other changes to the settings.
  if (isset($settings['clean id'])) {
    drupal_alter('ctools_cleanstring_' . $settings['clean id'], $settings);
  }

  drupal_alter('ctools_cleanstring', $settings);

  // An alter hook may have unset or nulled the separator; normalise it to a
  // string once so every step below can rely on it.
  $separator = isset($settings['separator']) ? (string) $settings['separator'] : '';

  $output = $string;

  // Do any replacements the user selected up front.
  if (!empty($settings['replacements'])) {
    $output = strtr($output, $settings['replacements']);
  }

  // Remove slashes if instructed to do so.
  if ($settings['clean slash']) {
    $output = str_replace('/', '', $output);
  }

  if (!empty($settings['transliterate']) && module_exists('transliteration')) {
    $output = transliteration_get($output);
  }

  // Reduce to the subset of ASCII96 letters and numbers. Slashes are kept
  // here on purpose; they are only dropped by the 'clean slash' step above.
  if ($settings['reduce ascii']) {
    $pattern = '/[^a-zA-Z0-9\/]+/';
    $output = preg_replace($pattern, $separator, $output);
  }

  // Get rid of words that are on the ignore list.
  if (!empty($settings['ignore words'])) {
    // Accept both the documented array form and the legacy comma-separated
    // string form.
    $ignore_words = $settings['ignore words'];
    if (!is_array($ignore_words)) {
      $ignore_words = explode(',', $ignore_words);
    }

    // Quote each word so regex metacharacters in a word are taken literally.
    $quoted = array();
    foreach ($ignore_words as $word) {
      $word = trim($word);
      if ($word !== '') {
        $quoted[] = preg_quote($word, '/');
      }
    }

    if (!empty($quoted)) {
      $ignore_re = '\b(?:' . implode('|', $quoted) . ')\b';

      // Prefer Unicode-aware matching; preg_replace() returns NULL when the
      // subject is not valid UTF-8, in which case fall back to byte matching.
      $cleaned = preg_replace('/' . $ignore_re . '/iu', '', $output);
      if ($cleaned === NULL) {
        $cleaned = preg_replace('/' . $ignore_re . '/i', '', $output);
      }
      if ($cleaned !== NULL) {
        $output = $cleaned;
      }
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\s+/', $separator, $output);

  // Nothing to trim or collapse when the separator is empty; building a
  // pattern from it would otherwise match unrelated characters.
  if ($separator !== '') {
    // Quote the whole separator and group it so the quantifier applies to
    // the complete separator, not just its last character.
    $seppattern = '(?:' . preg_quote($separator, '/') . ')';

    // Trim any leading or trailing separators.
    $output = preg_replace('/^' . $seppattern . '+|' . $seppattern . '+$/', '', $output);

    // Replace multiple separators with a single one.
    $output = preg_replace('/' . $seppattern . '+/', $separator, $output);
  }

  // Enforce the maximum component length.
  if (!empty($settings['max length'])) {
    $output = ctools_cleanstring_truncate($output, $settings['max length'], $separator);
  }

  if (!empty($settings['lower case'])) {
    $output = drupal_strtolower($output);
  }
  return $output;
}
|
179 |
|
|
|
180 |
|
|
/**
 * A friendly version of truncate_utf8.
 *
 * Shortens a string to at most $length characters, preferring to cut at the
 * last separator so that the result does not end in a partial word.
 *
 * @param $string
 *   The string to be truncated.
 * @param $length
 *   An integer for the maximum desired length, in characters.
 * @param $separator
 *   A string which contains the word boundary such as - or _. If empty, the
 *   string is simply cut at $length characters.
 *
 * @return
 *   The string, unchanged if it already fits, otherwise truncated to at most
 *   $length characters.
 */
function ctools_cleanstring_truncate($string, $length, $separator) {
  if (drupal_strlen($string) <= $length) {
    return $string;
  }

  // Keep one character beyond the limit so that a separator sitting exactly
  // at the cut point is still found and the word before it is kept whole.
  $candidate = drupal_substr($string, 0, $length + 1);

  // strrpos() cannot search for an empty needle: older PHP versions emit a
  // warning and PHP 8 returns the haystack length, which would leave the
  // result one character too long. Skip the search in that case.
  $separator = (string) $separator;
  $last_break = ($separator !== '') ? strrpos($candidate, $separator) : FALSE;

  // A break at position 0 is ignored on purpose: cutting there would return
  // an empty string. strrpos() yields a byte offset, so use byte-based
  // substr() to cut at it.
  if ($last_break) {
    return substr($candidate, 0, $last_break);
  }

  // No usable break: fall back to a hard cut at the maximum length.
  return drupal_substr($candidate, 0, $length);
}