1
|
<?php
|
2
|
|
3
|
require_once '../autoloader.php';
|
4
|
|
5
|
/**
 * Reduce a character set label to the form used for alias comparison.
 *
 * Every run of characters other than ASCII letters and digits is removed,
 * every run of zeros that directly follows a letter is removed (the letter
 * itself is kept), and the result is lower-cased.
 *
 * @param string $charset Character set label, e.g. "ISO-8859-1"
 * @return string Normalized label, e.g. "iso88591"
 */
function normalize_character_set($charset)
{
	$pattern = '/(?:[^a-zA-Z0-9]+|([^0-9])0+)/';

	// Group 1 is only set by the zero-stripping alternative, so the '\1'
	// replacement keeps the letter there and is empty for separator runs.
	$stripped = preg_replace($pattern, '\1', $charset);

	return strtolower($stripped);
}
|
9
|
|
10
|
/**
 * Download the IANA character set registry and build the alias table.
 *
 * The registry text is scanned for "Name:" and "Alias:" lines. Each record
 * becomes one entry keyed by its preferred name (the "Name:" value, or the
 * alias flagged "(preferred MIME name)" when there is one) whose value is the
 * naturally sorted list of unique, normalized aliases. A fixed compatibility
 * table then folds some character sets into the superset encoding that
 * should be used in their place. Aliases that end up under more than one
 * preferred name are reported with echo.
 *
 * @return array|false Map of preferred name => list of normalized aliases,
 *                     or false when the registry could not be retrieved
 */
function build_character_set_list()
{
	$file = new SimplePie_File('http://www.iana.org/assignments/character-sets');

	// `===` binds tighter than `&`, so the mask test needs its own
	// parentheses; without them the expression is `method & (CONST === 0)`.
	if (!$file->success
		&& !(($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0
			|| $file->status_code === 200
			|| ($file->status_code > 206 && $file->status_code < 300)))
	{
		return false;
	}

	$lines = explode("\n", $file->body);
	unset($file);

	// Pass 1: collect the raw records in file order. Keeping them in a list
	// (instead of flushing on the next "Name:" line) guarantees that the last
	// record of the registry is not lost.
	$records = array();
	$current = -1;
	foreach ($lines as $line)
	{
		if (preg_match('/^Name:\s+(\S+)/', $line, $match))
		{
			$records[++$current] = array(
				'preferred' => $match[1],
				'aliases' => array($match[1]),
			);
		}
		// Alias lines that appear before any "Name:" line belong to no record.
		elseif ($current >= 0 && preg_match('/^Alias:\s+(\S+)(\s+\(preferred MIME name\))?\s*$/', $line, $match))
		{
			if ($match[1] !== 'None')
			{
				$records[$current]['aliases'][] = $match[1];
				if (isset($match[2]))
				{
					$records[$current]['preferred'] = $match[1];
				}
			}
		}
	}

	// Pass 2: normalize, de-duplicate and sort the aliases of every record.
	$charsets = array();
	foreach ($records as $record)
	{
		$normalized = array_unique(array_map('normalize_character_set', $record['aliases']));
		natsort($normalized);
		$charsets[$record['preferred']] = $normalized;
	}

	// Character set => encoding that replaces it.
	$compat = array(
		'EUC-KR' => 'windows-949',
		'GB2312' => 'GBK',
		'GB_2312-80' => 'GBK',
		'ISO-8859-1' => 'windows-1252',
		'ISO-8859-9' => 'windows-1254',
		'ISO-8859-11' => 'windows-874',
		'KS_C_5601-1987' => 'windows-949',
		'Shift_JIS' => 'Windows-31J',
		'TIS-620' => 'windows-874',
	);

	foreach ($compat as $real => $replace)
	{
		if (isset($charsets[$real]) && isset($charsets[$replace]))
		{
			// Both are registered: move the aliases over to the replacement.
			$charsets[$replace] = array_merge($charsets[$replace], $charsets[$real]);
			unset($charsets[$real]);
		}
		elseif (isset($charsets[$real]))
		{
			// Only the replaced set is registered: rename the entry and make
			// sure the replacement name itself is one of its aliases.
			$charsets[$replace] = $charsets[$real];
			$charsets[$replace][] = normalize_character_set($replace);
			unset($charsets[$real]);
		}
		else
		{
			// The replaced set is not registered: still map its name.
			$charsets[$replace][] = normalize_character_set($real);
		}
		$charsets[$replace] = array_unique($charsets[$replace]);
		natsort($charsets[$replace]);
	}

	uksort($charsets, 'strnatcasecmp');

	// array_values() is required: with string keys PHP 8 would treat every
	// entry as a named argument and array_merge() would raise an Error.
	$all = call_user_func_array('array_merge', array_values($charsets));
	$all_count = array_count_values($all);
	if (max($all_count) > 1)
	{
		echo "Duplicated charsets:\n";
		foreach ($all_count as $charset => $count)
		{
			if ($count > 1)
			{
				echo "$charset\n";
			}
		}
	}

	return $charsets;
}
|
113
|
|
114
|
/**
 * Look up the preferred name for a character set label.
 *
 * The label is normalized and searched for in the alias lists returned by
 * build_character_set_list(), which is rebuilt on every call.
 *
 * @param string $charset Character set label to resolve
 * @return string|false Preferred name of the first entry listing the label,
 *                      the label unchanged when no entry lists it, or false
 *                      when the character set list could not be built
 */
function charset($charset)
{
	$needle = normalize_character_set($charset);

	$table = build_character_set_list();
	if (!$table)
	{
		return false;
	}

	foreach ($table as $preferred => $aliases)
	{
		if (in_array($needle, $aliases))
		{
			return $preferred;
		}
	}

	// Unknown label: hand it back untouched.
	return $charset;
}
|
133
|
|
134
|
/**
 * Generate the PHP source of a static encoding() method.
 *
 * The generated method normalizes its argument with the same expression as
 * normalize_character_set() and switches on the result: every alias of a
 * character set becomes a case label that returns the preferred name, and
 * unknown labels are returned unchanged.
 *
 * The text is assembled with explicit "\t" and "\n" escapes so that the
 * indentation of the generated code does not depend on how this file itself
 * is indented.
 *
 * @return string|false Source code of the method, or false when the
 *                      character set list could not be built
 */
function build_function()
{
	$charsets = build_character_set_list();
	if (!$charsets)
	{
		return false;
	}

	// "\\1" is written with a doubled backslash because "\1" would be read
	// as an octal escape inside a double-quoted string.
	$return = "public static function encoding(\$charset)\n"
		. "{\n"
		. "\t// Normalization from UTS #22\n"
		. "\tswitch (strtolower(preg_replace('/(?:[^a-zA-Z0-9]+|([^0-9])0+)/', '\\1', \$charset)))\n"
		. "\t{\n";

	foreach ($charsets as $preferred => $aliases)
	{
		// All aliases of one character set fall through to a single return.
		foreach ($aliases as $alias)
		{
			$return .= "\t\tcase " . var_export($alias, true) . ":\n";
		}
		$return .= "\t\t\treturn " . var_export($preferred, true) . ";\n\n";
	}

	$return .= "\t\tdefault:\n"
		. "\t\t\treturn \$charset;\n"
		. "\t}\n"
		. "}";

	return $return;
}
|
167
|
|
168
|
// Print the generated method only when this file is the script being run
// from the command line, not when it is included by another script.
if (php_sapi_name() === 'cli' && realpath($_SERVER['argv'][0]) === __FILE__)
{
	$generated = build_function();
	if ($generated === false)
	{
		// Fail loudly: the output is typically redirected into a file, and
		// an empty result with exit status 0 would go unnoticed.
		fwrite(STDERR, "Unable to retrieve the IANA character set registry\n");
		exit(1);
	}
	echo $generated;
}
|
172
|
|
173
|
?>
|