Projet

Général

Profil

Paste
Télécharger (3,75 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / libraries / simplepie / build / charset.php @ 41cc1b08

1
<?php
2

    
3
// Resolve the autoloader relative to this script rather than the current
// working directory, so the script can be launched from any directory.
require_once dirname(__FILE__) . '/../autoloader.php';
4

    
5
/**
 * Reduce a character set name to a canonical lookup key.
 *
 * In a single left-to-right pass, every run of non-alphanumeric characters
 * is removed and every run of zeros that directly follows a non-digit
 * character is dropped (the non-digit itself is kept). The result is then
 * lower-cased.
 *
 * Note: a zero that follows a removed separator is kept, because the
 * separator is consumed before the zero is examined; for example
 * 'u.t.f-008' becomes 'utf008', while 'IBM00858' becomes 'ibm858'.
 *
 * @param string $charset Character set name in any spelling
 * @return string Normalized name
 */
function normalize_character_set($charset)
{
    $pattern = '/(?:[^a-zA-Z0-9]+|([^0-9])0+)/';
    $stripped = preg_replace($pattern, '\1', $charset);

    return strtolower($stripped);
}
9

    
10
/**
 * Normalize, de-duplicate and naturally sort the aliases of one character set.
 *
 * @param array $aliases Raw alias names as read from the registry
 * @return array Normalized aliases, natural order, original keys preserved
 */
function _normalize_alias_list($aliases)
{
    $normalized = array_unique(array_map('normalize_character_set', $aliases));
    natsort($normalized);

    return $normalized;
}

/**
 * Download the IANA character set registry and build an alias table.
 *
 * Each "Name:" line starts a new character set; following "Alias:" lines add
 * aliases to it, and an alias flagged "(preferred MIME name)" becomes the key
 * for that character set. After parsing, the WHATWG compatibility
 * replacements are folded in, the table is sorted by key, and any alias that
 * ends up under more than one key is echoed as a warning.
 *
 * @return array|false Map of preferred name => list of normalized aliases,
 *                     or false when the registry could not be fetched
 */
function build_character_set_list()
{
    $file = new SimplePie_File('http://www.iana.org/assignments/character-sets');

    // The bitmask test must be parenthesized: "===" binds tighter than "&",
    // so without the parentheses the mask would be compared to 0 first.
    if (
        !$file->success
        && !(
            ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0
            || $file->status_code === 200
            || ($file->status_code > 206 && $file->status_code < 300)
        )
    )
    {
        return false;
    }

    // Nothing to parse if no textual body came back.
    if (!is_string($file->body))
    {
        return false;
    }

    $lines = explode("\n", $file->body);
    unset($file);

    $charsets = array();
    $aliases = null;
    $preferred = null;

    foreach ($lines as $line)
    {
        // New character set
        if (preg_match('/^Name:\s+(\S+)/', $line, $match))
        {
            // Store the character set collected so far before starting the next
            if ($aliases !== null)
            {
                $charsets[$preferred] = _normalize_alias_list($aliases);
            }

            $aliases = array($match[1]);
            $preferred = $match[1];
        }
        // Another alias (ignored until the first "Name:" line has been seen)
        elseif ($aliases !== null && preg_match('/^Alias:\s+(\S+)(\s+\(preferred MIME name\))?\s*$/', $line, $match))
        {
            if ($match[1] !== 'None')
            {
                $aliases[] = $match[1];
                if (isset($match[2]))
                {
                    $preferred = $match[1];
                }
            }
        }
    }

    // The final character set has no following "Name:" line to trigger the
    // store above, so it has to be stored here or it would be lost.
    if ($aliases !== null)
    {
        $charsets[$preferred] = _normalize_alias_list($aliases);
    }

    // Compatibility replacements
    // From http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#misinterpreted-for-compatibility
    $compat = array(
        'EUC-KR' => 'windows-949',
        'GB2312' => 'GBK',
        'GB_2312-80' => 'GBK',
        'ISO-8859-1' => 'windows-1252',
        'ISO-8859-9' => 'windows-1254',
        'ISO-8859-11' => 'windows-874',
        'KS_C_5601-1987' => 'windows-949',
        'Shift_JIS' => 'Windows-31J',
        'TIS-620' => 'windows-874',
        //'US-ASCII' => 'windows-1252',
    );

    foreach ($compat as $real => $replace)
    {
        if (isset($charsets[$real]))
        {
            if (isset($charsets[$replace]))
            {
                // Both exist: fold the replaced set's aliases into the replacement
                $charsets[$replace] = array_merge($charsets[$replace], $charsets[$real]);
            }
            else
            {
                // Only the replaced set exists: rename it and add the new name as an alias
                $charsets[$replace] = $charsets[$real];
                $charsets[$replace][] = normalize_character_set($replace);
            }
            unset($charsets[$real]);
        }
        else
        {
            // The replaced set is not in the registry: just register its name as an alias
            $charsets[$replace][] = normalize_character_set($real);
        }

        $charsets[$replace] = array_unique($charsets[$replace]);
        natsort($charsets[$replace]);
    }

    // Sort it
    uksort($charsets, 'strnatcasecmp');

    // Check that nothing matches more than one.
    // array_values() is required: string keys passed to
    // call_user_func_array() are treated as named arguments on PHP 8.
    $all = call_user_func_array('array_merge', array_values($charsets));
    $all_count = array_count_values($all);
    if ($all_count && max($all_count) > 1)
    {
        echo "Duplicated charsets:\n";
        foreach ($all_count as $charset => $count)
        {
            if ($count > 1)
            {
                echo "$charset\n";
            }
        }
    }

    // And we're done!
    return $charsets;
}
113

    
114
/**
 * Look up the preferred name of a character set.
 *
 * The name is normalized with normalize_character_set() and compared against
 * the alias lists produced by build_character_set_list().
 *
 * @param string $charset Character set name in any spelling
 * @return string|false The preferred name when an alias matches, the input
 *                      unchanged when nothing matches, or false when the
 *                      character set list could not be built
 */
function charset($charset)
{
    $charsets = build_character_set_list();
    if (!$charsets)
    {
        return false;
    }

    $normalized_charset = normalize_character_set($charset);
    foreach ($charsets as $preferred => $aliases)
    {
        // Strict comparison: several aliases are purely numeric strings
        // (e.g. "850"), which a loose comparison would match numerically.
        if (in_array($normalized_charset, $aliases, true))
        {
            return $preferred;
        }
    }

    return $charset;
}
133

    
134
/**
 * Generate the PHP source of a static encoding() method.
 *
 * The generated method normalizes its argument and switches over every alias
 * returned by build_character_set_list(), returning the preferred name for
 * the matching group and the argument itself by default.
 *
 * @return string|false Generated source code, or false when the character
 *                      set list could not be built
 */
function build_function()
{
    $charsets = build_character_set_list();
    if (!$charsets)
    {
        return false;
    }

    // Method header and the opening of the switch statement
    $code = <<<EOF
public static function encoding(\$charset)
{
        // Normalization from UTS #22
        switch (strtolower(preg_replace('/(?:[^a-zA-Z0-9]+|([^0-9])0+)/', '\\1', \$charset)))
        {

EOF;

    // One group of case labels per character set, sharing a single return
    foreach ($charsets as $preferred => $aliases)
    {
        foreach ($aliases as $alias)
        {
            $code .= sprintf("\t\tcase %s:\n", var_export($alias, true));
        }
        $code .= sprintf("\t\t\treturn %s;\n\n", var_export($preferred, true));
    }

    // Default branch and closing braces
    $code .= <<<EOF
                default:
                        return \$charset;
        }
}
EOF;

    return $code;
}
167

    
168
// Run the generator only when this file is executed directly from the
// command line, not when it is included by another script.
if (php_sapi_name() === 'cli' && realpath($_SERVER['argv'][0]) === __FILE__)
{
    $generated = build_function();
    if ($generated === false)
    {
        // Report the failure instead of silently printing nothing.
        fwrite(STDERR, "Unable to build the character set list\n");
        exit(1);
    }
    echo $generated;
}
172

    
173
?>