/drupal7/sites/all/libraries/simplepie-1.3.1/idn/idna_convert.class.php - Annoter - Club Drupal - Forge Centrale Marseille

41cc1b08

Assos Assos

<?php

2

// {{{ license

3

4

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */

5

//

6

// +----------------------------------------------------------------------+

7

// | This library is free software; you can redistribute it and/or modify |

8

// | it under the terms of the GNU Lesser General Public License as       |

9

// | published by the Free Software Foundation; either version 2.1 of the |

10

// | License, or (at your option) any later version.                      |

11

// |                                                                      |

12

// | This library is distributed in the hope that it will be useful, but  |

13

// | WITHOUT ANY WARRANTY; without even the implied warranty of           |

14

// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |

15

// | Lesser General Public License for more details.                      |

16

// |                                                                      |

17

// | You should have received a copy of the GNU Lesser General Public     |

18

// | License along with this library; if not, write to the Free Software  |

19

// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 |

20

// | USA.                                                                 |

21

// +----------------------------------------------------------------------+

22

//

23

24

// }}}

25

26

/**

27

 * Encode/decode Internationalized Domain Names.

28

29

 * The class allows to convert internationalized domain names

30

 * (see RFC 3490 for details) as they can be used with various registries worldwide

31

 * to be translated between their original (localized) form and their encoded form

32

 * as it will be used in the DNS (Domain Name System).

33

34

 * The class provides two public methods, encode() and decode(), which do exactly

35

 * what you would expect them to do. You are allowed to use complete domain names,

36

 * simple strings and complete email addresses as well. That means, that you might

37

 * use any of the following notations:

38

39

 * - www.nörgler.com

40

 * - xn--nrgler-wxa

41

 * - xn--brse-5qa.xn--knrz-1ra.info

42

43

 * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4

44

 * array. Unicode output is available in the same formats.

45

 * You can select your preferred format via {@link set_parameter()}.

46

47

 * ACE input and output is always expected to be ASCII.

48

49

 * @author  Matthias Sommerfeld <mso@phlylabs.de>

50

 * @copyright 2004-2007 phlyLabs Berlin, http://phlylabs.de

51

 * @version 0.5.1

52

53

*/

54

class idna_convert

55

56

/**

57

     * Holds all relevant mapping tables, loaded from a separate file on construct

58

     * See RFC3454 for details

59

60

     * @var array

61

     * @access private

62

*/

63

    var $NP = array();

64

65

    // Internal settings, do not mess with them

66

    var $_punycode_prefix = 'xn--';

67

    var $_invalid_ucs =     0x80000000;

68

    var $_max_ucs =         0x10FFFF;

69

    var $_base =            36;

70

    var $_tmin =            1;

71

    var $_tmax =            26;

72

    var $_skew =            38;

73

    var $_damp =            700;

74

    var $_initial_bias =    72;

75

    var $_initial_n =       0x80;

76

    var $_sbase =           0xAC00;

77

    var $_lbase =           0x1100;

78

    var $_vbase =           0x1161;

79

    var $_tbase =           0x11A7;

80

    var $_lcount =          19;

81

    var $_vcount =          21;

82

    var $_tcount =          28;

83

    var $_ncount =          588;   // _vcount * _tcount

84

    var $_scount =          11172; // _lcount * _tcount * _vcount

85

    var $_error =           false;

86

87

    // See {@link set_parameter()} for details of how to change the following

88

    // settings from within your script / application

89

    var $_api_encoding   =  'utf8'; // Default input charset is UTF-8

90

    var $_allow_overlong =  false;  // Overlong UTF-8 encodings are forbidden

91

    var $_strict_mode    =  false;  // Behave strict or not

92

93

    // The constructor

94

    /**
     * Constructor entry point for PHP 5 and later.
     *
     * PHP 8 no longer treats a method named after its class as a constructor,
     * so without this method `new idna_convert()` would leave $this->NP empty.
     * It is declared before the legacy method and simply delegates to it.
     *
     * @param    mixed    Array of option => value pairs for set_parameter(); any other value is ignored
     */
    function __construct($options = false)
    {
        $this->idna_convert($options);
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so explicit calls keep working.
     *
     * Loads the serialized Nameprep tables from npdata.ser (located next to
     * this file) into $this->NP and applies any options that were passed.
     *
     * @param    mixed    Array of option => value pairs for set_parameter(); any other value is ignored
     * @return   boolean  Result of set_parameter() when options are given, true otherwise
     */
    function idna_convert($options = false)
    {
        // _sbase plus the number of syllables (_lcount * _vcount * _tcount)
        $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;

        $table_file = dirname(__FILE__).'/npdata.ser';
        if (function_exists('file_get_contents')) {
            $this->NP = unserialize(file_get_contents($table_file));
        } else {
            // Fallback for very old PHP versions without file_get_contents()
            $this->NP = unserialize(join('', file($table_file)));
        }

        // If parameters are given, pass these to the respective method
        if (is_array($options)) {
            return $this->set_parameter($options);
        }
        return true;
    }

107

108

109

/**

110

     * Sets a new option value. Available options and values:

111

     * [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,

112

     *         'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]

113

     * [overlong - Unicode does not allow unnecessarily long encodings of chars,

114

     *             to allow this, set this parameter to true, else to false;

115

     *             default is false.]

116

     * [strict - true: strict mode, good for registration purposes - Causes errors

117

     *           on failures; false: loose mode, ideal for "wildlife" applications

118

     *           by silently ignoring errors and returning the original input instead

119

120

     * @param    mixed     Parameter to set (string: single parameter; array of Parameter => Value pairs)

121

     * @param    string    Value to use (if parameter 1 is a string)

122

     * @return   boolean   true on success, false otherwise

123

     * @access   public

124

*/

125

    function set_parameter($option, $value = false)
    {
        // Normalise the single "name, value" call form to the array form
        $settings = is_array($option) ? $option : array($option => $value);

        // Loose (==) comparisons are used on purpose: they mirror switch semantics
        foreach ($settings as $name => $setting) {
            if ($name == 'encoding') {
                if ($setting == 'utf8' || $setting == 'ucs4_string' || $setting == 'ucs4_array') {
                    $this->_api_encoding = $setting;
                } else {
                    $this->_error('Set Parameter: Unknown parameter '.$setting.' for option '.$name);
                    return false;
                }
            } elseif ($name == 'overlong') {
                $this->_allow_overlong = (bool) $setting;
            } elseif ($name == 'strict') {
                $this->_strict_mode = (bool) $setting;
            } else {
                $this->_error('Set Parameter: Unknown option '.$name);
                return false;
            }
        }

        // Every option was recognised and stored
        return true;
    }

156

157

158

/**

159

     * Decode a given ACE domain name

160

     * @param    string   Domain name (ACE string)

161

     * [@param    string   Desired output encoding, see {@link set_parameter}]

162

     * @return   string   Decoded Domain name (UTF-8 or UCS-4)

163

     * @access   public

164

*/

165

    function decode($input, $one_time_encoding = false)
    {
        // A one-off output encoding, when given, has to be one of the known formats
        if ($one_time_encoding
                && !in_array($one_time_encoding, array('utf8', 'ucs4_string', 'ucs4_array'))) {
            $this->_error('Unknown encoding '.$one_time_encoding);
            return false;
        }

        // Drop surrounding whitespace (newlines in particular)
        $input = trim($input);

        // Matches labels that start with the Punycode prefix
        $prefix_pattern = '!^'.preg_quote($this->_punycode_prefix, '!').'!';

        if (strpos($input, '@')) {
            // Looks like an email address: not allowed in strict mode
            if ($this->_strict_mode) {
                $this->_error('Only simple domain name parts can be handled in strict mode');
                return false;
            }
            list($local_part, $domain_part) = explode('@', $input, 2);

            // Domain part first, then the local part; only prefixed labels are decoded
            $labels = explode('.', $domain_part);
            foreach ($labels as $idx => $label) {
                if (preg_match($prefix_pattern, $label)) {
                    $plain = $this->_decode($label);
                    if ($plain) {
                        $labels[$idx] = $plain;
                    }
                }
            }
            $domain_part = implode('.', $labels);

            $labels = explode('.', $local_part);
            foreach ($labels as $idx => $label) {
                if (preg_match($prefix_pattern, $label)) {
                    $plain = $this->_decode($label);
                    if ($plain) {
                        $labels[$idx] = $plain;
                    }
                }
            }
            $local_part = implode('.', $labels);

            $return = $local_part.'@'.$domain_part;
        } elseif (preg_match('![:\./]!', $input)) {
            // A complete domain name or URL: not allowed in strict mode either
            if ($this->_strict_mode) {
                $this->_error('Only simple domain name parts can be handled in strict mode');
                return false;
            }
            $parsed = parse_url($input);
            if (isset($parsed['host'])) {
                // Decode every host label, keeping the ones that cannot be decoded
                $labels = explode('.', $parsed['host']);
                foreach ($labels as $idx => $label) {
                    $plain = $this->_decode($label);
                    if ($plain) {
                        $labels[$idx] = $plain;
                    }
                }
                $parsed['host'] = implode('.', $labels);

                // Reassemble the URL from its parsed components
                $return = '';
                if (!empty($parsed['scheme'])) {
                    $return .= $parsed['scheme'];
                    $return .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
                }
                if (!empty($parsed['user'])) {
                    $return .= $parsed['user'];
                    if (!empty($parsed['pass'])) {
                        $return .= ':'.$parsed['pass'];
                    }
                    $return .= '@';
                }
                $return .= $parsed['host'];
                if (!empty($parsed['port'])) {
                    $return .= ':'.$parsed['port'];
                }
                if (!empty($parsed['path'])) {
                    $return .= $parsed['path'];
                }
                if (!empty($parsed['query'])) {
                    $return .= '?'.$parsed['query'];
                }
                if (!empty($parsed['fragment'])) {
                    $return .= '#'.$parsed['fragment'];
                }
            } else {
                // parse_url() found no host: treat the input as dot separated labels
                $labels = explode('.', $input);
                foreach ($labels as $idx => $label) {
                    $plain = $this->_decode($label);
                    if ($plain) {
                        $labels[$idx] = $plain;
                    }
                }
                $return = implode('.', $labels);
            }
        } else {
            // A single label; fall back to the input when it cannot be decoded
            $return = $this->_decode($input);
            if (!$return) {
                $return = $input;
            }
        }

        // $return is UTF-8 at this point; convert when another format was requested.
        // A one time encoding takes precedence over the object's setting.
        $format = ($one_time_encoding) ? $one_time_encoding : $this->_api_encoding;
        if ($format == 'utf8') {
            return $return;
        }
        if ($format == 'ucs4_string') {
            return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
        }
        if ($format == 'ucs4_array') {
            return $this->_utf8_to_ucs4($return);
        }
        $this->_error('Unsupported output format');
        return false;
    }

/**

261

     * Encode a given UTF-8 domain name

262

     * @param    string   Domain name (UTF-8 or UCS-4)

263

     * [@param    string   Desired input encoding, see {@link set_parameter}]

264

     * @return   string   Encoded Domain name (ACE string)

265

     * @access   public

266

*/

267

    function encode($decoded, $one_time_encoding = false)
    {
        // Bring the input into UCS-4 array form.
        // A one time encoding takes precedence over the object's setting.
        $format = $one_time_encoding ? $one_time_encoding : $this->_api_encoding;
        if ($format == 'utf8') {
            $decoded = $this->_utf8_to_ucs4($decoded);
        } elseif ($format == 'ucs4_string') {
            $decoded = $this->_ucs4_string_to_ucs4($decoded);
        } elseif ($format != 'ucs4_array') {
            $this->_error('Unsupported input format: '.$format);
            return false;
        }

        // Nothing to encode
        if (empty($decoded)) {
            return '';
        }

        // Code points that are rewritten to the plain dot (0x2E)
        $dot_variants = array(0x3002, 0xFF0E, 0xFF61);
        // Code points that end a segment: . / : ? @
        $anchors = array(0x2E, 0x2F, 0x3A, 0x3F, 0x40);

        $segment_start = 0; // Index at which the not yet encoded segment begins
        $output = '';
        foreach ($decoded as $pos => $cp) {
            if (in_array($cp, $dot_variants)) {
                $decoded[$pos] = 0x2E;
            } elseif (!in_array($cp, $anchors)) {
                continue;
            }

            // Neither email addresses nor URLs allowed in strict mode
            if ($this->_strict_mode) {
                $this->_error('Neither email addresses nor URLs are allowed in strict mode.');
                return false;
            }

            // An anchor at index 0 is skipped: it is not copied to the output
            if ($pos) {
                $segment = array_slice($decoded, $segment_start, $pos - $segment_start);
                $ace = $this->_encode($segment);
                // Segments that cannot be encoded are passed through as UTF-8
                $output .= $ace ? $ace : $this->_ucs4_to_utf8($segment);
                $output .= chr($decoded[$pos]);
            }
            $segment_start = $pos + 1;
        }

        if ($segment_start) {
            // At least one anchor was seen: handle whatever follows the last one
            $total = sizeof($decoded);
            $segment = array_slice($decoded, $segment_start, $total - $segment_start);
            $ace = $this->_encode($segment);
            $output .= $ace ? $ace : $this->_ucs4_to_utf8($segment);
            return $output;
        }

        // No anchor at all: the input is a single label
        $output = $this->_encode($decoded);
        if ($output) {
            return $output;
        }
        return $this->_ucs4_to_utf8($decoded);
    }

/**

346

     * Use this method to get the last error that occurred

347

     * @param    void

348

     * @return   string   The last error that occurred (false if none has been recorded)

349

     * @access   public

350

*/

351

    function get_last_error()
    {
        // Holds the message stored by the most recent _error() call, false initially
        $last_error = $this->_error;
        return $last_error;
    }

354

355

356

/**

357

     * The actual decoding algorithm

358

     * @access   private

359

*/

360

    /*
     * Decodes a single Punycode label (including its prefix) to UTF-8.
     * Returns false and records an error when the label does not start with
     * the Punycode prefix, is empty after the prefix, or ends in the middle
     * of a variable length integer.
     */
    function _decode($encoded)
    {
        $prefix_len     = strlen($this->_punycode_prefix);
        $prefix_pattern = '!^'.preg_quote($this->_punycode_prefix, '!').'!';

        // We do need to find the Punycode prefix
        if (!preg_match($prefix_pattern, $encoded)) {
            $this->_error('This is not a punycode string');
            return false;
        }
        $encode_test = preg_replace($prefix_pattern, '', $encoded);
        // If nothing left after removing the prefix, it is hopeless
        if (!$encode_test) {
            $this->_error('The given encoded string was empty');
            return false;
        }

        // Find last occurrence of the delimiter; everything between the prefix
        // and that delimiter consists of basic code points copied verbatim
        $delim_pos = strrpos($encoded, '-');
        $decoded = array();
        if ($delim_pos > $prefix_len) {
            for ($k = $prefix_len; $k < $delim_pos; ++$k) {
                // Square brackets: the {} string offset syntax is a parse error as of PHP 8.0
                $decoded[] = ord($encoded[$k]);
            }
        }
        $deco_len = count($decoded);
        $enco_len = strlen($encoded);

        // Wandering through the strings; init
        $is_first = true;
        $bias     = $this->_initial_bias;
        $idx      = 0;
        $char     = $this->_initial_n;

        for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
            // Read one variable length integer; it advances $idx
            for ($old_idx = $idx, $w = 1, $k = $this->_base; 1; $k += $this->_base) {
                // Input ended inside an integer: fail instead of reading past the end
                if ($enco_idx >= $enco_len) {
                    $this->_error('The given encoded string is incomplete');
                    return false;
                }
                $digit = $this->_decode_digit($encoded[$enco_idx++]);
                $idx += $digit * $w;
                $t = ($k <= $bias) ? $this->_tmin :
                        (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
                if ($digit < $t) {
                    break;
                }
                $w = (int) ($w * ($this->_base - $t));
            }
            $bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
            $is_first = false;
            // $idx carries both the code point increment and the insert position
            $char += (int) ($idx / ($deco_len + 1));
            $idx %= ($deco_len + 1);
            if ($deco_len > 0) {
                // Make room for the decoded char
                for ($i = $deco_len; $i > $idx; $i--) {
                    $decoded[$i] = $decoded[($i - 1)];
                }
            }
            $decoded[$idx++] = $char;
        }
        return $this->_ucs4_to_utf8($decoded);
    }

413

414

415

/**

416

     * The actual encoding algorithm

417

     * @access   private

418

*/

419

    function _encode($decoded)
    {
        // Input that already starts with the Punycode prefix is refused
        $prefix_len  = strlen($this->_punycode_prefix);
        $prefix_ucs4 = $this->_utf8_to_ucs4($this->_punycode_prefix);
        $input_head  = array_slice($decoded, 0, $prefix_len);
        if ($prefix_ucs4 == $input_head) {
            $this->_error('This is already a punycode string');
            return false;
        }

        // Only strings with at least one code point above 0x7a get encoded
        $encodable = false;
        foreach ($decoded as $cp) {
            if ($cp > 0x7a) {
                $encodable = true;
                break;
            }
        }
        if (!$encodable) {
            $this->_error('The given string does not contain encodable chars');
            return false;
        }

        // Do NAMEPREP
        $decoded = $this->_nameprep($decoded);
        if (!$decoded || !is_array($decoded)) {
            return false; // NAMEPREP failed
        }
        $deco_len = count($decoded);
        if (!$deco_len) {
            return false; // Empty array
        }

        // Copy every code point accepted by the range test below to the output.
        // $codecount is the number of code points consumed so far.
        $codecount = 0;
        $basic = '';
        for ($i = 0; $i < $deco_len; ++$i) {
            $cp = $decoded[$i];
            $is_basic = (0x2F < $cp && $cp < 0x40)
                    || (0x40 < $cp && $cp < 0x5B)
                    || (0x60 < $cp && $cp <= 0x7B)
                    || (0x2D == $cp);
            if ($is_basic) {
                $basic .= chr($cp);
                $codecount++;
            }
        }
        if ($codecount == $deco_len) {
            return $basic; // All codepoints were basic ones
        }

        // Prefix first, then the basic code points, then a hyphen if there were any
        $encoded = $this->_punycode_prefix.$basic;
        if ($codecount) {
            $encoded .= '-';
        }

        // Now find and encode all non-basic code points
        $is_first = true;
        $cur_code = $this->_initial_n;
        $bias     = $this->_initial_bias;
        $delta    = 0;
        while ($codecount < $deco_len) {
            // Find the smallest code point >= the current code point
            $next_code = $this->_max_ucs;
            for ($i = 0; $i < $deco_len; $i++) {
                if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                    $next_code = $decoded[$i];
                }
            }

            $delta += ($next_code - $cur_code) * ($codecount + 1);
            $cur_code = $next_code;

            // Scan input again and encode all characters whose code point is $cur_code
            for ($i = 0; $i < $deco_len; $i++) {
                if ($decoded[$i] < $cur_code) {
                    $delta++;
                } elseif ($decoded[$i] == $cur_code) {
                    // Emit $delta as a variable length integer
                    $q = $delta;
                    for ($k = $this->_base; 1; $k += $this->_base) {
                        if ($k <= $bias) {
                            $t = $this->_tmin;
                        } elseif ($k >= $bias + $this->_tmax) {
                            $t = $this->_tmax;
                        } else {
                            $t = $k - $bias;
                        }
                        if ($q < $t) {
                            break;
                        }
                        $encoded .= $this->_encode_digit(intval($t + (($q - $t) % ($this->_base - $t))));
                        $q = (int) (($q - $t) / ($this->_base - $t));
                    }
                    $encoded .= $this->_encode_digit($q);
                    $bias = $this->_adapt($delta, $codecount + 1, $is_first);
                    $codecount++;
                    $delta = 0;
                    $is_first = false;
                }
            }

            $delta++;
            $cur_code++;
        }
        return $encoded;
    }

511

512

513

/**

514

     * Adapt the bias according to the current code point and position

515

     * @access   private

516

*/

517

    function _adapt($delta, $npoints, $is_first)
    {
        // The very first delta is scaled by _damp, later ones are halved
        $divisor = $is_first ? $this->_damp : 2;
        $delta = intval($delta / $divisor);
        $delta += intval($delta / $npoints);

        $span  = $this->_base - $this->_tmin;
        $limit = ($span * $this->_tmax) / 2;

        // Shrink delta until it is within the limit; $k grows by _base per step
        $k = 0;
        while ($delta > $limit) {
            $delta = intval($delta / $span);
            $k += $this->_base;
        }
        return intval($k + ($span + 1) * $delta / ($delta + $this->_skew));
    }

525

526

527

/**

528

     * Encoding a certain digit

529

     * @access   private

530

*/

531

    function _encode_digit($d)
    {
        // Values below 26 map to 'a'..'z' (offset 97), the rest to '0'..'9' (offset 22)
        $offset = ($d < 26) ? 97 : 22;
        return chr($d + $offset);
    }

534

535

536

/**

537

     * Decode a certain digit

538

     * @access   private

539

*/

540

    function _decode_digit($cp)
    {
        $code = ord($cp);

        // NOTE(review): the three tests only have an upper bound, so byte values
        // below '0' (and those between the ranges) also yield a result instead of
        // _base. _decode() appears to rely on this to leave its inner loop, so
        // tighten the checks only together with that loop.
        if ($code < 58) {
            return $code - 22; // '0'..'9' => 26..35
        }
        if ($code < 91) {
            return $code - 65; // 'A'..'Z' => 0..25
        }
        if ($code < 123) {
            return $code - 97; // 'a'..'z' => 0..25
        }
        return $this->_base;
    }

544

545

546

/**

547

     * Internal error handling method

548

     * @access   private

549

*/

550

    function _error($error = '')
    {
        // Remember the message so that get_last_error() can hand it out later
        $message = $error;
        $this->_error = $message;
    }

553

554

555

/**

556

     * Do Nameprep according to RFC3491 and RFC3454

557

     * @param    array    Unicode Characters

558

     * @return   mixed    Unicode characters, Nameprep'd (array of code points), or false on prohibited input

559

     * @access   private

560

*/

561

    /*
     * Applies the Nameprep steps to an array of code points: mapping,
     * prohibited input detection, decomposition and composition.
     * Returns the resulting array of code points, or false (with the error
     * recorded) as soon as a prohibited code point is found.
     */
    function _nameprep($input)
    {
        $output = array();

        // Mapping
        // Walking through the input array, performing the required steps on each of
        // the input chars and putting the result into the output array
        // While mapping required chars we apply the canonical ordering
        foreach ($input as $v) {
            // Map to nothing == skip that code point
            if (in_array($v, $this->NP['map_nothing'])) {
                continue;
            }

            // Try to find prohibited input
            if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
                $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                return false;
            }
            foreach ($this->NP['prohibit_ranges'] as $range) {
                if ($range[0] <= $v && $v <= $range[1]) {
                    $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                    return false;
                }
            }

            if (0xAC00 <= $v && $v <= 0xD7AF) {
                // Hangul syllable decomposition
                foreach ($this->_hangul_decompose($v) as $out) {
                    $output[] = (int) $out;
                }
            } elseif (isset($this->NP['replacemaps'][$v])) {
                // There's a decomposition mapping for that code point
                foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) {
                    $output[] = (int) $out;
                }
            } else {
                $output[] = (int) $v;
            }
        }

        // Before applying any Combining, try to rearrange any Hangul syllables
        $output = $this->_hangul_compose($output);

        // Combine code points
        $last_class   = 0;
        $last_starter = 0;
        $out_len      = count($output);
        for ($i = 0; $i < $out_len; ++$i) {
            $class = $this->_get_combining_class($output[$i]);
            if ((!$last_class || $last_class > $class) && $class) {
                // Try to match
                $seq_len = $i - $last_starter;
                $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
                // On match: Replace the last starter with the composed character and remove
                // the now redundant non-starter(s)
                if ($out) {
                    $output[$last_starter] = $out;
                    // count() on a non-array throws a TypeError as of PHP 8.0;
                    // older versions returned 1 for it, which is kept here.
                    $composed_len = is_array($out) ? count($out) : 1;
                    if ($composed_len != $seq_len) {
                        for ($j = $i + 1; $j < $out_len; ++$j) {
                            $output[$j - 1] = $output[$j];
                        }
                        // NOTE(review): $out_len is one past the last index that was
                        // shifted; confirm whether $out_len - 1 was intended here.
                        unset($output[$out_len]);
                    }
                    // Rewind the for loop by one, since there can be more possible compositions
                    $i--;
                    $out_len--;
                    $last_class = ($i == $last_starter) ? 0 : $this->_get_combining_class($output[$i - 1]);
                    continue;
                }
            }
            // The current class is 0
            if (!$class) {
                $last_starter = $i;
            }
            $last_class = $class;
        }
        return $output;
    }

636

637

638

/**

639

     * Decomposes a Hangul syllable

640

     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul

641

     * @param    integer  32bit UCS4 code point

642

     * @return   array    Either Hangul Syllable decomposed or original 32bit value as one value array

643

     * @access   private

644

*/

645

    function _hangul_decompose($char)
    {
        // Offset of the code point from the syllable base; anything outside
        // [0, _scount) is handed back untouched as a one-element array.
        $sindex = (int) $char - $this->_sbase;
        if ($sindex < 0 || $sindex >= $this->_scount) {
            return array($char);
        }
        $result = array();
        // The cast must wrap the whole expression: "(int) $a + $b / $c" only
        // casts $a, and the "/" yields a float, so the old code returned
        // fractional values here instead of integer code points.
        $result[] = (int) ($this->_lbase + $sindex / $this->_ncount);
        $result[] = (int) ($this->_vbase + ($sindex % $this->_ncount) / $this->_tcount);
        $T = intval($this->_tbase + $sindex % $this->_tcount);
        // A trailing part equal to the base means "no trailing part": omit it
        if ($T != $this->_tbase) $result[] = $T;
        return $result;
    }

658

/**

659

     * Composes a Hangul syllable

660

     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul

661

     * @param    array    Decomposed UCS4 sequence

662

     * @return   array    UCS4 sequence with syllables composed

663

     * @access   private

664

*/

665

    function _hangul_compose($input)
    {
        // Walks the sequence once, merging each element into the previously
        // emitted one whenever the pair forms an LV+T or an L+V combination.
        $inp_len = count($input);
        if (!$inp_len) return array();
        $result = array();
        $last = (int) $input[0];
        $result[] = $last; // copy first char from input to output

        for ($i = 1; $i < $inp_len; ++$i) {
            $char = (int) $input[$i];
            $sindex = $last - $this->_sbase;
            $lindex = $last - $this->_lbase;
            $vindex = $char - $this->_vbase;
            $tindex = $char - $this->_tbase;
            // Find out, whether two current characters are LV and T.
            // The bounds on $tindex are exclusive on both sides: index 0 is the
            // base itself (no trailing part) and index _tcount lies past the
            // last trailing part. The former inclusive test swallowed the first
            // and turned the second into the following LV syllable.
            if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount == 0)
                    && 0 < $tindex && $tindex < $this->_tcount) {
                // create syllable of form LVT
                $last += $tindex;
                $result[(count($result) - 1)] = $last; // reset last
                continue; // discard char
            }
            // Find out, whether two current characters form L and V
            if (0 <= $lindex && $lindex < $this->_lcount && 0 <= $vindex && $vindex < $this->_vcount) {
                // create syllable of form LV
                $last = (int) $this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount;
                $result[(count($result) - 1)] = $last; // reset last
                continue; // discard char
            }
            // if neither case was true, just add the character
            $last = $char;
            $result[] = $char;
        }
        return $result;
    }

700

701

/**

702

     * Returns the combining class of a certain wide char

703

     * @param    integer    Wide char to check (32bit integer)

704

     * @return   integer    Combining class if found, else 0

705

     * @access   private

706

*/

707

    function _get_combining_class($char)
    {
        // Code points without an entry in the combining class table get class 0
        if (!isset($this->NP['norm_combcls'][$char])) {
            return 0;
        }
        return $this->NP['norm_combcls'][$char];
    }

711

712

/**

713

     * Applies the canonical ordering of a decomposed UCS4 sequence

714

     * @param    array      Decomposed UCS4 sequence

715

     * @return   array      Ordered UCS4 sequence

716

     * @access   private

717

*/

718

    function _apply_cannonical_ordering($input)
    {
        // Repeats full passes over the sequence until a pass moves nothing.
        $length = count($input);
        do {
            $moved = false;
            $prev_class = $this->_get_combining_class(intval($input[0]));
            for ($pos = 0; $pos < $length - 1; ++$pos) {
                $cur_class = $this->_get_combining_class(intval($input[$pos + 1]));
                if ($cur_class != 0 && $prev_class > $cur_class) {
                    // Bubble the element leftward until its left neighbour has
                    // a combining class that is not greater than its own
                    $slot = $pos + 1;
                    while ($slot > 0) {
                        if ($this->_get_combining_class(intval($input[$slot - 1])) <= $cur_class) {
                            break;
                        }
                        $carry = intval($input[$slot]);
                        $input[$slot] = intval($input[$slot - 1]);
                        $input[$slot - 1] = $carry;
                        $moved = true;
                        --$slot;
                    }
                    // Keep comparing against the class seen before the move
                    $cur_class = $prev_class;
                }
                $prev_class = $cur_class;
            }
        } while ($moved);
        return $input;
    }

744

745

/**

746

     * Do composition of a sequence of starter and non-starter

747

     * @param    array      UCS4 Decomposed sequence

748

     * @return   mixed      Code point of the composed character on match, false otherwise

749

     * @access   private

750

*/

751

    function _combine($input)
    {
        // Searches the replacement table for an entry whose target sequence
        // equals $input element by element; returns that entry's key.
        $wanted = count($input);
        foreach ($this->NP['replacemaps'] as $composed => $parts) {
            // Cheap rejections first: first element, then overall length
            if ($parts[0] == $input[0] && count($parts) == $wanted) {
                $matched = false;
                foreach ($input as $idx => $code) {
                    $matched = ($code == $parts[$idx]);
                    if (!$matched) break;
                }
                if ($matched) return $composed;
            }
        }
        return false;
    }

770

771

/**

772

     * This converts an UTF-8 encoded string to its UCS-4 representation

773

     * By talking about UCS-4 "strings" we mean arrays of 32bit integers representing

774

     * each of the "chars". This is due to PHP not being able to handle strings with

775

     * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.

776

     * The following UTF-8 encodings are supported:

777

     * bytes bits  representation

778

     * 1        7  0xxxxxxx

779

     * 2       11  110xxxxx 10xxxxxx

780

     * 3       16  1110xxxx 10xxxxxx 10xxxxxx

781

     * 4       21  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

782

     * 5       26  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

783

     * 6       31  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

784

     * Each x represents a bit that can be used to store character data.

785

     * The five and six byte sequences are part of Annex D of ISO/IEC 10646-1:2000

786

     * @access   private

787

*/

788

    function _utf8_to_ucs4($input)
    {
        // Byte-wise state machine: $mode is 'next' while a start byte is
        // expected and 'add' while continuation bytes of the current character
        // are still outstanding. Returns the list of code points, or false
        // (after reporting through _error()) on malformed input.
        $output = array();
        $out_len = 0;
        $inp_len = strlen($input);
        $mode = 'next';
        $test = 'none';
        for ($k = 0; $k < $inp_len; ++$k) {
            // Extract byte from input string. Square brackets are required:
            // the curly-brace offset syntax is a parse error on PHP 8.
            $v = ord($input[$k]);

            if ($v < 128) { // We found an ASCII char - put into string as is
                $output[$out_len] = $v;
                ++$out_len;
                if ('add' == $mode) {
                    $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                    return false;
                }
                continue;
            }

            if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
                $start_byte = $v;
                $mode = 'add';
                $test = 'range';
                if ($v >> 5 == 6) { // &110xxxxx 10xxxxx
                    $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left
                    $v = ($v - 192) << 6;
                } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
                    $next_byte = 1;
                    $v = ($v - 224) << 12;
                } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 2;
                    $v = ($v - 240) << 18;
                } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 3;
                    $v = ($v - 248) << 24;
                } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 4;
                    $v = ($v - 252) << 30;
                } else {
                    $this->_error('This might be UTF-8, but I don\'t understand it at byte '.$k);
                    return false;
                }
                // Store the start byte's payload; continuation bytes add to it
                $output[$out_len] = (int) $v;
                ++$out_len;
                continue;
            }

            // From here on $mode is 'add': a continuation byte is expected
            if (!$this->_allow_overlong && $test == 'range') {
                // Only the first continuation byte is range-checked
                $test = 'none';
                if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
                    $this->_error('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
                    return false;
                }
            }
            if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
                $v = ($v - 128) << ($next_byte * 6);
                $output[($out_len - 1)] += $v;
                --$next_byte;
            } else {
                $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                return false;
            }
            if ($next_byte < 0) {
                $mode = 'next';
            }
        } // for

        // Input that stops in the middle of a multi-byte sequence used to be
        // returned as a half-assembled code point; report it as malformed.
        if ('add' == $mode) {
            $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
            return false;
        }
        return $output;
    }

859

860

/**

861

     * Convert UCS-4 string into UTF-8 string

862

     * See _utf8_to_ucs4() for details

863

     * @access   private

864

*/

865

    function _ucs4_to_utf8($input)
    {
        // Encodes each code point as one lead byte followed by zero to five
        // continuation bytes carrying six bits each.
        $utf8 = '';
        $position = 0;
        foreach ($input as $v) {
            ++$position;
            if ($v < 128) { // 7bit are transferred literally
                $utf8 .= chr($v);
                continue;
            }
            // Pick the lead byte marker and the number of continuation bytes
            if ($v < (1 << 11)) {
                $lead = 192;
                $tail = 1;
            } elseif ($v < (1 << 16)) {
                $lead = 224;
                $tail = 2;
            } elseif ($v < (1 << 21)) {
                $lead = 240;
                $tail = 3;
            } elseif ($v < (1 << 26)) {
                $lead = 248;
                $tail = 4;
            } elseif ($v < (1 << 31)) {
                $lead = 252;
                $tail = 5;
            } else {
                $this->_error('Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$position);
                return false;
            }
            $utf8 .= chr($lead + ($v >> (6 * $tail)));
            // Emit the remaining six-bit groups, most significant first
            for ($shift = 6 * ($tail - 1); $shift >= 0; $shift -= 6) {
                $utf8 .= chr(128 + (($v >> $shift) & 63));
            }
        }
        return $utf8;
    }

896

897

/**

898

      * Convert UCS-4 array into UCS-4 string

899

900

      * @access   private

901

*/

902

    function _ucs4_to_ucs4_string($input)
    {
        // Serialises every value as four bytes, most significant byte first;
        // each byte is isolated with the mask 255 (&11111111).
        $packed = '';
        foreach ($input as $code) {
            foreach (array(24, 16, 8, 0) as $shift) {
                $packed .= chr(($code >> $shift) & 255);
            }
        }
        return $packed;
    }

912

913

/**

914

      * Convert UCS-4 string into UCS-4 array

915

916

      * @access   private

917

*/

918

    function _ucs4_string_to_ucs4($input)
    {
        // Reads the string four bytes at a time, most significant byte first,
        // and returns one integer per group. Returns false (after reporting
        // through _error()) when the length is not a multiple of four.
        $output = array();
        $inp_len = strlen($input);
        // Input length must be dividable by 4
        if ($inp_len % 4) {
            $this->_error('Input UCS4 string is broken');
            return false;
        }
        // Empty input - return empty output
        if (!$inp_len) return $output;
        for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
            // Increment output position every 4 input bytes
            if (!($i % 4)) {
                $out_len++;
                $output[$out_len] = 0;
            }
            // Square brackets are required: the curly-brace offset syntax is a
            // parse error on PHP 8.
            $output[$out_len] += ord($input[$i]) << (8 * (3 - ($i % 4)));
        }
        return $output;
    }

/**

942

* Adapter class for aligning the API of idna_convert with that of Net_IDNA

943

* @author  Matthias Sommerfeld <mso@phlylabs.de>

944

*/

945

class Net_IDNA_php4 extends idna_convert
{
    /**
     * Sets a new option value. Available options and values:
     *
     * [encoding - Input encoding: 'utf8' for UTF-8, 'ucs4_string' for UCS4 as
     *             string, 'ucs4_array' for UCS4 as array; the output is always
     *             UTF-8]
     * [overlong - Unicode does not allow unnecessarily long encodings of chars;
     *             set this to true to allow them anyway, else to false;
     *             default is false.]
     * [strict   - true: strict mode, good for registration purposes, causes
     *             errors on failures; false: loose mode, ideal for "wildlife"
     *             applications, silently ignores errors and returns the
     *             original input instead]
     *
     * @param    mixed     Parameter to set (string: single parameter; array of Parameter => Value pairs)
     * @param    string    Value to use (if parameter 1 is a string)
     * @return   boolean   true on success, false otherwise
     * @access   public
     */
    function setParams($option, $param = false)
    {
        // NOTE(review): delegates to $this->IC, which is not defined anywhere
        // in the visible part of this file -- confirm that the property (and
        // its set_parameters() method) exists.
        return $this->IC->set_parameters($option, $param);
    }
}

?>

Projet

Général

Profil

Club Drupal

root / drupal7 / sites / all / libraries / simplepie-1.3.1 / idn / idna_convert.class.php @ 7295e063