root / htmltest / sites / all / libraries / fpdi-version / pdf_parser.php @ 5fc58a68
1 | 85ad3d82 | Assos Assos | <?php
|
---|---|---|---|
2 | //
|
||
3 | // FPDI - Version 1.4.4
|
||
4 | //
|
||
5 | // Copyright 2004-2013 Setasign - Jan Slabon
|
||
6 | //
|
||
7 | // Licensed under the Apache License, Version 2.0 (the "License");
|
||
8 | // you may not use this file except in compliance with the License.
|
||
9 | // You may obtain a copy of the License at
|
||
10 | //
|
||
11 | // http://www.apache.org/licenses/LICENSE-2.0
|
||
12 | //
|
||
13 | // Unless required by applicable law or agreed to in writing, software
|
||
14 | // distributed under the License is distributed on an "AS IS" BASIS,
|
||
15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
16 | // See the License for the specific language governing permissions and
|
||
17 | // limitations under the License.
|
||
18 | //
|
||
19 | |||
// Type identifiers used as element [0] of every parsed PDF value.
// Each constant is only defined when nothing else defined it before,
// so including this file next to another copy does not raise notices.
defined('PDF_TYPE_NULL') || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC') || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN') || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX') || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING') || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY') || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC') || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF') || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT') || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM') || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN') || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL') || define('PDF_TYPE_REAL', 12);
46 | |||
47 | require_once('pdf_context.php'); |
||
48 | |||
49 | if (!class_exists('pdf_parser', false)) { |
||
50 | |||
51 | class pdf_parser { |
||
52 | |||
/**
 * Name of the file handed to the constructor
 * @var string
 */
var $filename;

/**
 * File handle opened by the constructor in 'rb' mode
 * @var resource
 */
var $f;

/**
 * Parsing context wrapped around the file handle
 * @var object pdf_context-Instance
 */
var $c;

/**
 * Cross-reference data collected by pdf_read_xref().
 * Keys written there: 'xref_location', 'max_object', 'xref' and 'trailer'.
 * @var array
 */
var $xref;

/**
 * The resolved /Root object, set by pdf_read_root()
 * @var array
 */
var $root;

/**
 * PDF version of the loaded document, taken from the first
 * "digit.digit" sequence within the first 16 bytes of the file
 * @var string
 */
var $pdfVersion;

/**
 * For reading encrypted documents and xref/objectstreams are in use.
 * NOTE(review): not referenced by any method visible in this file.
 *
 * @var boolean
 */
var $readPlain = true;

/**
 * The object that is currently being resolved.
 *
 * pdf_resolve_object() binds this by reference to its result array and
 * pdf_read_value() reads the '/Length' entry from it when it meets a stream.
 * Declared explicitly so that it is not created as a dynamic property.
 *
 * @var array
 */
var $actual_obj;
95 | |||
/**
 * Constructor
 *
 * Opens the file, reads the PDF version, collects the cross-reference
 * data, refuses encrypted documents and finally resolves the /Root object.
 * Every failure is reported through error().
 *
 * @param string $filename Source-Filename
 */
function __construct($filename) {
    $this->filename = $filename;

    // the warning of fopen() is suppressed, the failure is reported below
    $this->f = @fopen($this->filename, 'rb');

    if (!$this->f)
        $this->error(sprintf('Cannot open %s !', $filename));

    $this->getPDFVersion();

    $this->c = new pdf_context($this->f);

    // Read xref-Data
    $this->xref = array();
    $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

    // Check for Encryption
    $this->getEncryption();

    // Read root
    $this->pdf_read_root();
}

/**
 * Old constructor name (method named like the class).
 *
 * Such methods are no longer treated as constructors by PHP 8, therefore
 * the work is done in __construct(). This method stays available for code
 * that calls it explicitly, e.g. parent::pdf_parser($filename).
 * It has to be declared after __construct().
 *
 * @param string $filename Source-Filename
 */
function pdf_parser($filename) {
    $this->__construct($filename);
}
123 | |||
/**
 * Release the file handle of this parser
 *
 * Nothing happens when the handle is not set or is no resource anymore.
 */
function closeFile() {
    if (!isset($this->f) || !is_resource($this->f)) {
        return;
    }

    fclose($this->f);
    unset($this->f);
}
133 | |||
/**
 * Output an error message and terminate the script
 *
 * @param string $msg Error-Message
 */
function error($msg) {
    $output = '<b>PDF-Parser Error:</b> ' . $msg;
    exit($output);
}
142 | |||
/**
 * Report an error when the trailer carries an /Encrypt entry
 */
function getEncryption() {
    if (!isset($this->xref['trailer'][1]['/Encrypt'])) {
        return;
    }

    $this->error('File is encrypted!');
}
151 | |||
/**
 * Return the /Root entry of the trailer
 *
 * The entry has to be an indirect reference, anything else is
 * reported through error().
 *
 * @return array
 */
function pdf_find_root() {
    $rootType = $this->xref['trailer'][1]['/Root'][0];

    if ($rootType != PDF_TYPE_OBJREF) {
        $this->error('Wrong Type of Root-Element! Must be an indirect reference');
    }

    return $this->xref['trailer'][1]['/Root'];
}
164 | |||
/**
 * Resolve the /Root reference and keep the object in $this->root
 */
function pdf_read_root() {
    $rootReference = $this->pdf_find_root();
    $this->root = $this->pdf_resolve_object($this->c, $rootReference);
}
172 | |||
/**
 * Get PDF-Version
 *
 * Looks for the first "digit.digit" sequence within the first 16 bytes
 * of the file. $this->pdfVersion is left untouched when nothing matches.
 *
 * @return string
 */
function getPDFVersion() {
    fseek($this->f, 0);
    $head = fread($this->f, 16);

    if (preg_match('/\d\.\d/', $head, $found)) {
        $this->pdfVersion = $found[0];
    }

    return $this->pdfVersion;
}
185 | |||
/**
 * Find the xref-Table
 *
 * Reads the last 1500 bytes of the file (or the whole file when it is
 * shorter) and returns the number following the last "startxref" keyword.
 *
 * @return integer
 */
function pdf_find_xref() {
    $keyword = 'startxref';
    $tailSize = 1500;

    // files smaller than the tail are read from their beginning
    if (fseek($this->f, -$tailSize, SEEK_END) === -1) {
        fseek($this->f, 0);
    }
    $tail = fread($this->f, $tailSize);

    // cut off everything up to and including the last keyword; without
    // a keyword nothing is left and the pattern below cannot match
    $lastHit = strrpos($tail, $keyword);
    $cutAt = ($lastHit === false) ? strlen($tail) : $lastHit + strlen($keyword);
    $tail = substr($tail, $cutAt);

    if (!preg_match('/\s*(\d+).*$/s', $tail, $matches)) {
        $this->error('Unable to find pointer to xref table');
    }

    return (int) $matches[1];
}
207 | |||
/**
 * Read xref-table
 *
 * Fills $result with 'xref_location' and 'max_object' (first call only),
 * 'xref' (object number => generation => byte offset, null for entries
 * not marked 'n') and 'trailer' (first trailer found). Sections linked
 * through /Prev are read recursively; entries that are already known
 * are not overwritten.
 *
 * @param array $result Array of xref-table
 * @param integer $offset of xref-table
 * @return boolean always true, failures are reported through error()
 */
function pdf_read_xref(&$result, $offset) {
    $o_pos = $offset - min(20, $offset);
    fseek($this->f, $o_pos); // set some bytes backwards to fetch erroneous docs

    $data = fread($this->f, 100);

    $xrefPos = strrpos($data, 'xref');

    if ($xrefPos === false) {
        // no keyword: check whether an object declaration is found instead
        fseek($this->f, $offset);
        $c = new pdf_context($this->f);
        $xrefStreamObjDec = $this->pdf_read_value($c);

        if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
            $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
        } else {
            $this->error('Unable to find xref table.');
        }
    }

    if (!isset($result['xref_location'])) {
        $result['xref_location'] = $o_pos + $xrefPos;
        $result['max_object'] = 0;
    }

    $cycles = -1;
    $bytesPerCycle = 100;

    // set the handle directly after the "xref"-keyword
    $o_pos = $o_pos + $xrefPos + 4;
    fseek($this->f, $o_pos);
    $data = fread($this->f, $bytesPerCycle);

    // read on until the trailer keyword shows up; the search restarts one
    // cycle back so that a keyword split over two reads is still found
    while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->f)) {
        $data .= fread($this->f, $bytesPerCycle);
    }

    if ($trailerPos === false) {
        $this->error('Trailer keyword not found after xref table');
    }

    $data = substr($data, 0, $trailerPos);

    // get Line-Ending
    preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks

    $differentLineEndings = count(array_unique($m[0]));
    if ($differentLineEndings === 1) {
        // one kind of line break only: the first match is the delimiter
        // (the second match may not exist)
        $lines = explode($m[0][0], $data);
    } else {
        // mixed line breaks, or none at all within the first 100 bytes
        $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
    }

    unset($data, $differentLineEndings, $m);

    $linesCount = count($lines);

    $start = 1;

    for ($i = 0; $i < $linesCount; $i++) {
        $line = trim($lines[$i]);
        if ($line) {
            $pieces = explode(' ', $line);
            $pieceCount = count($pieces);
            switch ($pieceCount) {
                case 2:
                    // subsection header: first object number and entry count
                    $start = (int)$pieces[0];
                    $end = $start + (int)$pieces[1];
                    if ($end > $result['max_object'])
                        $result['max_object'] = $end;
                    break;
                case 3:
                    // entry: offset, generation and the 'n' or 'f' marker
                    if (!isset($result['xref'][$start]))
                        $result['xref'][$start] = array();

                    if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
                        $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                    }
                    $start++;
                    break;
                default:
                    $this->error('Unexpected data in xref table');
            }
        }
    }

    unset($lines, $pieces, $line, $start, $end, $gen);

    // 7 = strlen("trailer")
    fseek($this->f, $o_pos + $trailerPos + 7);

    $c = new pdf_context($this->f);
    $trailer = $this->pdf_read_value($c);

    unset($c);

    if (!isset($result['trailer'])) {
        $result['trailer'] = $trailer;
    }

    if (isset($trailer[1]['/Prev'])) {
        $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
    }

    unset($trailer);

    return true;
}
323 | |||
/**
 * Reads a value
 *
 * The result is an array whose element [0] is one of the PDF_TYPE_*
 * constants, followed by the payload of that type. False is returned
 * when no token is left or a value could not be completed.
 *
 * @param object $c pdf_context
 * @param string $token a Token, read from $c when omitted
 * @return mixed
 */
function pdf_read_value(&$c, $token = null) {
    if (is_null($token)) {
        $token = $this->pdf_read_token($c);
    }

    if ($token === false) {
        return false;
    }

    switch ($token) {
        case '<':
            // Hex string: take everything up to the terminating '>',
            // reading more data while the terminator is missing.
            $searchFrom = $c->offset;
            $closing = strpos($c->buffer, '>', $searchFrom);

            while ($closing === false) {
                if (!$c->increase_length()) {
                    return false;
                }
                $closing = strpos($c->buffer, '>', $searchFrom);
            }

            $hex = substr($c->buffer, $c->offset, $closing - $c->offset);
            $c->offset = $closing + 1;

            return array(PDF_TYPE_HEX, $hex);

        case '<<':
            // Dictionary: key token followed by a value, until '>>'.
            $entries = array();

            while (($key = $this->pdf_read_token($c)) !== '>>') {
                if ($key === false) {
                    return false;
                }

                $value = $this->pdf_read_value($c);
                if ($value === false) {
                    return false;
                }

                // Catch missing value
                if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
                    $entries[$key] = array(PDF_TYPE_NULL);
                    break;
                }

                $entries[$key] = $value;
            }

            return array(PDF_TYPE_DICTIONARY, $entries);

        case '[':
            // Array: values until the closing ']'.
            $items = array();

            while (($element = $this->pdf_read_token($c)) !== ']') {
                if ($element === false) {
                    return false;
                }

                $value = $this->pdf_read_value($c, $element);
                if ($value === false) {
                    return false;
                }

                $items[] = $value;
            }

            return array(PDF_TYPE_ARRAY, $items);

        case '(':
            // Literal string: ends at the matching ')'. Nested brackets
            // are counted, the character after a backslash is skipped.
            $cursor = $c->offset;
            $depth = 1;

            do {
                while ($depth != 0 && $cursor < $c->length) {
                    $current = $c->buffer[$cursor];
                    if ($current === '(') {
                        $depth++;
                    } elseif ($current === ')') {
                        $depth--;
                    } elseif ($current === '\\') {
                        $cursor++;
                    }
                    $cursor++;
                }
            } while ($depth != 0 && $c->increase_length());

            $string = substr($c->buffer, $c->offset, $cursor - $c->offset - 1);
            $c->offset = $cursor;

            return array(PDF_TYPE_STRING, $string);

        case 'stream':
            // Restart the context right behind the keyword.
            $bufferStart = ftell($c->file) - strlen($c->buffer);
            $startpos = $bufferStart + $c->offset;

            $c->reset($startpos);

            // ensure line breaks in front of the stream
            $e = 0;
            if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) {
                $e++;
            }
            if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) {
                $e++;
            }

            // The length comes from the dictionary of the object that is
            // currently resolved. It is copied first, because resolving
            // a reference re-binds $this->actual_obj.
            $lengthSpec = $this->actual_obj[1][1]['/Length'];
            if ($lengthSpec[0] == PDF_TYPE_OBJREF) {
                $tmp_c = new pdf_context($this->f);
                $resolved = $this->pdf_resolve_object($tmp_c, $lengthSpec);
                $length = $resolved[1][1];
            } else {
                $length = $lengthSpec[1];
            }

            if ($length > 0) {
                $c->reset($startpos + $e, $length);
                $v = $c->buffer;
            } else {
                $v = '';
            }
            $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

            return array(PDF_TYPE_STREAM, $v);

        default:
            if (!is_numeric($token)) {
                if ($token == 'true' || $token == 'false') {
                    return array(PDF_TYPE_BOOLEAN, $token == 'true');
                }
                if ($token == 'null') {
                    return array(PDF_TYPE_NULL);
                }

                // Just a token. Return it.
                return array(PDF_TYPE_TOKEN, $token);
            }

            // A numeric token. It may be the start of an object
            // declaration ("n g obj") or a reference ("n g R"), so up
            // to two tokens are read ahead.
            $second = $this->pdf_read_token($c);
            if ($second !== false) {
                if (is_numeric($second)) {
                    $third = $this->pdf_read_token($c);
                    if ($third !== false) {
                        if ($third == 'obj') {
                            return array(PDF_TYPE_OBJDEC, (int) $token, (int) $second);
                        }
                        if ($third == 'R') {
                            return array(PDF_TYPE_OBJREF, (int) $token, (int) $second);
                        }

                        // Just a number: hand the look-ahead back.
                        $c->stack[] = $third;
                    }
                }

                $c->stack[] = $second;
            }

            if ($token === (string)((int)$token)) {
                return array(PDF_TYPE_NUMERIC, (int)$token);
            }

            return array(PDF_TYPE_REAL, (float)$token);
    }
}
515 | |||
/**
 * Resolve an object
 *
 * A value that is no reference is returned unchanged. A reference is
 * looked up in the xref data and the object is read from the file;
 * nothing is returned when the xref data has no entry for it.
 *
 * @param object $c pdf_context
 * @param array $obj_spec The object-data
 * @param boolean $encapsulate When true the result starts with PDF_TYPE_OBJECT and carries the keys 'obj' and 'gen'
 * @return mixed false for invalid data, otherwise see above
 */
function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {
    // Exit if we get invalid data
    if (!is_array($obj_spec)) {
        return false;
    }

    // Only references have to be resolved
    if ($obj_spec[0] != PDF_TYPE_OBJREF) {
        return $obj_spec;
    }

    if (!isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {
        return;
    }

    // Remember the file position, because references may be resolved
    // while another object is being read (e.g. the length of a stream).
    $old_pos = ftell($c->file);

    // Move to the object and load its header.
    $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);

    $header = $this->pdf_read_value($c);

    $headerFits = $header[0] == PDF_TYPE_OBJDEC
        && $header[1] == $obj_spec[1]
        && $header[2] == $obj_spec[2];

    if (!$headerFits) {
        // The header is not where it should be: look for it in the buffer.
        $toSearchFor = $obj_spec[1] . ' ' . $obj_spec[2] . ' obj';
        if (preg_match('/' . $toSearchFor . '/', $c->buffer)) {
            $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor);
            // reset stack
            $c->stack = array();
        } else {
            $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");
        }
    }

    // The result is bound by reference, so pdf_read_value() sees the
    // values collected so far while this object is still being read.
    $result = array();
    $this->actual_obj =& $result;
    if ($encapsulate) {
        $result = array(
            PDF_TYPE_OBJECT,
            'obj' => $obj_spec[1],
            'gen' => $obj_spec[2]
        );
    }

    // Collect values until the end-of-object marker.
    for (;;) {
        $value = $this->pdf_read_value($c);

        // no value left or too many values: no endobj was found
        if ($value === false || count($result) > 4) {
            break;
        }

        if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {
            break;
        }

        $result[] = $value;
    }

    $c->reset($old_pos);

    if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
        $result[0] = PDF_TYPE_STREAM;
    }

    return $result;
}
602 | |||
603 | |||
604 | |||
/**
 * Reads a token from the file
 *
 * Tokens pushed back onto the stack of the context are returned first.
 * Otherwise whitespace and comments are skipped and the next delimiter
 * ('[', ']', '(', ')', '<', '>', '<<', '>>') or the next run of regular
 * characters is returned.
 *
 * @param object $c pdf_context
 * @return mixed the token as string, false when no content is left
 */
function pdf_read_token(&$c)
{
    // If there is a token available on the stack, pop it out and return it.
    if (count($c->stack)) {
        return array_pop($c->stack);
    }

    // Strip away any whitespace
    do {
        if (!$c->ensure_content()) {
            return false;
        }
        $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
    } while ($c->offset >= $c->length - 1);

    // Get the first character in the stream
    $char = $c->buffer[$c->offset++];

    switch ($char) {

        case '[':
        case ']':
        case '(':
        case ')':
            // This is either an array or literal string delimiter, Return it
            return $char;

        case '<':
        case '>':
            // This could either be a hex string or dictionary delimiter.
            // A doubled character is the dictionary delimiter.
            if ($c->buffer[$c->offset] == $char) {
                if (!$c->ensure_content()) {
                    return false;
                }
                $c->offset++;
                return $char . $char;
            } else {
                return $char;
            }

        case '%':
            // This is a comment - jump over it!
            $pos = $c->offset;
            while (1) {
                $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                if ($match === 0) {
                    // no line break yet: read more data or give up
                    if (!$c->increase_length()) {
                        return false;
                    } else {
                        continue;
                    }
                }

                $c->offset = $m[0][1] + strlen($m[0][0]);

                return $this->pdf_read_token($c);
            }

        default:
            // This is "another" type of token (probably a dictionary
            // entry or a numeric value). Find the end and return it.
            if (!$c->ensure_content()) {
                return false;
            }

            while (1) {
                // Determine the length of the token
                $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);

                if ($c->offset + $pos <= $c->length - 1) {
                    break;
                }

                // The token may span beyond the end of the current
                // buffer, so the buffer is enlarged and the length is
                // determined again. When the buffer cannot grow anymore
                // the token ends with the buffer; without this check
                // the loop would never terminate.
                if (!$c->increase_length()) {
                    break;
                }
            }

            // the first character of the token was consumed above
            $result = substr($c->buffer, $c->offset - 1, $pos + 1);

            $c->offset += $pos;
            return $result;
    }
}
718 | } |
||
719 | } |