<?php

/**
 * Low-level parser for the object structure of a PDF file.
 *
 * On construction the parser opens the file, reads the PDF version from the
 * header, locates and reads the classic cross-reference table(s) including
 * any chained through a trailer /Prev entry, rejects encrypted documents and
 * resolves the document catalog (/Root).
 *
 * Parsed values are represented as plain arrays whose first element is one of
 * the TYPE_* constants, e.g. array(self::TYPE_NUMERIC, 42) or
 * array(self::TYPE_OBJREF, $objectNumber, $generation).
 */
class pdf_parser
{
    /** Value type: null object. */
    const TYPE_NULL = 0;

    /** Value type: integer number. */
    const TYPE_NUMERIC = 1;

    /** Value type: bare token (names, keywords, ...). */
    const TYPE_TOKEN = 2;

    /** Value type: hexadecimal string (<...>). */
    const TYPE_HEX = 3;

    /** Value type: literal string ((...)). */
    const TYPE_STRING = 4;

    /** Value type: dictionary (<< ... >>). */
    const TYPE_DICTIONARY = 5;

    /** Value type: array ([ ... ]). */
    const TYPE_ARRAY = 6;

    /** Value type: object declaration ("n g obj"). */
    const TYPE_OBJDEC = 7;

    /** Value type: indirect object reference ("n g R"). */
    const TYPE_OBJREF = 8;

    /** Value type: resolved indirect object. */
    const TYPE_OBJECT = 9;

    /** Value type: stream. */
    const TYPE_STREAM = 10;

    /** Value type: boolean. */
    const TYPE_BOOLEAN = 11;

    /** Value type: real (non-integer) number. */
    const TYPE_REAL = 12;

    /**
     * Number of bytes, counted from the end of the file, that are scanned for
     * the "startxref" keyword.
     *
     * @var int
     */
    public static $searchForStartxrefLength = 5500;

    /**
     * Path of the PDF file as passed to the constructor.
     *
     * @var string
     */
    public $filename;

    /**
     * Handle of the opened PDF file; null once closeFile() has run.
     *
     * @var resource|null
     */
    protected $_f;

    /**
     * Reading context (buffer, offset, token stack) wrapped around $_f.
     *
     * @var pdf_context
     */
    protected $_c;

    /**
     * Cross-reference data. Keys written by this class: 'xrefLocation',
     * 'maxObject', 'xref' (object number => generation => byte offset or
     * null) and 'trailer'.
     *
     * @var array
     */
    protected $_xref;

    /**
     * The resolved /Root object of the document.
     *
     * @var array
     */
    protected $_root;

    /**
     * PDF version taken from the file header, e.g. "1.4".
     *
     * @var string|null
     */
    protected $_pdfVersion;

    /**
     * Not read or written by any code in this class.
     * NOTE(review): presumably consumed by subclasses - confirm before removing.
     *
     * @var bool
     */
    protected $_readPlain = true;

    /**
     * Reference to the object that resolveObject() is currently building;
     * the stream branch of _readValue() reads its /Length entry.
     *
     * @var array
     */
    protected $_currentObj;

    /**
     * Offsets of the xref sections on the /Prev chain that is currently being
     * followed. Used only to detect circular chains.
     *
     * @var array
     */
    protected $_xrefOffsetsInProgress = array();

    /**
     * Opens the file and reads the xref table, trailer and document root.
     *
     * @param string $filename Path to the PDF document.
     * @throws InvalidArgumentException If the file cannot be opened.
     * @throws Exception If the document structure cannot be read or the
     *                   document is encrypted.
     */
    public function __construct($filename)
    {
        $this->filename = $filename;

        $this->_f = @fopen($this->filename, 'rb');

        if (!$this->_f) {
            throw new InvalidArgumentException(sprintf('Cannot open %s !', $filename));
        }

        $this->getPdfVersion();

        if (!class_exists('pdf_context')) {
            require_once('pdf_context.php');
        }
        $this->_c = new pdf_context($this->_f);

        // Read the xref data, starting at the offset named after "startxref".
        $this->_xref = array();
        $this->_readXref($this->_xref, $this->_findXref());

        // Throws if the trailer announces encryption.
        $this->getEncryption();

        $this->_readRoot();
    }

    /**
     * Releases the file handle when the parser is destroyed.
     */
    public function __destruct()
    {
        $this->closeFile();
    }

    /**
     * Closes the file handle if it is still open.
     *
     * The property is set to null rather than unset, so the declared
     * property stays defined on the object.
     */
    public function closeFile()
    {
        if (isset($this->_f) && is_resource($this->_f)) {
            fclose($this->_f);
            $this->_f = null;
        }
    }

    /**
     * Checks the trailer for an /Encrypt entry.
     *
     * @throws Exception If the trailer contains an /Encrypt entry.
     */
    public function getEncryption()
    {
        if (isset($this->_xref['trailer'][1]['/Encrypt'])) {
            throw new Exception('File is encrypted!');
        }
    }

    /**
     * Returns the PDF version found in the first 16 bytes of the file.
     *
     * The value is read once and cached. If no "d.d" pattern is found, or the
     * file is already closed and nothing was cached, null is returned.
     *
     * @return string|null
     */
    public function getPdfVersion()
    {
        if ($this->_pdfVersion === null && is_resource($this->_f)) {
            fseek($this->_f, 0);
            preg_match('/\d\.\d/', fread($this->_f, 16), $m);
            if (isset($m[0])) {
                $this->_pdfVersion = $m[0];
            }
        }

        return $this->_pdfVersion;
    }

    /**
     * Resolves the /Root entry of the trailer and stores it in $_root.
     *
     * @throws Exception If /Root is missing or not an indirect reference.
     */
    protected function _readRoot()
    {
        if (!isset($this->_xref['trailer'][1]['/Root'][0])
            || $this->_xref['trailer'][1]['/Root'][0] != self::TYPE_OBJREF
        ) {
            throw new Exception('Wrong Type of Root-Element! Must be an indirect reference');
        }

        $this->_root = $this->resolveObject($this->_xref['trailer'][1]['/Root']);
    }

    /**
     * Finds the byte offset of the cross-reference table.
     *
     * Scans the last self::$searchForStartxrefLength bytes of the file (or the
     * whole file if it is shorter) for the last "startxref" keyword and
     * returns the first integer that follows it. The spelling "startref" is
     * accepted as a fallback.
     *
     * @return int
     * @throws Exception If the keyword or the offset behind it is missing.
     */
    protected function _findXref()
    {
        $toRead = self::$searchForStartxrefLength;

        $stat = fseek($this->_f, -$toRead, SEEK_END);
        if ($stat === -1) {
            // File is shorter than the search window: read it from the start.
            fseek($this->_f, 0);
        }

        $data = fread($this->_f, $toRead);

        $keyword = 'startxref';
        $keywordPos = strrpos($data, $keyword);
        if (false === $keywordPos) {
            $keyword = 'startref';
            $keywordPos = strrpos($data, $keyword);
        }

        if (false === $keywordPos) {
            throw new Exception('Unable to find "startxref" keyword.');
        }

        // Keep only what follows the keyword.
        $data = substr($data, $keywordPos + strlen($keyword));

        if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) {
            throw new Exception('Unable to find pointer to xref table.');
        }

        return (int) $matches[1];
    }

    /**
     * Reads the xref table at $offset and every older table reachable through
     * the trailer's /Prev entry.
     *
     * Entries and the trailer found first win: keys that already exist in
     * $result are not overwritten by tables read later in the chain.
     *
     * @param array $result Receives 'xrefLocation', 'maxObject', 'xref' and
     *                      'trailer'.
     * @param int   $offset Byte offset of the xref section.
     * @return bool Always true on success.
     * @throws Exception If the table or trailer cannot be read, or if the
     *                   /Prev chain refers back to a section already being
     *                   processed.
     */
    protected function _readXref(&$result, $offset)
    {
        $key = (int) $offset;
        if (isset($this->_xrefOffsetsInProgress[$key])) {
            // A malformed file may chain /Prev entries in a loop; without this
            // check the recursion below would never terminate.
            throw new Exception('Circular reference between xref tables detected.');
        }

        $this->_xrefOffsetsInProgress[$key] = true;
        try {
            $this->_readXrefSection($result, $offset);
        } catch (Exception $e) {
            unset($this->_xrefOffsetsInProgress[$key]);
            throw $e;
        }
        unset($this->_xrefOffsetsInProgress[$key]);

        return true;
    }

    /**
     * Parses one xref section plus its trailer and recurses into /Prev.
     *
     * @param array $result See _readXref().
     * @param int   $offset Byte offset of the xref section.
     * @throws Exception On any structural problem.
     */
    private function _readXrefSection(&$result, $offset)
    {
        // Start a little before the offset to tolerate slightly wrong pointers.
        $tempPos = $offset - min(20, $offset);
        fseek($this->_f, $tempPos);

        $data = fread($this->_f, 100);

        $xrefPos = strrpos($data, 'xref');

        if ($xrefPos === false) {
            // No "xref" keyword: check whether an object declaration sits at
            // the offset instead, to give a more helpful error message.
            $this->_c->reset($offset);
            $xrefStreamObjDec = $this->_readValue($this->_c);

            if (is_array($xrefStreamObjDec)
                && isset($xrefStreamObjDec[0])
                && $xrefStreamObjDec[0] == self::TYPE_OBJDEC
            ) {
                throw new Exception(
                    sprintf(
                        'This document (%s) probably uses a compression technique which is not supported by the ' .
                        'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                        $this->filename
                    )
                );
            }

            throw new Exception('Unable to find xref table.');
        }

        if (!isset($result['xrefLocation'])) {
            $result['xrefLocation'] = $tempPos + $xrefPos;
            $result['maxObject'] = 0;
        }

        $cycles = -1;
        $bytesPerCycle = 100;

        // Continue right behind the "xref" keyword (4 bytes).
        fseek($this->_f, $tempPos = $tempPos + $xrefPos + 4);
        $data = fread($this->_f, $bytesPerCycle);

        // Grow $data in chunks until "trailer" shows up. The search offset
        // trails the newest chunk so a keyword split across two reads is
        // still found.
        while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false
            && !feof($this->_f)
        ) {
            $data .= fread($this->_f, $bytesPerCycle);
        }

        if ($trailerPos === false) {
            throw new Exception('Trailer keyword not found after xref table');
        }

        $data = ltrim(substr($data, 0, $trailerPos));

        // Detect the line ending(s) used in the first 100 bytes of the table.
        $found = preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m);
        if (!$found) {
            throw new Exception('Xref table seems to be corrupted.');
        }

        if (count(array_unique($m[0])) > 1) {
            $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
        } else {
            $lines = explode($m[0][0], $data);
        }

        // Object number the next 3-field entry line belongs to.
        $start = 1;

        foreach ($lines as $rawLine) {
            $line = trim($rawLine);
            if (!$line) {
                continue;
            }

            $pieces = explode(' ', $line);

            switch (count($pieces)) {
                case 2:
                    // Subsection header: "<first object number> <count>".
                    $start = (int) $pieces[0];
                    $end = $start + (int) $pieces[1];
                    if ($end > $result['maxObject']) {
                        $result['maxObject'] = $end;
                    }
                    break;

                case 3:
                    // Entry: "<offset> <generation> <n|f>".
                    if (!isset($result['xref'][$start])) {
                        $result['xref'][$start] = array();
                    }

                    $gen = (int) $pieces[1];
                    if (!array_key_exists($gen, $result['xref'][$start])) {
                        // Entries not flagged "n" are stored as null.
                        $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                    }
                    $start++;
                    break;

                default:
                    throw new Exception('Unexpected data in xref table');
            }
        }

        // The trailer dictionary follows the "trailer" keyword (7 bytes).
        $this->_c->reset($tempPos + $trailerPos + 7);
        $trailer = $this->_readValue($this->_c);

        if (!isset($result['trailer'])) {
            $result['trailer'] = $trailer;
        }

        if (isset($trailer[1]['/Prev'])) {
            $this->_readXref($result, $trailer[1]['/Prev'][1]);
        }
    }

    /**
     * Reads one value from the context.
     *
     * @param pdf_context $c     Reading context.
     * @param string|null $token Token that was already consumed by the caller;
     *                           when null the next token is read from $c.
     * @return array|false A value array (first element is a TYPE_* constant)
     *                     or false if the data ends before the value is
     *                     complete.
     * @throws Exception If no line break follows a "stream" keyword.
     */
    protected function _readValue(&$c, $token = null)
    {
        if (is_null($token)) {
            $token = $this->_readToken($c);
        }

        if ($token === false) {
            return false;
        }

        switch ($token) {
            case '<':
                // Hex string: everything up to the next ">".
                $pos = $c->offset;

                while (1) {
                    $match = strpos($c->buffer, '>', $pos);

                    if ($match === false) {
                        // Not in the buffer yet: extend it or give up.
                        if (!$c->increaseLength()) {
                            return false;
                        }
                        continue;
                    }

                    $result = substr($c->buffer, $c->offset, $match - $c->offset);
                    $c->offset = $match + 1;

                    return array(self::TYPE_HEX, $result);
                }

            case '<<':
                // Dictionary: key/value pairs until ">>".
                $result = array();

                while (($key = $this->_readToken($c)) !== '>>') {
                    if ($key === false) {
                        return false;
                    }

                    if (($value = $this->_readValue($c)) === false) {
                        return false;
                    }

                    // A key directly followed by ">>" has no value: store a
                    // null value and close the dictionary.
                    if ($value[0] == self::TYPE_TOKEN && $value[1] == '>>') {
                        $result[$key] = array(self::TYPE_NULL);
                        break;
                    }

                    $result[$key] = $value;
                }

                return array(self::TYPE_DICTIONARY, $result);

            case '[':
                // Array: values until "]".
                $result = array();

                while (($token = $this->_readToken($c)) !== ']') {
                    if ($token === false) {
                        return false;
                    }

                    if (($value = $this->_readValue($c, $token)) === false) {
                        return false;
                    }

                    $result[] = $value;
                }

                return array(self::TYPE_ARRAY, $result);

            case '(':
                // Literal string: balance nested parentheses and skip the
                // byte following each backslash.
                $pos = $c->offset;

                $openBrackets = 1;
                do {
                    for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
                        switch (ord($c->buffer[$pos])) {
                            case 0x28: // '('
                                $openBrackets++;
                                break;
                            case 0x29: // ')'
                                $openBrackets--;
                                break;
                            case 0x5C: // '\'
                                $pos++;
                        }
                    }
                } while ($openBrackets != 0 && $c->increaseLength());

                $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
                $c->offset = $pos;

                return array(self::TYPE_STRING, $result);

            case 'stream':
                // Absolute file position right behind the "stream" keyword.
                $tempPos = $c->getPos() - strlen($c->buffer);
                $tempOffset = $c->offset;

                $c->reset($startPos = $tempPos + $tempOffset);

                // Skip forward to the line break that must follow the keyword.
                while ($c->buffer[0] !== chr(10) && $c->buffer[0] !== chr(13)) {
                    $c->reset(++$startPos);
                    if ($c->ensureContent() === false) {
                        throw new Exception(
                            'Unable to parse stream data. No newline followed the stream keyword.'
                        );
                    }
                }

                // Length of the line break: the loop above only ends on LF or
                // CR, so it is 1 byte, or 2 for a CR LF pair.
                $e = 1;
                if (isset($c->buffer[1]) && $c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) {
                    $e++;
                }

                // /Length of the object under construction, direct or indirect.
                $length = 0;
                if (isset($this->_currentObj[1][1]['/Length'])) {
                    $lengthSpec = $this->_currentObj[1][1]['/Length'];
                    if ($lengthSpec[0] == self::TYPE_OBJREF) {
                        $tmpLength = $this->resolveObject($lengthSpec);
                        $length = $tmpLength[1][1];
                    } else {
                        $length = $lengthSpec[1];
                    }
                }

                if ($length > 0) {
                    $c->reset($startPos + $e, $length);
                    $v = $c->buffer;
                } else {
                    $v = '';
                }

                $c->reset($startPos + $e + $length);
                $endstream = $this->_readToken($c);

                if ($endstream != 'endstream') {
                    // Keyword not where /Length says it should be: step over
                    // 9 bytes (the length of "endstream") and carry on.
                    $c->reset($startPos + $e + $length + 9);
                }

                return array(self::TYPE_STREAM, $v);

            default:
                if (is_numeric($token)) {
                    // A number may be the start of "n g obj" or "n g R".
                    // Look ahead up to two tokens and push back whatever
                    // does not belong to such a construct.
                    if (($tok2 = $this->_readToken($c)) !== false) {
                        if (is_numeric($tok2)) {
                            if (($tok3 = $this->_readToken($c)) !== false) {
                                if ($tok3 === 'obj') {
                                    return array(self::TYPE_OBJDEC, (int) $token, (int) $tok2);
                                }
                                if ($tok3 === 'R') {
                                    return array(self::TYPE_OBJREF, (int) $token, (int) $tok2);
                                }

                                $c->stack[] = $tok3;
                            }
                        }

                        $c->stack[] = $tok2;
                    }

                    if ($token === (string) ((int) $token)) {
                        return array(self::TYPE_NUMERIC, (int) $token);
                    }

                    return array(self::TYPE_REAL, (float) $token);
                }

                if ($token === 'true' || $token === 'false') {
                    return array(self::TYPE_BOOLEAN, $token === 'true');
                }

                if ($token === 'null') {
                    return array(self::TYPE_NULL);
                }

                // Anything else is handed back as a bare token.
                return array(self::TYPE_TOKEN, $token);
        }
    }

    /**
     * Resolves an indirect reference to the object it points to.
     *
     * Values that are not indirect references are returned unchanged. The
     * position of the reading context is restored before returning.
     *
     * @param array $objSpec A value array, typically array(TYPE_OBJREF, n, g).
     * @return array|false The resolved object (array(TYPE_OBJECT|TYPE_STREAM,
     *                     'obj' => n, 'gen' => g, 1 => value, ...)), the input
     *                     itself if it is no reference, or false if the input
     *                     is not an array.
     * @throws Exception If the object is not listed in the xref data or cannot
     *                   be found at the listed position.
     */
    public function resolveObject($objSpec)
    {
        $c = $this->_c;

        if (!is_array($objSpec)) {
            return false;
        }

        if ($objSpec[0] != self::TYPE_OBJREF) {
            return $objSpec;
        }

        if (!isset($this->_xref['xref'][$objSpec[1]][$objSpec[2]])) {
            throw new Exception(
                sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
            );
        }

        // Remember where the caller was reading so it can be restored below.
        $oldPos = $c->getPos();

        $c->reset($this->_xref['xref'][$objSpec[1]][$objSpec[2]]);

        $header = $this->_readValue($c);

        if (!is_array($header)
            || $header[0] != self::TYPE_OBJDEC
            || $header[1] != $objSpec[1]
            || $header[2] != $objSpec[2]
        ) {
            // The declaration is not exactly at the offset; look for it in
            // the data that is currently buffered.
            $toSearchFor = $objSpec[1] . ' ' . $objSpec[2] . ' obj';
            $foundAt = strpos($c->buffer, $toSearchFor);

            if ($foundAt === false) {
                throw new Exception(
                    sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
                );
            }

            $c->offset = $foundAt + strlen($toSearchFor);
            // Drop tokens pushed back while reading the wrong header.
            $c->stack = array();
        }

        $result = array(
            self::TYPE_OBJECT,
            'obj' => $objSpec[1],
            'gen' => $objSpec[2]
        );

        // The stream branch of _readValue() needs the dictionary of the
        // object being built to look up /Length.
        $this->_currentObj =& $result;

        while (true) {
            $value = $this->_readValue($c);
            if ($value === false || count($result) > 4) {
                break;
            }

            if ($value[0] == self::TYPE_TOKEN && $value[1] === 'endobj') {
                break;
            }

            $result[] = $value;
        }

        $c->reset($oldPos);

        if (isset($result[2][0]) && $result[2][0] == self::TYPE_STREAM) {
            $result[0] = self::TYPE_STREAM;
        }

        return $result;
    }

    /**
     * Reads the next token from the context.
     *
     * Tokens pushed back onto $c->stack are returned first. Leading
     * whitespace and "%" comments are skipped.
     *
     * @param pdf_context $c Reading context.
     * @return string|false The token, or false if no more data is available.
     */
    protected function _readToken($c)
    {
        // Pushed-back tokens take precedence over the buffer.
        if (count($c->stack)) {
            return array_pop($c->stack);
        }

        // Skip whitespace, refilling the buffer as needed.
        do {
            if (!$c->ensureContent()) {
                return false;
            }
            $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
        } while ($c->offset >= $c->length - 1);

        $char = $c->buffer[$c->offset++];

        switch ($char) {
            case '[':
            case ']':
            case '(':
            case ')':
                // Single-character delimiters are tokens on their own.
                return $char;

            case '<':
            case '>':
                // "<<" and ">>" are one token, "<" and ">" another.
                if ($c->buffer[$c->offset] == $char) {
                    if (!$c->ensureContent()) {
                        return false;
                    }
                    $c->offset++;
                    return $char . $char;
                }

                return $char;

            case '%':
                // Comment: skip to the end of the line, then read on.
                $pos = $c->offset;
                while (1) {
                    $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                    if (!$match) {
                        if (!$c->increaseLength()) {
                            return false;
                        }
                        continue;
                    }

                    $c->offset = $m[0][1] + strlen($m[0][0]);

                    return $this->_readToken($c);
                }

            default:
                if (!$c->ensureContent()) {
                    return false;
                }

                while (1) {
                    // Length of the token up to the next delimiter.
                    $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);

                    if ($c->offset + $pos <= $c->length - 1) {
                        break;
                    }

                    // The token reaches the end of the buffer, so it may
                    // continue in data not loaded yet. If the buffer cannot
                    // grow any further the token ends here; looping on would
                    // never terminate.
                    if (!$c->increaseLength()) {
                        break;
                    }
                }

                $result = substr($c->buffer, $c->offset - 1, $pos + 1);

                $c->offset += $pos;

                return $result;
        }
    }

    /**
     * Decodes the data of a stream object by applying its /Filter chain.
     *
     * @param array $obj A resolved stream object: $obj[1][1] is its dictionary,
     *                   $obj[2][1] the raw stream data.
     * @return string The decoded stream data.
     * @throws Exception If a filter is unsupported, zlib is missing or the
     *                   data cannot be decompressed.
     */
    protected function _unFilterStream($obj)
    {
        $filters = array();

        if (isset($obj[1][1]['/Filter'])) {
            $filter = $obj[1][1]['/Filter'];

            if ($filter[0] == self::TYPE_OBJREF) {
                $tmpFilter = $this->resolveObject($filter);
                $filter = $tmpFilter[1];
            }

            if ($filter[0] == self::TYPE_TOKEN) {
                $filters[] = $filter;
            } else if ($filter[0] == self::TYPE_ARRAY) {
                $filters = $filter[1];
            }
        }

        $stream = $obj[2][1];

        foreach ($filters as $filter) {
            switch ($filter[1]) {
                case '/FlateDecode':
                case '/Fl':
                    if (!function_exists('gzuncompress')) {
                        throw new Exception(
                            sprintf('To handle %s filter, please compile php with zlib support.', $filter[1])
                        );
                    }

                    $oStream = $stream;
                    $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';

                    if ($stream === false) {
                        // Fallback for damaged data: strip up to 8 leading
                        // bytes, one at a time, and try a raw inflate.
                        $tries = 0;
                        while ($tries < 8 && ($stream === false || strlen($stream) < strlen($oStream))) {
                            $oStream = substr($oStream, 1);
                            $stream = @gzinflate($oStream);
                            $tries++;
                        }

                        if ($stream === false) {
                            throw new Exception('Error while decompressing stream.');
                        }
                    }
                    break;

                case '/LZWDecode':
                    $stream = $this->_decodeWithFilterClass('FilterLZW', $stream);
                    break;

                case '/ASCII85Decode':
                    $stream = $this->_decodeWithFilterClass('FilterASCII85', $stream);
                    break;

                case '/ASCIIHexDecode':
                    $stream = $this->_decodeWithFilterClass('FilterASCIIHexDecode', $stream);
                    break;

                case null:
                    // No filter name: leave the data untouched.
                    break;

                default:
                    throw new Exception(sprintf('Unsupported Filter: %s', $filter[1]));
            }
        }

        return $stream;
    }

    /**
     * Loads a filter class from the "filters" directory if necessary and
     * decodes the data with it.
     *
     * @param string $className Name of the filter class; the file
     *                          filters/<className>.php is required when the
     *                          class is not defined yet.
     * @param string $stream    Encoded data.
     * @return string Result of the filter's decode() method.
     */
    private function _decodeWithFilterClass($className, $stream)
    {
        if (!class_exists($className)) {
            require_once('filters/' . $className . '.php');
        }

        $decoder = new $className();

        return $decoder->decode($stream);
    }
}