root / drupal7 / sites / all / libraries / fpdi-version / fpdi_pdf_parser.php @ 13755f8d
1 |
<?php
|
---|---|
2 |
//
|
3 |
// FPDI - Version 1.4.4
|
4 |
//
|
5 |
// Copyright 2004-2013 Setasign - Jan Slabon
|
6 |
//
|
7 |
// Licensed under the Apache License, Version 2.0 (the "License");
|
8 |
// you may not use this file except in compliance with the License.
|
9 |
// You may obtain a copy of the License at
|
10 |
//
|
11 |
// http://www.apache.org/licenses/LICENSE-2.0
|
12 |
//
|
13 |
// Unless required by applicable law or agreed to in writing, software
|
14 |
// distributed under the License is distributed on an "AS IS" BASIS,
|
15 |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16 |
// See the License for the specific language governing permissions and
|
17 |
// limitations under the License.
|
18 |
//
|
19 |
|
20 |
require_once('pdf_parser.php'); |
21 |
|
22 |
class fpdi_pdf_parser extends pdf_parser { |
23 |
|
24 |
/**
|
25 |
* Pages
|
26 |
* Index begins at 0
|
27 |
*
|
28 |
* @var array
|
29 |
*/
|
30 |
var $pages; |
31 |
|
32 |
/**
|
33 |
* Page count
|
34 |
* @var integer
|
35 |
*/
|
36 |
var $page_count; |
37 |
|
38 |
/**
|
39 |
* Current page number (stored as a zero-based index)
|
40 |
* @var integer
|
41 |
*/
|
42 |
var $pageno; |
43 |
|
44 |
/**
|
45 |
* PDF Version of imported Document
|
46 |
* @var string
|
47 |
*/
|
48 |
var $pdfVersion; |
49 |
|
50 |
/**
|
51 |
* FPDI Reference
|
52 |
* @var object
|
53 |
*/
|
54 |
var $fpdi; |
55 |
|
56 |
/**
|
57 |
* Available BoxTypes
|
58 |
*
|
59 |
* @var array
|
60 |
*/
|
61 |
var $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox'); |
62 |
|
63 |
/**
 * Constructor
 *
 * Stores the FPDI reference, delegates to the parent constructor with the
 * filename, then resolves the /Pages entry of $this->root and collects all
 * pages into $this->pages.
 *
 * @param string $filename Source-Filename
 * @param object $fpdi     Object of type fpdi
 */
function fpdi_pdf_parser($filename, &$fpdi) {
    $this->fpdi =& $fpdi;

    parent::pdf_parser($filename);

    // Start from an empty list: if the page tree yields no page at all,
    // $this->pages would otherwise stay null and count(null) emits a warning
    // on PHP 7.2+ and throws a TypeError on PHP 8.
    $this->pages = array();

    // resolve Pages-Dictionary
    $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

    // Read pages
    $this->read_pages($this->c, $pages, $this->pages);

    // count pages
    $this->page_count = count($this->pages);
}
83 |
|
84 |
/**
 * Releases what this parser holds on to.
 *
 * Clears the reference to the fpdi object and then calls closeFile().
 */
function cleanUp() {
    // Drop the link to the FPDI object first, then let closeFile() run.
    $this->fpdi = null;

    $this->closeFile();
}
91 |
|
92 |
/**
 * Overrides parent::error()
 *
 * Hands the message to the error() method of the referenced fpdi object,
 * so that errors are reported through that object.
 *
 * @param string $msg Error-Message
 */
function error($msg) {
    // Reporting is delegated entirely to the FPDI instance.
    $this->fpdi->error($msg);
}
100 |
|
101 |
/**
 * Number of pages found in the source file
 *
 * Returns the value cached in $this->page_count by the constructor.
 *
 * @return int
 */
function getPageCount() {
    return $this->page_count;
}
109 |
|
110 |
|
111 |
/**
 * Select the page to work on
 *
 * The given number is cast to int and reduced by one before it is checked
 * against the page count and stored, i.e. callers pass a 1-based number and
 * $this->pageno holds the zero-based index.
 *
 * @param int $pageno Pagenumber to use
 */
function setPageno($pageno) {
    $index = ((int) $pageno) - 1;

    $inRange = ($index >= 0) && ($index < $this->getPageCount());
    if (!$inRange) {
        $this->fpdi->error('Pagenumber is wrong!');
    }

    $this->pageno = $index;
}
125 |
|
126 |
/**
 * Get the page-resources of the currently selected page
 *
 * Looks up the page stored at index $this->pageno and passes it on to
 * _getPageResources().
 *
 * @return array
 */
function getPageResources() {
    $currentPage = $this->pages[$this->pageno];

    return $this->_getPageResources($currentPage);
}
134 |
|
135 |
/**
 * Get page-resources from /Page
 *
 * Resolves the given object and returns its /Resources entry. If the object
 * has no /Resources entry, the lookup continues with its /Parent entry.
 *
 * @param array $obj Array of pdf-data
 * @return array|false The resolved resources (unwrapped when the resolved
 *                     value is of type PDF_TYPE_OBJECT), or false when
 *                     neither the object nor any parent has /Resources
 */
function _getPageResources ($obj) { // $obj = /Page
    $obj = $this->pdf_resolve_object($this->c, $obj);

    if (isset($obj[1][1]['/Resources'])) {
        // The object carries its own resources dictionary.
        $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
    } elseif (isset($obj[1][1]['/Parent'])) {
        // Otherwise move back to the parent object.
        $res = $this->_getPageResources($obj[1][1]['/Parent']);
    } else {
        return false;
    }

    // The parent lookup reports "nothing found" as false; indexing that
    // value raises an "array offset on bool" notice/warning on PHP 7.4+,
    // so only unwrap real array results.
    if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
        return $res[1];
    }

    return $res;
}
163 |
|
164 |
|
165 |
/**
 * Get content of current page
 *
 * Every content object found for the page is decoded with
 * _rebuildContentStream() and appended to the result, each followed by a
 * single space. A page without a /Contents entry yields an empty string.
 *
 * @return string
 */
function getContent() {
    if (!isset($this->pages[$this->pageno][1][1]['/Contents'])) {
        return '';
    }

    $contentRef = $this->pages[$this->pageno][1][1]['/Contents'];

    $buffer = '';
    foreach ($this->_getPageContent($contentRef) as $contentObject) {
        $buffer .= $this->_rebuildContentStream($contentObject) . ' ';
    }

    return $buffer;
}
184 |
|
185 |
|
186 |
/**
 * Resolve all content-objects
 *
 * An object reference is resolved; if it resolves to an array, that array is
 * processed recursively, otherwise the resolved object is the single result.
 * An array is walked entry by entry and the results are merged in order.
 * Any other type yields an empty list.
 *
 * @param array $content_ref
 * @return array
 */
function _getPageContent($content_ref) {
    if ($content_ref[0] == PDF_TYPE_OBJREF) {
        $resolved = $this->pdf_resolve_object($this->c, $content_ref);

        if ($resolved[1][0] == PDF_TYPE_ARRAY) {
            return $this->_getPageContent($resolved[1]);
        }

        return array($resolved);
    }

    $collected = array();

    if ($content_ref[0] == PDF_TYPE_ARRAY) {
        foreach ($content_ref[1] as $entry) {
            $collected = array_merge($collected, $this->_getPageContent($entry));
        }
    }

    return $collected;
}
210 |
|
211 |
|
212 |
/**
 * Rebuild content-streams
 *
 * Reads the /Filter entry of the given stream object (a single token, an
 * array of filters, or an object reference to either) and applies the
 * listed filters in order to the raw stream data.
 *
 * Handled filters: /FlateDecode (/Fl), /LZWDecode and /ASCII85Decode.
 * Any other filter name is reported through error().
 *
 * @param array $obj
 * @return string
 */
function _rebuildContentStream($obj) {
    $filters = array();

    if (isset($obj[1][1]['/Filter'])) {
        $_filter = $obj[1][1]['/Filter'];

        if ($_filter[0] == PDF_TYPE_OBJREF) {
            $tmpFilter = $this->pdf_resolve_object($this->c, $_filter);
            $_filter = $tmpFilter[1];
        }

        if ($_filter[0] == PDF_TYPE_TOKEN) {
            $filters[] = $_filter;
        } elseif ($_filter[0] == PDF_TYPE_ARRAY) {
            $filters = $_filter[1];
        }
    }

    $stream = $obj[2][1];

    foreach ($filters as $_filter) {
        switch ($_filter[1]) {
            case '/FlateDecode':
            case '/Fl':
                if (!function_exists('gzuncompress')) {
                    $this->error(sprintf('To handle %s filter, please compile php with zlib support.',$_filter[1]));
                    // Without zlib there is nothing to retry with.
                    break;
                }

                $oStream = $stream;
                $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';

                if ($stream === false) {
                    // Second attempt: drop the first two bytes (the zlib
                    // header, see RFC 1950) and inflate the raw data.
                    $stream = @gzinflate(substr($oStream, 2));

                    // Strict check: gzinflate() signals failure with false;
                    // an empty (or '0') result is a valid decoded stream.
                    if ($stream === false) {
                        $this->error('Error while decompressing stream.');
                    }
                }
                break;
            case '/LZWDecode':
                include_once('filters/FilterLZW_FPDI.php');
                $decoder = new FilterLZW_FPDI($this->fpdi);
                $stream = $decoder->decode($stream);
                break;
            case '/ASCII85Decode':
                include_once('filters/FilterASCII85_FPDI.php');
                $decoder = new FilterASCII85_FPDI($this->fpdi);
                $stream = $decoder->decode($stream);
                break;
            case null:
                // No filter name: the stream is used as it is.
                break;
            default:
                $this->error(sprintf('Unsupported Filter: %s',$_filter[1]));
        }
    }

    return $stream;
}
280 |
|
281 |
|
282 |
/**
 * Get a Box from a page
 * Arrayformat is same as used by fpdf_tpl
 *
 * The box entry is read from the resolved page (following an object
 * reference if necessary). If the page does not provide the box as an array,
 * the lookup is repeated on the page's /Parent; without a /Parent the result
 * is false.
 *
 * The returned array holds the keys x, y, w, h, llx, lly, urx and ury, each
 * divided by $k.
 *
 * @param array  $page      a /Page
 * @param string $box_index Type of Box @see $availableBoxes
 * @param float  $k         Scale factor from user space units to points
 * @return array|false
 */
function getPageBox($page, $box_index, $k) {
    $page = $this->pdf_resolve_object($this->c, $page);

    $box = isset($page[1][1][$box_index]) ? $page[1][1][$box_index] : null;

    if ($box !== null && $box[0] == PDF_TYPE_OBJREF) {
        $resolvedBox = $this->pdf_resolve_object($this->c, $box);
        $box = $resolvedBox[1];
    }

    if ($box !== null && $box[0] == PDF_TYPE_ARRAY) {
        // The four entries of the box array, in the order they are stored.
        $coords = $box[1];
        $x0 = $coords[0][1];
        $y0 = $coords[1][1];
        $x1 = $coords[2][1];
        $y1 = $coords[3][1];

        return array(
            'x'   => $x0 / $k,
            'y'   => $y0 / $k,
            'w'   => abs($x0 - $x1) / $k,
            'h'   => abs($y0 - $y1) / $k,
            'llx' => min($x0, $x1) / $k,
            'lly' => min($y0, $y1) / $k,
            'urx' => max($x0, $x1) / $k,
            'ury' => max($y0, $y1) / $k,
        );
    }

    if (!isset($page[1][1]['/Parent'])) {
        return false;
    }

    // Not defined on this node: ask the parent node for the same box.
    $parent = $this->pdf_resolve_object($this->c, $page[1][1]['/Parent']);

    return $this->getPageBox($parent, $box_index, $k);
}
319 |
|
320 |
/**
 * Get all page boxes by page no
 *
 * The page is looked up at index $pageno - 1 of $this->pages, so the page
 * number given here is 1-based.
 *
 * @param int   $pageno The page number
 * @param float $k      Scale factor from user space units to points
 * @return array
 */
function getPageBoxes($pageno, $k) {
    $page = $this->pages[$pageno - 1];

    return $this->_getPageBoxes($page, $k);
}
330 |
|
331 |
/**
 * Get all boxes from /Page
 *
 * Asks getPageBox() for every name listed in $this->availableBoxes and keeps
 * the ones for which a non-empty result comes back, keyed by box name.
 *
 * @param array $page a /Page
 * @param float $k    Scale factor from user space units to points
 * @return array
 */
function _getPageBoxes($page, $k) {
    $boxes = array();

    foreach ($this->availableBoxes as $boxName) {
        $dimensions = $this->getPageBox($page, $boxName, $k);
        if (!$dimensions) {
            // Box not available for this page: leave it out of the result.
            continue;
        }

        $boxes[$boxName] = $dimensions;
    }

    return $boxes;
}
348 |
|
349 |
/**
 * Get the page rotation by pageno
 *
 * The page is looked up at index $pageno - 1 of $this->pages and handed to
 * _getPageRotation().
 *
 * @param integer $pageno
 * @return array
 */
function getPageRotation($pageno) {
    $page = $this->pages[$pageno - 1];

    return $this->_getPageRotation($page);
}
358 |
|
359 |
/**
 * Get the rotation from /Page
 *
 * Resolves the given object and returns its /Rotate entry. If the object has
 * no /Rotate entry, the lookup continues with its /Parent entry.
 *
 * @param array $obj Array of pdf-data
 * @return array|false The resolved /Rotate value (unwrapped when the
 *                     resolved value is of type PDF_TYPE_OBJECT), or false
 *                     when neither the object nor any parent has /Rotate
 */
function _getPageRotation($obj) { // $obj = /Page
    $obj = $this->pdf_resolve_object($this->c, $obj);

    if (isset($obj[1][1]['/Rotate'])) {
        $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
    } elseif (isset($obj[1][1]['/Parent'])) {
        // Not set on this node: continue with the parent node.
        $res = $this->_getPageRotation($obj[1][1]['/Parent']);
    } else {
        return false;
    }

    // The parent lookup reports "nothing found" as false; indexing that
    // value raises an "array offset on bool" notice/warning on PHP 7.4+,
    // so only unwrap real array results.
    if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
        return $res[1];
    }

    return $res;
}
377 |
|
378 |
/**
 * Read all /Page(es)
 *
 * Resolves the /Kids entry of the given /Pages node and walks its entries.
 * An entry whose /Type is '/Pages' is read recursively; every other entry is
 * appended to the result array.
 *
 * @param object $c      pdf_context
 * @param array  $pages  /Pages
 * @param array  $result the result-array (filled by reference)
 */
function read_pages(&$c, &$pages, &$result) {
    // Get the kids dictionary
    $_kids = $this->pdf_resolve_object($c, $pages[1][1]['/Kids']);

    if (!is_array($_kids)) {
        $this->error('Cannot find /Kids in current /Page-Dictionary');
    }

    // If the second element of the resolved value is itself tagged as an
    // array, the list of kids sits one level deeper; otherwise the second
    // element is used as the list directly.
    $kids = ($_kids[1][0] == PDF_TYPE_ARRAY) ? $_kids[1][1] : $_kids[1];

    foreach ($kids as $kid) {
        $node = $this->pdf_resolve_object($c, $kid);

        if ($node[1][1]['/Type'][1] !== '/Pages') {
            $result[] = $node;
            continue;
        }

        // One of the kids is an embedded /Pages node: resolve it as well.
        $this->read_pages($c, $node, $result);
    }
}
409 |
|
410 |
|
411 |
|
412 |
/**
 * Get PDF-Version
 *
 * Calls parent::getPDFVersion() and afterwards sets the PDF version of the
 * FPDI object to the larger of its current version and $this->pdfVersion.
 * NOTE(review): $this->pdfVersion is presumably filled by the parent call -
 * confirm in pdf_parser.
 */
function getPDFVersion() {
    parent::getPDFVersion();

    $currentVersion = $this->fpdi->getPDFVersion();
    $highestVersion = max($currentVersion, $this->pdfVersion);

    $this->fpdi->setPDFVersion($highestVersion);
}
421 |
} |