root / drupal7 / sites / all / libraries / fpdi-1.5.4 / fpdi_pdf_parser.php @ edccd837
<?php
//
// FPDI - Version 1.5.4
//
// Copyright 2004-2015 Setasign - Jan Slabon
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Pull in the base parser only if the class has not been made available yet
// (class_exists() is consulted first, so an already loaded or autoloadable
// pdf_parser class is reused instead of including the file again).
if (class_exists('pdf_parser') === false) {
    require_once 'pdf_parser.php';
}

/**
 * Class fpdi_pdf_parser
 *
 * Page-oriented extension of pdf_parser. On construction the page tree of the
 * document is flattened into a zero-based list of page objects; the remaining
 * methods expose the resources, content streams, boundary boxes and rotation
 * of those pages.
 *
 * Throughout this class a parsed value is an array whose element [0] is
 * compared against the pdf_parser::TYPE_* constants and whose element [1]
 * holds the payload; for an object wrapper the payload is the wrapped value.
 */
class fpdi_pdf_parser extends pdf_parser
{
    /**
     * Pages
     *
     * Index begins at 0
     *
     * Initialised to an empty array so that counting it is always valid,
     * even when the page tree contributes no pages.
     *
     * @var array
     */
    protected $_pages = array();

    /**
     * Page count
     *
     * @var integer
     */
    protected $_pageCount = 0;

    /**
     * Current page number
     *
     * Stored as a zero-based index into $_pages (see setPageNo()).
     *
     * @var integer
     */
    public $pageNo;

    /**
     * PDF version of imported document
     *
     * @var string
     */
    public $_pdfVersion;

    /**
     * Available BoxTypes
     *
     * @var array
     */
    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');

    /**
     * The constructor.
     *
     * Delegates to the parent constructor, then resolves the /Pages entry of
     * the document root and collects all pages into $_pages.
     *
     * @param string $filename The source filename
     */
    public function __construct($filename)
    {
        parent::__construct($filename);

        // resolve Pages-Dictionary
        $pages = $this->resolveObject($this->_root[1][1]['/Pages']);

        // Read pages
        $this->_readPages($pages, $this->_pages);

        // count pages
        $this->_pageCount = count($this->_pages);
    }

    /**
     * Get page count from source file.
     *
     * @return int
     */
    public function getPageCount()
    {
        return $this->_pageCount;
    }

    /**
     * Set the page number.
     *
     * The given number is one-based; it is cast to int and stored as a
     * zero-based index in $pageNo.
     *
     * @param int $pageNo Page number to use
     * @throws InvalidArgumentException If the number is below 1 or above the page count
     */
    public function setPageNo($pageNo)
    {
        $pageNo = ((int) $pageNo) - 1;

        if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
            throw new InvalidArgumentException('Invalid page number!');
        }

        $this->pageNo = $pageNo;
    }

    /**
     * Get page-resources from current page
     *
     * @return array|boolean
     */
    public function getPageResources()
    {
        return $this->_getPageResources($this->_pages[$this->pageNo]);
    }

    /**
     * Get page-resources from a /Page dictionary.
     *
     * If the object carries its own /Resources entry it is used; otherwise
     * the lookup continues with the object's /Parent. False is returned when
     * neither entry exists.
     *
     * @param array $obj Array of pdf-data
     * @return array|boolean
     */
    protected function _getPageResources($obj)
    {
        $obj = $this->resolveObject($obj);

        if (isset($obj[1][1]['/Resources'])) {
            $res = $this->resolveObject($obj[1][1]['/Resources']);
        } elseif (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // Only an array can be an object wrapper. The parent lookup may have
        // returned false, which must not be indexed.
        if (is_array($res) && isset($res[0]) && $res[0] == pdf_parser::TYPE_OBJECT) {
            return $res[1];
        }

        return $res;
    }

    /**
     * Get content of current page.
     *
     * If /Contents is an array, the streams are concatenated. Every stream is
     * passed through _unFilterStream() (not defined in this class) and is
     * followed by a single space in the returned buffer.
     *
     * @return string Empty string if the current page has no /Contents entry
     */
    public function getContent()
    {
        $buffer = '';

        if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
            $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
            foreach ($contents as $tmpContent) {
                $buffer .= $this->_unFilterStream($tmpContent) . ' ';
            }
        }

        return $buffer;
    }

    /**
     * Resolve all content objects.
     *
     * Accepts an object reference or an array of them and returns a flat list
     * of the resolved objects. A reference that resolves to an array is
     * expanded recursively. Any other value type yields an empty list.
     *
     * @param array $contentRef
     * @return array
     */
    protected function _getPageContent($contentRef)
    {
        $contents = array();

        if ($contentRef[0] == pdf_parser::TYPE_OBJREF) {
            $content = $this->resolveObject($contentRef);
            if ($content[1][0] == pdf_parser::TYPE_ARRAY) {
                $contents = $this->_getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } elseif ($contentRef[0] == pdf_parser::TYPE_ARRAY) {
            foreach ($contentRef[1] as $tmp_content_ref) {
                $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }

    /**
     * Get a boundary box from a page
     *
     * Array format is same as used by FPDF_TPL.
     *
     * The box is looked up on the page itself; if it is not an array there,
     * the lookup continues with the page's /Parent. All returned values are
     * divided by $k.
     *
     * @param array $page a /Page dictionary
     * @param string $boxIndex Type of box {see {@link $availableBoxes})
     * @param float $k Scale factor from user space units to points
     *
     * @return array|boolean Array with the keys x, y, w, h, llx, lly, urx and ury,
     *                       or false if no such box is found
     */
    protected function _getPageBox($page, $boxIndex, $k)
    {
        $page = $this->resolveObject($page);
        $box = null;
        if (isset($page[1][1][$boxIndex])) {
            $box = $page[1][1][$boxIndex];
        }

        // The box may be stored indirectly; use the referenced value then.
        if (!is_null($box) && $box[0] == pdf_parser::TYPE_OBJREF) {
            $tmp_box = $this->resolveObject($box);
            $box = $tmp_box[1];
        }

        if (!is_null($box) && $box[0] == pdf_parser::TYPE_ARRAY) {
            $b = $box[1];
            return array(
                'x' => $b[0][1] / $k,
                'y' => $b[1][1] / $k,
                'w' => abs($b[0][1] - $b[2][1]) / $k,
                'h' => abs($b[1][1] - $b[3][1]) / $k,
                'llx' => min($b[0][1], $b[2][1]) / $k,
                'lly' => min($b[1][1], $b[3][1]) / $k,
                'urx' => max($b[0][1], $b[2][1]) / $k,
                'ury' => max($b[1][1], $b[3][1]) / $k,
            );
        }

        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
    }

    /**
     * Get all page boundary boxes by page number
     *
     * @param int $pageNo The page number (one-based)
     * @param float $k Scale factor from user space units to points
     * @return array
     * @throws InvalidArgumentException If the page does not exist
     */
    public function getPageBoxes($pageNo, $k)
    {
        if (!isset($this->_pages[$pageNo - 1])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageBoxes($this->_pages[$pageNo - 1], $k);
    }

    /**
     * Get all boxes from /Page dictionary
     *
     * Tries every entry of $availableBoxes; boxes that cannot be found are
     * left out of the result.
     *
     * @param array $page A /Page dictionary
     * @param float $k Scale factor from user space units to points
     * @return array Found boxes, keyed by box name
     */
    protected function _getPageBoxes($page, $k)
    {
        $boxes = array();

        foreach ($this->availableBoxes as $box) {
            $_box = $this->_getPageBox($page, $box, $k);
            if ($_box) {
                $boxes[$box] = $_box;
            }
        }

        return $boxes;
    }

    /**
     * Get the page rotation by page number
     *
     * @param integer $pageNo The page number (one-based)
     * @throws InvalidArgumentException If the page does not exist
     * @return array|bool
     */
    public function getPageRotation($pageNo)
    {
        if (!isset($this->_pages[$pageNo - 1])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageRotation($this->_pages[$pageNo - 1]);
    }

    /**
     * Get the rotation value of a page
     *
     * Uses the object's own /Rotate entry if present, otherwise continues
     * with its /Parent. False is returned when neither entry exists.
     *
     * @param array $obj A /Page dictionary
     * @return array|bool
     */
    protected function _getPageRotation($obj)
    {
        $obj = $this->resolveObject($obj);

        if (isset($obj[1][1]['/Rotate'])) {
            $res = $this->resolveObject($obj[1][1]['/Rotate']);
        } elseif (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageRotation($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // Only an array can be an object wrapper. The parent lookup may have
        // returned false, which must not be indexed.
        if (is_array($res) && isset($res[0]) && $res[0] == pdf_parser::TYPE_OBJECT) {
            return $res[1];
        }

        return $res;
    }

    /**
     * Read all pages
     *
     * Walks the /Kids of the given /Pages dictionary. Kids whose /Type is
     * /Pages are descended into recursively; every other kid is appended to
     * the result array.
     *
     * @param array $pages /Pages dictionary
     * @param array $result The result array
     * @throws Exception If the dictionary has no usable /Kids entry
     */
    protected function _readPages(&$pages, &$result)
    {
        // A missing /Kids entry is reported with the same exception as an
        // unresolvable one, instead of reading an undefined index first.
        if (!isset($pages[1][1]['/Kids'])) {
            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
        }

        // Get the kids dictionary
        $_kids = $this->resolveObject($pages[1][1]['/Kids']);

        if (!is_array($_kids)) {
            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
        }

        // /Kids was stored indirectly: continue with the wrapped value.
        if ($_kids[0] === self::TYPE_OBJECT) {
            $_kids = $_kids[1];
        }

        $kids = $_kids[1];

        foreach ($kids as $v) {
            $pg = $this->resolveObject($v);
            if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                // If one of the kids is an embedded
                // /Pages array, resolve it as well.
                $this->_readPages($pg, $result);
            } else {
                $result[] = $pg;
            }
        }
    }
}