1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* @file
|
5
|
* Contains the FeedsFetcher and related classes.
|
6
|
*/
|
7
|
|
8
|
/**
|
9
|
* Base class for all fetcher results.
|
10
|
*/
|
11
|
class FeedsFetcherResult extends FeedsResult {

  /**
   * The raw fetched data.
   *
   * Set to NULL once the data has been written to a file by saveRawToFile().
   *
   * @var string
   */
  protected $raw;

  /**
   * The path to a file where the raw data is stored.
   *
   * Empty until saveRawToFile() has stored the raw data in a file.
   *
   * @var string
   */
  protected $file_path;

  /**
   * Constructor.
   *
   * @param string $raw
   *   The raw content that was fetched.
   */
  public function __construct($raw) {
    $this->raw = $raw;
  }

  /**
   * Prevent saving the raw result when serializing object.
   *
   * Raw data that is still held in memory is written to a file first, so that
   * it can be read back through the file path after unserializing.
   *
   * @return array
   *   The names of the properties to serialize. 'raw' is never among them.
   *
   * @throws RuntimeException
   *   If the raw data could not be saved to a file.
   */
  public function __sleep() {
    if (!empty($this->raw)) {
      // Save contents of raw to a file for later use.
      $this->saveRawToFile();
    }

    // Save anything but 'raw'.
    unset($this->raw);
    return array_keys(get_object_vars($this));
  }

  /**
   * Returns the raw content.
   *
   * When no raw data is held in memory, the contents of the stored file are
   * returned instead. Raw data held in memory is sanitized in place before it
   * is returned.
   *
   * @return string
   *   The raw content from the source as a string.
   *
   * @throws Exception
   *   Extending classes MAY throw an exception if a problem occurred.
   */
  public function getRaw() {
    if (empty($this->raw)) {
      // Return raw contents from file.
      return $this->getFileContents();
    }
    return $this->sanitizeRawOptimized($this->raw);
  }

  /**
   * Get a path to a temporary file containing the resource provided by the
   * fetcher.
   *
   * File will be deleted after DRUPAL_MAXIMUM_TEMP_FILE_AGE.
   *
   * @return string
   *   A path to a file containing the raw content as a source.
   *
   * @throws Exception
   *   If an unexpected problem occurred.
   */
  public function getFilePath() {
    if (empty($this->file_path)) {
      // No file exists yet. Save any raw data that we got.
      $this->saveRawToFile();
    }

    // Check if given file exists now.
    $this->checkFile();

    // Return file path.
    return $this->sanitizeFile($this->file_path);
  }

  /**
   * Returns directory for storing files that are in progress of import.
   *
   * The 'feeds_in_progress_dir' variable takes precedence. Without it, the
   * private file scheme is used when available and the public one otherwise.
   *
   * @return string
   *   The cache dir to use.
   */
  public function getFeedsInProgressDir() {
    $dir = variable_get('feeds_in_progress_dir', NULL);
    if ($dir) {
      return $dir;
    }

    $schemes = file_get_stream_wrappers(STREAM_WRAPPERS_WRITE_VISIBLE);
    $scheme = isset($schemes['private']) ? 'private' : 'public';
    return $scheme . '://feeds/in_progress';
  }

  /**
   * Constructs file name for saving the raw data.
   *
   * @return string
   *   The in-progress directory followed by the class name and request time.
   */
  public function constructFilePath() {
    return $this->getFeedsInProgressDir() . '/' . get_class($this) . REQUEST_TIME;
  }

  /**
   * Returns if raw data exists.
   *
   * This checks if either $this->raw is set or if the raw data exists in a
   * file. This is better than calling just ::getRaw() as that would return a
   * copy of all raw data which may lead to memory issues if the data is very
   * large.
   *
   * @return bool
   *   True if the raw data exists.
   *   False otherwise.
   */
  public function rawExists() {
    return !empty($this->raw) || $this->fileExists();
  }

  /**
   * Returns if the file to parse exists.
   *
   * @return bool
   *   True if a file path is set and that file is readable.
   *   False otherwise.
   */
  public function fileExists() {
    return !empty($this->file_path) && is_readable($this->file_path);
  }

  /**
   * Returns the contents of a file, if it exists.
   *
   * The file is sanitized in place before it is read.
   *
   * @return string|null
   *   The file contents, or NULL if there is no readable file.
   *
   * @throws RuntimeException
   *   If the file needs sanitizing but cannot be read or written.
   */
  public function getFileContents() {
    if (!$this->fileExists()) {
      return NULL;
    }

    $this->sanitizeFile($this->file_path);
    return file_get_contents($this->file_path);
  }

  /**
   * Checks that a file exists and is readable.
   *
   * @throws RuntimeException
   *   Thrown if the file doesn't exist or isn't readable.
   */
  protected function checkFile() {
    if (!file_exists($this->file_path)) {
      throw new RuntimeException(t('File %filepath does not exist.', array(
        '%filepath' => $this->file_path,
      )));
    }

    if (!is_readable($this->file_path)) {
      throw new RuntimeException(t('File %filepath is not readable.', array(
        '%filepath' => $this->file_path,
      )));
    }
  }

  /**
   * Saves the raw fetcher result to a file.
   *
   * On success the file path is remembered and the in-memory raw data is
   * released.
   *
   * @throws RuntimeException
   *   In case the destination wasn't writable.
   */
  public function saveRawToFile() {
    $file_in_progress_dir = $this->getFeedsInProgressDir();
    if (!file_prepare_directory($file_in_progress_dir, FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS)) {
      throw new RuntimeException(t('Feeds directory either cannot be created or is not writable.'));
    }

    $this->file_path = FALSE;

    // Compute the destination once so that the error message below can name
    // the path that was actually attempted.
    $destination = $this->constructFilePath();
    $file = file_save_data($this->getRaw(), $destination);
    if (!$file) {
      throw new RuntimeException(t('Cannot write content to %dest', array('%dest' => $destination)));
    }

    // Status 0 is stored on the file record before it is saved again.
    $file->status = 0;
    file_save($file);
    $this->file_path = $file->uri;

    // Clear raw data to save memory, but also to prevent saving the same raw
    // data to a file again in the same request.
    $this->raw = NULL;
  }

  /**
   * Sanitize the raw content string.
   *
   * Currently supported sanitizations:
   * - Remove BOM header from UTF-8 files.
   *
   * Consider using ::sanitizeRawOptimized() instead that receives the variable
   * by reference and thus saves memory.
   *
   * @param string $raw
   *   The raw content string to be sanitized.
   *
   * @return string
   *   The sanitized content as a string.
   */
  public function sanitizeRaw($raw) {
    // $raw is a local copy here, so the caller's variable is left untouched.
    return $this->sanitizeRawOptimized($raw);
  }

  /**
   * Sanitize the raw content string.
   *
   * Currently supported sanitizations:
   * - Remove BOM header from UTF-8 files.
   *
   * This accepts the raw contents by reference to prevent having the whole raw
   * contents in memory again. The passed variable itself is modified.
   *
   * @param string $raw
   *   The raw content string to be sanitized.
   *
   * @return string
   *   The sanitized content as a string.
   */
  public function sanitizeRawOptimized(&$raw) {
    if (substr($raw, 0, 3) === pack('CCC', 0xef, 0xbb, 0xbf)) {
      $raw = substr($raw, 3);
    }
    return $raw;
  }

  /**
   * Sanitize the file in place.
   *
   * Currently supported sanitizations:
   * - Remove BOM header from UTF-8 files.
   *
   * @param string $filepath
   *   The file path of the file to be sanitized.
   *
   * @return string
   *   The file path of the sanitized file.
   *
   * @throws RuntimeException
   *   Thrown if the file cannot be read, or if it has a BOM header and is not
   *   writeable.
   */
  public function sanitizeFile($filepath) {
    $handle = fopen($filepath, 'r');
    if ($handle === FALSE) {
      throw new RuntimeException(t('File %filepath is not readable.', array(
        '%filepath' => $filepath,
      )));
    }

    // Only the first three bytes are needed to detect the BOM header.
    $start = fread($handle, 3);
    fclose($handle);

    if ($start !== pack('CCC', 0xef, 0xbb, 0xbf)) {
      return $filepath;
    }

    if (!is_writable($filepath)) {
      throw new RuntimeException(t('File %filepath is not writable.', array(
        '%filepath' => $filepath,
      )));
    }

    // BOM header is present: read entire contents of file and overwrite the
    // file with corrected contents.
    $contents = file_get_contents($filepath);
    if ($contents === FALSE) {
      // Never overwrite the file with nothing when reading it failed.
      throw new RuntimeException(t('File %filepath is not readable.', array(
        '%filepath' => $filepath,
      )));
    }

    if (file_put_contents($filepath, substr($contents, 3)) === FALSE) {
      throw new RuntimeException(t('File %filepath is not writable.', array(
        '%filepath' => $filepath,
      )));
    }

    return $filepath;
  }

}
|
288
|
|
289
|
/**
|
290
|
* Abstract class, defines shared functionality between fetchers.
|
291
|
*
|
292
|
* Implements FeedsSourceInfoInterface to expose source forms to Feeds.
|
293
|
*/
|
294
|
abstract class FeedsFetcher extends FeedsPlugin {

  /**
   * Implements FeedsPlugin::pluginType().
   *
   * @return string
   *   Always 'fetcher'.
   */
  public function pluginType() {
    return 'fetcher';
  }

  /**
   * Fetch content from a source and return it.
   *
   * This method is abstract: every concrete fetcher has to provide it.
   *
   * @param FeedsSource $source
   *   Source value as entered by user through sourceForm().
   *
   * @return FeedsFetcherResult
   *   A FeedsFetcherResult object.
   */
  abstract public function fetch(FeedsSource $source);

  /**
   * Clear all caches for results for given source.
   *
   * Does nothing by default.
   *
   * @param FeedsSource $source
   *   Source information for this expiry. Implementers can choose to only clear
   *   caches pertaining to this source.
   */
  public function clear(FeedsSource $source) {}

  /**
   * Request handler invoked if callback URL is requested.
   *
   * Locked down by default: the base implementation only calls
   * drupal_access_denied(). For an example usage see FeedsHTTPFetcher.
   *
   * Note: this method may exit the script.
   *
   * @param int $feed_nid
   *   Optional feed node ID; not used by the base implementation.
   *
   * @return
   *   A string to be returned to the client.
   */
  public function request($feed_nid = 0) {
    drupal_access_denied();
  }

  /**
   * Construct a path for a concrete fetcher/source combination.
   *
   * The result of this method matches up with the general path definition in
   * FeedsFetcher::menuItem(). For example usage look at FeedsHTTPFetcher.
   *
   * @param int $feed_nid
   *   Optional feed node ID. It is appended as an extra path component only
   *   when it is non-empty and numeric.
   *
   * @return string
   *   Path for this fetcher/source combination.
   */
  public function path($feed_nid = 0) {
    $base_path = 'feeds/importer/' . urlencode($this->id);
    $append_nid = $feed_nid && is_numeric($feed_nid);

    return $append_nid ? $base_path . '/' . $feed_nid : $base_path;
  }

  /**
   * Menu item definition for fetchers of this class.
   *
   * Note how the path component in the item definition matches the return
   * value of FeedsFetcher::path().
   *
   * Requests to this menu item will be routed to FeedsFetcher::request().
   *
   * @return array
   *   An array where the key is the Drupal menu item path and the value is
   *   a valid Drupal menu item definition.
   */
  public function menuItem() {
    $items = array();

    $items['feeds/importer/%feeds_importer'] = array(
      'page callback' => 'feeds_fetcher_callback',
      'page arguments' => array(2, 3),
      'access callback' => TRUE,
      'file' => 'feeds.pages.inc',
      'type' => MENU_CALLBACK,
    );

    return $items;
  }

  /**
   * Subscribe to a source. Only implement if fetcher requires subscription.
   *
   * Does nothing by default.
   *
   * @param FeedsSource $source
   *   Source information for this subscription.
   */
  public function subscribe(FeedsSource $source) {}

  /**
   * Unsubscribe from a source. Only implement if fetcher requires subscription.
   *
   * Does nothing by default.
   *
   * @param FeedsSource $source
   *   Source information for unsubscribing.
   */
  public function unsubscribe(FeedsSource $source) {}

  /**
   * Override import period settings.
   *
   * This can be used to force a certain import interval. The base
   * implementation has an empty body and therefore yields NULL.
   *
   * @param FeedsSource $source
   *   A FeedsSource object.
   *
   * @return int|null
   *   A time span in seconds if periodic import should be overridden for given
   *   $source, NULL otherwise.
   */
  public function importPeriod(FeedsSource $source) {}

  /**
   * Invoked after an import is finished.
   *
   * Does nothing by default.
   *
   * @param FeedsSource $source
   *   A FeedsSource object.
   */
  public function afterImport(FeedsSource $source) {}

}
|