<?php
/*
 ======================================================================
 lastRSS 0.9.1

 Simple yet powerful PHP class to parse RSS files.

 by Vojtech Semecky, webmaster @ webdot . cz

 Latest version, features, manual and examples:
     http://lastrss.webdot.cz/

 ----------------------------------------------------------------------
 LICENSE

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License (GPL)
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 To read the license please visit http://www.gnu.org/copyleft/gpl.html
 ======================================================================
*/

/**
 * lastRSS
 * Simple yet powerful PHP class to parse RSS files.
 *
 * Typical use: create an instance, adjust the public properties below,
 * then call Get($rss_url). Get() returns an associative array describing
 * the feed, or False when the feed could not be opened.
 */
class lastRSS {
    // -------------------------------------------------------------------
    // Public properties
    // -------------------------------------------------------------------
    // Encoding assumed for feeds that do not declare encoding="..."
    var $default_cp = 'UTF-8';
    // CDATA handling: 'nochange' keeps the markers, 'content' and 'strip'
    // both remove the <![CDATA[ and ]]> markers and keep the inner text
    var $CDATA = 'nochange';
    // Target encoding for extracted values; '' disables conversion
    var $cp = '';
    // Maximum number of items to parse; 0 means no limit
    var $items_limit = 0;
    // When true, tags and HTML entities are removed from item title/description
    var $stripHTML = False;
    // date() format applied to lastBuildDate and pubDate; '' leaves them as-is
    var $date_format = '';
    // Directory holding cache files; '' disables caching
    var $cache_dir = '';
    // Maximum age (in seconds) for a cache file to be considered fresh
    var $cache_time = 0;

    // -------------------------------------------------------------------
    // Private variables
    // -------------------------------------------------------------------
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');
    // Source encoding of the feed currently being parsed (set by Parse(),
    // read by my_preg_match())
    var $rsscp = '';

    // -------------------------------------------------------------------
    // Parse RSS file and returns associative array.
    //
    // $rss_url  path or URL handed to fopen()
    // returns   the parsed feed array with an extra 'cached' key
    //           (1 = served from cache, 0 = freshly parsed), or False when
    //           the feed could not be opened / the cache was unreadable
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // If CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        // A missing cache file is simply "not fresh"; no warning needed
        $cache_mtime = file_exists($cache_file) ? filemtime($cache_file) : false;

        if ($cache_mtime !== false && (time() - $cache_mtime) < $this->cache_time) {
            // cached file is fresh enough, return cached array
            $raw = file_get_contents($cache_file);
            // NOTE(review): unserialize() must only ever see files written by
            // this class; keep cache_dir out of reach of untrusted writers.
            $result = ($raw === false) ? false : unserialize($raw);
            // set 'cached' to 1 only if cached file is correct
            if ($result) $result['cached'] = 1;
        } else {
            // cached file is missing or too old, create new
            $result = $this->Parse($rss_url);
            $serialized = serialize($result);
            // Writing the cache is best-effort: an unwritable directory
            // must not prevent the parsed feed from being returned
            if ($f = @fopen($cache_file, 'w')) {
                fwrite ($f, $serialized, strlen($serialized));
                fclose($f);
            }
            if ($result) $result['cached'] = 0;
        }

        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); return trimmed field with index 1
    // from 'classic' preg_match() array output.
    // CDATA markers are removed and the encoding converted according to
    // $CDATA and $cp. Returns '' when the pattern does not match.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        preg_match($pattern, $subject, $out);

        // if there is NO result, return empty string
        if (!isset($out[1])) {
            return '';
        }
        $value = $out[1];

        // Process CDATA (if present). 'content' and 'strip' behave the
        // same here: only the CDATA markers are removed.
        if ($this->CDATA == 'content' || $this->CDATA == 'strip') {
            $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
        }

        // If code page is set convert character encoding to required
        if ($this->cp != '') {
            // iconv() yields false on failure; trim() turns that into ''
            $value = iconv($this->rsscp, $this->cp.'//TRANSLIT', $value);
        }

        return trim($value);
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        // Get HTML entities table and flip it to entity => character
        $trans_tbl = array_flip(get_html_translation_table (HTML_ENTITIES, ENT_QUOTES));
        // Add support for &apos; entity (missing in HTML_ENTITIES)
        $trans_tbl += array('&apos;' => "'");
        // Replace entities by values
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Internal helper: reformat a date string using $date_format.
    // The value is returned unchanged when strtotime() cannot parse it
    // (false on current PHP, -1 on PHP older than 5.1).
    // -------------------------------------------------------------------
    function format_date ($value) {
        $timestamp = strtotime($value);
        if ($timestamp === false || $timestamp === -1) {
            return $value;
        }
        return date($this->date_format, $timestamp);
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    // Returns the feed array, or False when $rss_url cannot be opened.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // Open and load RSS file; failure is reported through the return value
        $f = @fopen($rss_url, 'r');
        if (!$f) {
            return False;
        }
        $rss_content = '';
        // Stop on the first failed read as well as on EOF, so a broken
        // stream that never reports EOF cannot loop forever
        while (($chunk = fgets($f, 4096)) !== false) {
            $rss_content .= $chunk;
        }
        fclose($f);

        $result = array();

        // Parse document encoding. my_preg_match() reads $this->rsscp, so
        // give it a defined value before the first call.
        $this->rsscp = $this->default_cp;
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);
        // if document codepage is specified, use it; otherwise keep the default
        if ($result['encoding'] != '') {
            $this->rsscp = $result['encoding'];
        }

        // Parse CHANNEL info
        preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel);
        $channel_body = isset($out_channel[1]) ? $out_channel[1] : '';
        foreach ($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel_body);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }
        // If date_format is specified and lastBuildDate is present,
        // convert lastBuildDate to specified date format
        if ($this->date_format != '' && isset($result['lastBuildDate'])) {
            $result['lastBuildDate'] = $this->format_date($result['lastBuildDate']);
        }

        // Parse TEXTINPUT info
        // The regexp looks for a <textinput> tag with or without attributes,
        // but skips the self-closed form <textinput /> (it is not an opening tag)
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (isset($out_textinfo[2])) {
            foreach ($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (isset($out_imageinfo[1])) {
            foreach ($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $i = 0;
        $result['items'] = array(); // create array even if there are no items
        foreach ($items[2] as $rss_item) {
            // Stop once the configured limit is reached (0 = unlimited)
            if ($this->items_limit != 0 && $i >= $this->items_limit) {
                break;
            }
            foreach ($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
            }
            // Strip HTML tags and entities from DESCRIPTION and TITLE
            if ($this->stripHTML) {
                foreach (array('description', 'title') as $field) {
                    if (!empty($result['items'][$i][$field])) {
                        $result['items'][$i][$field] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i][$field])));
                    }
                }
            }
            // If date_format is specified and pubDate is present,
            // convert pubDate to specified date format
            if ($this->date_format != '' && isset($result['items'][$i]['pubDate'])) {
                $result['items'][$i]['pubDate'] = $this->format_date($result['items'][$i]['pubDate']);
            }
            // Item counter
            $i++;
        }

        $result['items_count'] = $i;
        return $result;
    }
}

?>