/[debian-med]/trunk/community/website/inc/lastRSS.php
ViewVC logotype

Contents of /trunk/community/website/inc/lastRSS.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 499 - (show annotations) (download)
Tue Oct 2 11:30:24 2007 UTC (5 years, 7 months ago) by hanska-guest
File size: 9012 byte(s)
Another implementation
1 <?php
2 /*
3 ======================================================================
4 lastRSS 0.9.1
5
6 Simple yet powerful PHP class to parse RSS files.
7
8 by Vojtech Semecky, webmaster @ webdot . cz
9
10 Latest version, features, manual and examples:
11 http://lastrss.webdot.cz/
12
13 ----------------------------------------------------------------------
14 LICENSE
15
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License (GPL)
18 as published by the Free Software Foundation; either version 2
19 of the License, or (at your option) any later version.
20
21 This program is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
25
26 To read the license please visit http://www.gnu.org/copyleft/gpl.html
27 ======================================================================
28 */
29
/**
 * lastRSS
 * Simple yet powerful PHP class to parse RSS files.
 *
 * Usage: create an instance, adjust the public properties below, then call
 * Get($rss_url). Get() returns an associative array describing the feed
 * (channel fields, 'items', 'items_count', 'encoding', 'cached') or false
 * when the feed could not be opened.
 */
class lastRSS {
    // -------------------------------------------------------------------
    // Public properties
    // -------------------------------------------------------------------
    // Code page assumed for a feed that does not declare an encoding
    var $default_cp = 'UTF-8';
    // CDATA handling: 'nochange' keeps the markup, 'content' keeps the text
    // but drops the <![CDATA[ ]]> wrapper, 'strip' removes the whole section
    var $CDATA = 'nochange';
    // Target code page for returned values; '' disables conversion
    var $cp = '';
    // Maximum number of items to parse; 0 means no limit
    var $items_limit = 0;
    // When true, HTML is stripped from item titles and descriptions
    var $stripHTML = false;
    // date() format applied to lastBuildDate and pubDate; '' leaves them as-is
    var $date_format = '';
    // Directory holding cached results; '' disables caching
    var $cache_dir = '';
    // Maximum age (in seconds) of a cache file before it is rebuilt
    var $cache_time = 0;

    // -------------------------------------------------------------------
    // Private variables
    // -------------------------------------------------------------------
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');
    // Code page of the feed currently being parsed; set by Parse()
    var $rsscp = '';

    // -------------------------------------------------------------------
    // Parse RSS file and return an associative array (false on failure).
    // The result carries 'cached' => 1 when it came from the cache file
    // and 'cached' => 0 when the feed was freshly parsed.
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        $mtime = @filemtime($cache_file); // false when the file does not exist
        if ($mtime !== false && (time() - $mtime) < $this->cache_time) {
            // Cached file is fresh enough, try to return the cached array.
            // NOTE(review): unserialize() must only ever see files written by
            // this class; keep cache_dir out of reach of untrusted writers.
            $serialized = @file_get_contents($cache_file);
            $result = ($serialized === false) ? false : @unserialize($serialized);
            if (is_array($result)) {
                $result['cached'] = 1;
                return $result;
            }
            // Unreadable or corrupt cache file: fall through and rebuild it
        }

        // Cached file is missing, too old or unusable: create a new one
        $result = $this->Parse($rss_url);
        if ($result) {
            // Only successful parses are cached, so a temporary download
            // failure is retried on the next call instead of being remembered
            $serialized = serialize($result);
            if ($f = @fopen($cache_file, 'w')) {
                fwrite ($f, $serialized, strlen($serialized));
                fclose($f);
            }
            $result['cached'] = 0;
        }
        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); returns the trimmed field with index 1
    // from 'classic' preg_match() array output, after CDATA handling and
    // optional character set conversion. Returns '' when nothing matched.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        // No match (or a regex error): return empty string
        if (!preg_match($pattern, $subject, $out) || !isset($out[1])) {
            return '';
        }
        $value = $out[1];

        // Process CDATA (if present)
        if ($this->CDATA == 'content') {
            // Keep the CDATA content, drop only the wrapping markers
            $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
        } elseif ($this->CDATA == 'strip') {
            // Remove the CDATA sections together with their content
            $stripped = preg_replace('/<!\[CDATA\[.*?\]\]>/s', '', $value);
            if ($stripped !== null) $value = $stripped; // null signals a regex error
        }

        // If a code page is set, convert character encoding to the required one
        if ($this->cp != '') {
            // rsscp is set by Parse(); fall back to the default for direct calls
            $from = ($this->rsscp != '') ? $this->rsscp : $this->default_cp;
            $converted = @iconv($from, $this->cp.'//TRANSLIT', $value);
            // On conversion failure keep the unconverted text rather than lose it
            if ($converted !== false) $value = $converted;
        }

        return trim($value);
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        // Entity table flipped so that keys are entities and values characters
        $trans_tbl = array_flip(get_html_translation_table (HTML_ENTITIES, ENT_QUOTES));
        // Add support for &apos; entity (missing in HTML_ENTITIES)
        $trans_tbl += array('&apos;' => "'");
        // Replace entities by values
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Private helper: convert a date string to $this->date_format.
    // Returns the value unchanged when no format is configured or when
    // the value cannot be parsed as a date.
    // -------------------------------------------------------------------
    function _format_date ($value) {
        if ($this->date_format == '') {
            return $value;
        }
        $timestamp = strtotime($value);
        // strtotime() reports failure as false (PHP >= 5.1) or -1 (older)
        if ($timestamp === false || $timestamp === -1) {
            return $value;
        }
        return date($this->date_format, $timestamp);
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    // Returns the result array, or false when the feed cannot be read.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // Open and load RSS file
        $rss_content = @file_get_contents($rss_url);
        if ($rss_content === false) {
            // Error in opening: return false
            return false;
        }
        $result = array();

        // Parse document encoding. Plain preg_match() is used here because
        // my_preg_match() converts from $this->rsscp, which is not known yet.
        if (preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content, $out_encoding)) {
            $result['encoding'] = trim($out_encoding[1]);
        } else {
            $result['encoding'] = '';
        }
        // Use the declared code page, otherwise the default one
        // (this is used in my_preg_match())
        $this->rsscp = ($result['encoding'] != '') ? $result['encoding'] : $this->default_cp;

        // Parse CHANNEL info
        $channel = '';
        if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
            $channel = $out_channel[1];
        }
        foreach($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }
        // Convert lastBuildDate to the specified date format (if present and valid)
        if (isset($result['lastBuildDate'])) {
            $result['lastBuildDate'] = $this->_format_date($result['lastBuildDate']);
        }

        // Parse TEXTINPUT info
        // This a little strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // truncated version <textinput /> (it's not a beginning tag)
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (isset($out_textinfo[2])) {
            foreach($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (isset($out_imageinfo[1])) {
            foreach($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $i = 0;
        $result['items'] = array(); // create array even if there are no items
        foreach($rss_items as $rss_item) {
            // Stop as soon as the configured limit is reached (0 = unlimited)
            if ($this->items_limit != 0 && $i >= $this->items_limit) {
                break;
            }

            // Every parsed item gets an entry, even when no known tag is found
            $item = array();
            foreach($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $item[$itemtag] = $temp; // Set only if not empty
            }

            // Strip HTML tags and entities from DESCRIPTION and TITLE
            if ($this->stripHTML) {
                foreach(array('description', 'title') as $field) {
                    if (!empty($item[$field])) {
                        $item[$field] = strip_tags($this->unhtmlentities(strip_tags($item[$field])));
                    }
                }
            }

            // Convert pubDate to the specified date format (if present and valid)
            if (isset($item['pubDate'])) {
                $item['pubDate'] = $this->_format_date($item['pubDate']);
            }

            $result['items'][$i] = $item;
            // Item counter
            $i++;
        }

        $result['items_count'] = $i;
        return $result;
    }
}
219
220 ?>

  ViewVC Help
Powered by ViewVC 1.1.5