batch/1.2/library/Zend/Feed/Reader.php
2013-06-19 09:13:51 +00:00

757 lines
24 KiB
PHP

<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Feed_Reader
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Reader.php 25103 2012-11-07 20:29:29Z rob $
*/
/**
* @see Zend_Feed
*/
require_once 'Zend/Feed.php';
/**
* @see Zend_Feed_Reader_Feed_Rss
*/
require_once 'Zend/Feed/Reader/Feed/Rss.php';
/**
* @see Zend_Feed_Reader_Feed_Atom
*/
require_once 'Zend/Feed/Reader/Feed/Atom.php';
/**
* @see Zend_Feed_Reader_FeedSet
*/
require_once 'Zend/Feed/Reader/FeedSet.php';
/**
* @category Zend
* @package Zend_Feed_Reader
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Feed_Reader
{
    /**
     * Namespace constants
     */
    const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
    const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
    const NAMESPACE_RDF     = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
    const NAMESPACE_RSS_10  = 'http://purl.org/rss/1.0/';

    /**
     * Feed type constants
     */
    const TYPE_ANY              = 'any';
    const TYPE_ATOM_03          = 'atom-03';
    const TYPE_ATOM_10          = 'atom-10';
    const TYPE_ATOM_10_ENTRY    = 'atom-10-entry';
    const TYPE_ATOM_ANY         = 'atom';
    const TYPE_RSS_090          = 'rss-090';
    const TYPE_RSS_091          = 'rss-091';
    const TYPE_RSS_091_NETSCAPE = 'rss-091n';
    const TYPE_RSS_091_USERLAND = 'rss-091u';
    const TYPE_RSS_092          = 'rss-092';
    const TYPE_RSS_093          = 'rss-093';
    const TYPE_RSS_094          = 'rss-094';
    const TYPE_RSS_10           = 'rss-10';
    const TYPE_RSS_20           = 'rss-20';
    const TYPE_RSS_ANY          = 'rss';

    /**
     * Cache instance
     *
     * @var Zend_Cache_Core
     */
    protected static $_cache = null;

    /**
     * HTTP client object to use for retrieving feeds
     *
     * @var Zend_Http_Client
     */
    protected static $_httpClient = null;

    /**
     * Override HTTP PUT and DELETE request methods?
     *
     * @var boolean
     */
    protected static $_httpMethodOverride = false;

    /**
     * Whether import() should issue conditional GET requests
     * (If-None-Match / If-Modified-Since) when a cache is configured.
     *
     * @var boolean
     */
    protected static $_httpConditionalGet = false;

    /**
     * Plugin loader used to resolve Extension classes; created lazily
     * by getPluginLoader().
     *
     * @var Zend_Loader_PluginLoader_Interface
     */
    protected static $_pluginLoader = null;

    /**
     * Reserved for Extension prefix paths; only ever reset in this class.
     *
     * @var array
     */
    protected static $_prefixPaths = array();

    /**
     * Registered Extension class-name suffixes, grouped by kind. The 'core'
     * list names the extensions shipped as defaults.
     *
     * @var array
     */
    protected static $_extensions = array(
        'feed' => array(
            'DublinCore_Feed',
            'Atom_Feed'
        ),
        'entry' => array(
            'Content_Entry',
            'DublinCore_Entry',
            'Atom_Entry'
        ),
        'core' => array(
            'DublinCore_Feed',
            'Atom_Feed',
            'Content_Entry',
            'DublinCore_Entry',
            'Atom_Entry'
        )
    );

    /**
     * Get the Feed cache
     *
     * @return Zend_Cache_Core|null Null when no cache has been set
     */
    public static function getCache()
    {
        return self::$_cache;
    }

    /**
     * Set the feed cache
     *
     * @param Zend_Cache_Core $cache
     * @return void
     */
    public static function setCache(Zend_Cache_Core $cache)
    {
        self::$_cache = $cache;
    }

    /**
     * Set the HTTP client instance
     *
     * Sets the HTTP client object to use for retrieving the feeds.
     *
     * @param Zend_Http_Client $httpClient
     * @return void
     */
    public static function setHttpClient(Zend_Http_Client $httpClient)
    {
        self::$_httpClient = $httpClient;
    }

    /**
     * Gets the HTTP client object. If none is set, a new Zend_Http_Client
     * is created, stored and returned.
     *
     * @return Zend_Http_Client
     */
    public static function getHttpClient()
    {
        if (!self::$_httpClient instanceof Zend_Http_Client) {
            /**
             * @see Zend_Http_Client
             */
            require_once 'Zend/Http/Client.php';
            self::$_httpClient = new Zend_Http_Client();
        }
        return self::$_httpClient;
    }

    /**
     * Toggle using POST instead of PUT and DELETE HTTP methods
     *
     * Some feed implementations do not accept PUT and DELETE HTTP
     * methods, or they can't be used because of proxies or other
     * measures. This allows turning on using POST where PUT and
     * DELETE would normally be used; in addition, an
     * X-Method-Override header will be sent with a value of PUT or
     * DELETE as appropriate.
     *
     * @param boolean $override Whether to override PUT and DELETE.
     * @return void
     */
    public static function setHttpMethodOverride($override = true)
    {
        self::$_httpMethodOverride = $override;
    }

    /**
     * Get the HTTP override state
     *
     * @return boolean
     */
    public static function getHttpMethodOverride()
    {
        return self::$_httpMethodOverride;
    }

    /**
     * Set the flag indicating whether or not to use HTTP conditional GET
     *
     * @param bool $bool
     * @return void
     */
    public static function useHttpConditionalGet($bool = true)
    {
        self::$_httpConditionalGet = $bool;
    }

    /**
     * Import a feed by providing a URI
     *
     * Three strategies are used depending on configuration:
     *  - conditional GET enabled and a cache set: revalidate the cached copy
     *    with ETag / Last-Modified and reuse it on a 304 response;
     *  - only a cache set: serve the cached copy if present, else fetch;
     *  - no cache: always fetch.
     *
     * An empty response body is never written to the cache.
     *
     * @param string $uri The URI to the feed
     * @param string $etag OPTIONAL Last received ETag for this resource
     * @param string $lastModified OPTIONAL Last-Modified value for this resource
     * @return Zend_Feed_Reader_FeedInterface
     * @throws Zend_Feed_Exception on an unexpected status code or empty body
     */
    public static function import($uri, $etag = null, $lastModified = null)
    {
        $cache  = self::getCache();
        $client = self::getHttpClient();
        $client->resetParameters();
        // Clear any conditional headers left over from a previous import.
        $client->setHeaders('If-None-Match', null);
        $client->setHeaders('If-Modified-Since', null);
        $client->setUri($uri);
        $cacheId = 'Zend_Feed_Reader_' . md5($uri);

        if (self::$_httpConditionalGet && $cache) {
            $data = $cache->load($cacheId);
            // Conditional headers are only sent when there is a cached body
            // to fall back on should the server answer 304.
            if ($data) {
                if ($etag === null) {
                    $etag = $cache->load($cacheId . '_etag');
                }
                if ($lastModified === null) {
                    $lastModified = $cache->load($cacheId . '_lastmodified');
                }
                if ($etag) {
                    $client->setHeaders('If-None-Match', $etag);
                }
                if ($lastModified) {
                    $client->setHeaders('If-Modified-Since', $lastModified);
                }
            }
            $response = $client->request('GET');
            $status   = $response->getStatus();
            if ($status !== 200 && $status !== 304) {
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $status);
            }
            $notModified = ($status == 304);
            $responseXml = $notModified ? $data : $response->getBody();
            if (empty($responseXml)) {
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
            }
            if (!$notModified) {
                // Persist only after the body is known to be non-empty.
                $cache->save($responseXml, $cacheId);
                if ($response->getHeader('ETag')) {
                    $cache->save($response->getHeader('ETag'), $cacheId . '_etag');
                }
                if ($response->getHeader('Last-Modified')) {
                    $cache->save($response->getHeader('Last-Modified'), $cacheId . '_lastmodified');
                }
            }
            return self::importString($responseXml);
        }

        if ($cache) {
            $data = $cache->load($cacheId);
            if ($data !== false) {
                return self::importString($data);
            }
            $response = $client->request('GET');
            if ($response->getStatus() !== 200) {
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
            }
            $responseXml = $response->getBody();
            if (empty($responseXml)) {
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
            }
            // Saved after the empty check so an empty body is never cached
            // and later served as if it were a feed.
            $cache->save($responseXml, $cacheId);
            return self::importString($responseXml);
        }

        $response = $client->request('GET');
        if ($response->getStatus() !== 200) {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
        }
        $responseXml = $response->getBody();
        if (empty($responseXml)) {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
        }
        $reader = self::importString($responseXml);
        // NOTE(review): only this uncached path records the source URI; the
        // cached paths above do not - confirm whether that is intentional.
        $reader->setOriginalSourceUri($uri);
        return $reader;
    }

    /**
     * Import a feed by providing a Zend_Feed_Abstract object
     *
     * @param Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
     * @return Zend_Feed_Reader_FeedInterface
     */
    public static function importFeed(Zend_Feed_Abstract $feed)
    {
        $dom  = $feed->getDOM()->ownerDocument;
        $type = self::detectType($dom);
        self::_registerCoreExtensions();
        if (substr($type, 0, 3) == 'rss') {
            return new Zend_Feed_Reader_Feed_Rss($dom, $type);
        }
        return new Zend_Feed_Reader_Feed_Atom($dom, $type);
    }

    /**
     * Import a feed from a string
     *
     * The document is parsed with external entity loading disabled and is
     * rejected if it carries a DOCTYPE. libxml settings are restored before
     * any exception is thrown.
     *
     * @param string $string XML source of the feed
     * @return Zend_Feed_Reader_FeedInterface
     * @throws Zend_Feed_Exception if the XML is empty, malformed, contains a
     *         DOCTYPE, or is not a recognised feed format
     */
    public static function importString($string)
    {
        list($dom, $status, $parseError) = self::_loadDom($string);
        self::_rejectDoctype($dom);
        if (!$status) {
            if ($parseError !== null) {
                $errormsg = "DOMDocument cannot parse XML: {$parseError}";
            } else {
                $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
            }
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception($errormsg);
        }

        $type = self::detectType($dom);
        self::_registerCoreExtensions();

        if (substr($type, 0, 3) == 'rss') {
            return new Zend_Feed_Reader_Feed_Rss($dom, $type);
        }
        // 'atom-10-entry': a standalone Atom entry document.
        if (substr($type, 8, 5) == 'entry') {
            return new Zend_Feed_Reader_Entry_Atom($dom->documentElement, 0, Zend_Feed_Reader::TYPE_ATOM_10);
        }
        if (substr($type, 0, 4) == 'atom') {
            return new Zend_Feed_Reader_Feed_Atom($dom, $type);
        }

        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception('The URI used does not point to a '
            . 'valid Atom, RSS or RDF feed that Zend_Feed_Reader can parse.');
    }

    /**
     * Imports a feed from a file located at $filename.
     *
     * @param string $filename
     * @throws Zend_Feed_Exception if the file cannot be read
     * @return Zend_Feed_Reader_FeedInterface
     */
    public static function importFile($filename)
    {
        // error_get_last() replaces track_errors/$php_errormsg, which no
        // longer exist on PHP 8. Clear first (where possible) so a stale,
        // unrelated error is not reported as the reason.
        if (function_exists('error_clear_last')) {
            error_clear_last();
        }
        $feed = @file_get_contents($filename);
        if ($feed === false) {
            $lastError = error_get_last();
            $reason = (is_array($lastError) && !empty($lastError['message']))
                ? $lastError['message']
                : '(error message not available)';
            /**
             * @see Zend_Feed_Exception
             */
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception("File could not be loaded: $reason");
        }
        return self::importString($feed);
    }

    /**
     * Fetch the HTML document at $uri and collect its <link> elements into
     * a feed set.
     *
     * @param string $uri Address of the HTML page to inspect
     * @return Zend_Feed_Reader_FeedSet
     * @throws Zend_Feed_Exception if the page cannot be fetched (non-200
     *         status) or its body cannot be parsed as HTML
     */
    public static function findFeedLinks($uri)
    {
        // Get the HTTP response from $uri and save the contents
        $client = self::getHttpClient();
        $client->setUri($uri);
        $response = $client->request();
        if ($response->getStatus() !== 200) {
            /**
             * @see Zend_Feed_Exception
             */
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
        }

        list($dom, $status, $parseError) = self::_loadDom($response->getBody(), true);
        if (!$status) {
            if ($parseError !== null) {
                $errormsg = "DOMDocument cannot parse HTML: {$parseError}";
            } else {
                $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
            }
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception($errormsg);
        }

        $feedSet = new Zend_Feed_Reader_FeedSet;
        $feedSet->addLinks($dom->getElementsByTagName('link'), $uri);
        return $feedSet;
    }

    /**
     * Detect the feed type of the provided feed
     *
     * @param Zend_Feed_Reader_FeedInterface|DOMDocument|string $feed
     * @param bool $specOnly When true, a standalone Atom 1.0 entry document
     *        is reported as TYPE_ATOM_10 instead of TYPE_ATOM_10_ENTRY
     * @return string One of the TYPE_* constants
     * @throws Zend_Feed_Exception if $feed is of an unsupported type, or is
     *         a string that cannot be parsed or contains a DOCTYPE
     */
    public static function detectType($feed, $specOnly = false)
    {
        if ($feed instanceof Zend_Feed_Reader_FeedInterface) {
            $dom = $feed->getDomDocument();
        } elseif ($feed instanceof DOMDocument) {
            $dom = $feed;
        } elseif (is_string($feed) && !empty($feed)) {
            list($dom, $status, $parseError) = self::_loadDom($feed);
            self::_rejectDoctype($dom);
            if (!$status) {
                if ($parseError === null) {
                    if (function_exists('xdebug_is_enabled')) {
                        $parseError = '(error message not available, when XDebug is running)';
                    } else {
                        $parseError = '(error message not available)';
                    }
                }
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception("DOMDocument cannot parse XML: $parseError");
            }
        } else {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Invalid object/scalar provided: must'
                . ' be of type Zend_Feed_Reader_FeedInterface, DomDocument or string');
        }

        $xpath = new DOMXPath($dom);

        // Plain RSS: the version attribute selects the concrete type.
        if ($xpath->query('/rss')->length) {
            $type    = self::TYPE_RSS_ANY;
            $version = $xpath->evaluate('string(/rss/@version)');
            if (strlen($version) > 0) {
                // switch compares loosely, so e.g. "2" also matches '2.0'.
                switch ($version) {
                    case '2.0':
                        $type = self::TYPE_RSS_20;
                        break;
                    case '0.94':
                        $type = self::TYPE_RSS_094;
                        break;
                    case '0.93':
                        $type = self::TYPE_RSS_093;
                        break;
                    case '0.92':
                        $type = self::TYPE_RSS_092;
                        break;
                    case '0.91':
                        $type = self::TYPE_RSS_091;
                        break;
                }
            }
            return $type;
        }

        // RDF-based RSS: 1.0 is probed before 0.90, told apart by namespace.
        $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);
        if ($xpath->query('/rdf:RDF')->length) {
            $rdfFlavours = array(
                self::NAMESPACE_RSS_10  => self::TYPE_RSS_10,
                self::NAMESPACE_RSS_090 => self::TYPE_RSS_090,
            );
            foreach ($rdfFlavours as $namespace => $rdfType) {
                $xpath->registerNamespace('rss', $namespace);
                foreach (array('channel', 'image', 'item', 'textinput') as $element) {
                    if ($xpath->query('/rdf:RDF/rss:' . $element)->length) {
                        return $rdfType;
                    }
                }
            }
        }

        // Atom 1.0 feed or standalone entry.
        $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);
        if ($xpath->query('//atom:feed')->length) {
            return self::TYPE_ATOM_10;
        }
        if ($xpath->query('//atom:entry')->length) {
            return ($specOnly == true) ? self::TYPE_ATOM_10 : self::TYPE_ATOM_10_ENTRY;
        }

        // Atom 0.3.
        $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);
        if ($xpath->query('//atom:feed')->length) {
            return self::TYPE_ATOM_03;
        }

        return self::TYPE_ANY;
    }

    /**
     * Set plugin loader for use with Extensions
     *
     * @param Zend_Loader_PluginLoader_Interface $loader
     * @return void
     */
    public static function setPluginLoader(Zend_Loader_PluginLoader_Interface $loader)
    {
        self::$_pluginLoader = $loader;
    }

    /**
     * Get plugin loader for use with Extensions
     *
     * Creates a default loader for the Zend_Feed_Reader_Extension_ prefix
     * when none has been set.
     *
     * @return Zend_Loader_PluginLoader_Interface $loader
     */
    public static function getPluginLoader()
    {
        if (!isset(self::$_pluginLoader)) {
            require_once 'Zend/Loader/PluginLoader.php';
            self::$_pluginLoader = new Zend_Loader_PluginLoader(array(
                'Zend_Feed_Reader_Extension_' => 'Zend/Feed/Reader/Extension/',
            ));
        }
        return self::$_pluginLoader;
    }

    /**
     * Add prefix path for loading Extensions
     *
     * Trailing underscores on the prefix and trailing directory separators
     * on the path are stripped before registration.
     *
     * @param string $prefix
     * @param string $path
     * @return void
     */
    public static function addPrefixPath($prefix, $path)
    {
        $prefix = rtrim($prefix, '_');
        $path   = rtrim($path, DIRECTORY_SEPARATOR);
        self::getPluginLoader()->addPrefixPath($prefix, $path);
    }

    /**
     * Add multiple Extension prefix paths at once
     *
     * Accepts either a single array('prefix' => ..., 'path' => ...) pair or
     * a list of such pairs.
     *
     * @param array $spec
     * @return void
     */
    public static function addPrefixPaths(array $spec)
    {
        if (isset($spec['prefix']) && isset($spec['path'])) {
            self::addPrefixPath($spec['prefix'], $spec['path']);
        }
        foreach ($spec as $prefixPath) {
            // Skip scalars: on PHP < 5.4 isset($string['prefix']) is true,
            // which would register single characters as prefix and path.
            if (!is_array($prefixPath)) {
                continue;
            }
            if (isset($prefixPath['prefix']) && isset($prefixPath['path'])) {
                self::addPrefixPath($prefixPath['prefix'], $prefixPath['path']);
            }
        }
    }

    /**
     * Register an Extension by name
     *
     * Tries to load both the "<name>_Feed" and "<name>_Entry" classes; an
     * extension may legitimately provide only one of them. Each class name
     * is recorded at most once.
     *
     * @param string $name
     * @return void
     * @throws Zend_Feed_Exception if unable to resolve Extension class
     */
    public static function registerExtension($name)
    {
        $feedName  = $name . '_Feed';
        $entryName = $name . '_Entry';
        $loader    = self::getPluginLoader();

        if (self::isRegistered($name)
            && ($loader->isLoaded($feedName) || $loader->isLoaded($entryName))
        ) {
            return;
        }

        $candidates = array('feed' => $feedName, 'entry' => $entryName);
        foreach ($candidates as $kind => $className) {
            try {
                $loader->load($className);
            } catch (Zend_Loader_PluginLoader_Exception $e) {
                // Deliberately ignored: the other half may still load. A
                // total failure is reported below.
                continue;
            }
            // Core extensions are pre-listed; do not list them twice.
            if (!in_array($className, self::$_extensions[$kind], true)) {
                self::$_extensions[$kind][] = $className;
            }
        }

        if (!$loader->isLoaded($feedName) && !$loader->isLoaded($entryName)) {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Could not load extension: ' . $name
                . ' using Plugin Loader. Check prefix paths are configured and extension exists.');
        }
    }

    /**
     * Is a given named Extension registered?
     *
     * @param string $extensionName
     * @return boolean True if either its feed or entry class is listed
     */
    public static function isRegistered($extensionName)
    {
        $feedName  = $extensionName . '_Feed';
        $entryName = $extensionName . '_Entry';
        return in_array($feedName, self::$_extensions['feed'])
            || in_array($entryName, self::$_extensions['entry']);
    }

    /**
     * Get a list of extensions
     *
     * @return array Lists keyed by 'feed', 'entry' and 'core'
     */
    public static function getExtensions()
    {
        return self::$_extensions;
    }

    /**
     * Reset class state to defaults
     *
     * @return void
     */
    public static function reset()
    {
        self::$_cache              = null;
        self::$_httpClient         = null;
        self::$_httpMethodOverride = false;
        self::$_httpConditionalGet = false;
        self::$_pluginLoader       = null;
        self::$_prefixPaths        = array();
        self::$_extensions         = array(
            'feed' => array(
                'DublinCore_Feed',
                'Atom_Feed'
            ),
            'entry' => array(
                'Content_Entry',
                'DublinCore_Entry',
                'Atom_Entry'
            ),
            'core' => array(
                'DublinCore_Feed',
                'Atom_Feed',
                'Content_Entry',
                'DublinCore_Entry',
                'Atom_Entry'
            )
        );
    }

    /**
     * Register core (default) extensions
     *
     * @return void
     */
    protected static function _registerCoreExtensions()
    {
        self::registerExtension('DublinCore');
        self::registerExtension('Content');
        self::registerExtension('Atom');
        self::registerExtension('Slash');
        self::registerExtension('WellFormedWeb');
        self::registerExtension('Thread');
        self::registerExtension('Podcast');
    }

    /**
     * Toggle libxml's external entity loader where that is still meaningful.
     *
     * libxml_disable_entity_loader() is deprecated as of PHP 8.0, so on
     * those versions this is a no-op.
     *
     * @param bool $disable
     * @return bool Previous setting, or false when the call was skipped
     */
    protected static function _disableEntityLoader($disable = true)
    {
        if (version_compare(PHP_VERSION, '8.0.0', '>=')
            || !function_exists('libxml_disable_entity_loader')
        ) {
            return false;
        }
        return libxml_disable_entity_loader($disable);
    }

    /**
     * Parse markup into a DOMDocument with libxml errors captured and
     * external entity loading disabled, restoring the previous libxml
     * settings before returning.
     *
     * @param string $markup XML (or HTML) source
     * @param bool $asHtml Parse with loadHTML() instead of loadXML()
     * @return array array(DOMDocument $dom, bool $status, string|null $error)
     *         where $error is the last libxml message on failure, if any
     */
    protected static function _loadDom($markup, $asHtml = false)
    {
        $dom = new DOMDocument;

        // Empty input makes loadXML()/loadHTML() throw ValueError on PHP 8
        // and emit a raw warning earlier; report it as a parse failure.
        if ($markup === '' || $markup === null || $markup === false) {
            return array($dom, false, null);
        }

        $previousErrorFlag  = libxml_use_internal_errors(true);
        $previousLoaderFlag = self::_disableEntityLoader(true);

        $status = $asHtml ? $dom->loadHTML($markup) : $dom->loadXML($markup);

        // Read the error before internal error handling is switched back,
        // since disabling it discards the collected errors.
        $message = null;
        if (!$status) {
            $error = libxml_get_last_error();
            if ($error && $error->message) {
                $message = $error->message;
            }
        }

        self::_disableEntityLoader($previousLoaderFlag);
        libxml_use_internal_errors($previousErrorFlag);

        return array($dom, $status, $message);
    }

    /**
     * Refuse documents that declare a DOCTYPE.
     *
     * @param DOMDocument $dom
     * @return void
     * @throws Zend_Feed_Exception if a DOCTYPE node is a child of the document
     */
    protected static function _rejectDoctype(DOMDocument $dom)
    {
        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                require_once 'Zend/Feed/Exception.php';
                throw new Zend_Feed_Exception(
                    'Invalid XML: Detected use of illegal DOCTYPE'
                );
            }
        }
    }

    /**
     * Utility method to apply array_unique operation to a multidimensional
     * array.
     *
     * Elements are compared by their serialized form; the first occurrence
     * of each distinct element is kept and original keys are preserved.
     *
     * @param array $array
     * @return array
     */
    public static function arrayUnique(array $array)
    {
        // array_map() preserves keys when given a single array, and avoids
        // the by-reference foreach loops (and their dangling reference).
        $serialized = array_map('serialize', $array);
        return array_map('unserialize', array_unique($serialized));
    }
}