PHPでRSSを解析するメモ
ブログのRSSを解析して記事一覧を表示…とかをするためのメモ。
「RSS2.0と宣言されているのにデータの一部がAtomで配信されている。」とか時々あるので、宣言は信用せずに「それらしいデータがないか探す」という方針で実装。
feed_load_file() を実行すれば、階層のある連想配列でデータをまとめて返すので、あとは表示するだけ…という仕様にしています。
<?php
// Demo page: load a feed, then print its detected format, channel summary and entries.
// feed_load_file() returns an empty array when nothing usable could be read.
$feed = feed_load_file('http://freo.jp/info/news/feed');

// Everything in $feed originates from a remote server, so it is treated as
// untrusted and HTML-escaped before being written into the page.
// NOTE(review): 'description' and 'content' often carry markup; escaping shows it
// as text. Rendering it as HTML would require a proper sanitizer first.
$h = function ($value) {
    return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<!DOCTYPE html>\n";
echo "<html lang=\"ja\">\n";
echo "<head>\n";
echo "<meta charset=\"utf-8\" />\n";
echo "<title>RSSの解析</title>\n";
echo "</head>\n";
echo "<body>\n";

if (empty($feed)) {
    echo 'エラー';
} else {
    echo '<strong>形式</strong>';
    echo '<hr />';
    echo $h($feed['type']);
    echo '<hr />';

    echo '<strong>概要</strong>';
    echo '<hr />';
    echo 'title : ' . $h($feed['channel']['title']) . '<br />';
    echo 'link : ' . $h($feed['channel']['link']) . '<br />';
    echo 'description : ' . $h($feed['channel']['description']) . '<br />';
    echo '<hr />';

    echo '<strong>記事</strong>';
    echo '<hr />';

    // 'item' is read defensively so a feed without entries prints no rows instead of failing.
    $items = isset($feed['item']) ? $feed['item'] : array();
    foreach ($items as $item) {
        // Skip entries dated in the future. A missing or unparsable date is
        // never treated as "future", so such entries are still listed.
        $timestamp = empty($item['date']) ? false : strtotime($item['date']);
        if ($timestamp !== false && $timestamp > time()) {
            continue;
        }

        echo 'title : ' . $h($item['title']) . '<br />';
        echo 'link : ' . $h($item['link']) . '<br />';
        echo 'description : ' . $h($item['description']) . '<br />';
        echo 'content : ' . $h($item['content']) . '<br />';
        echo 'date : ' . $h($item['date']) . '<br />';
        echo '<hr />';
    }
}

echo "</body>\n";
echo "</html>\n";
exit;
feed_load_file() の内容は、長いので続きで。
/**
 * Load a feed and normalize it into a nested associative array.
 *
 * The declared feed format is not trusted; the format is detected from which
 * elements are actually present (entry => Atom, channel/item => RSS 2.0,
 * top-level item => RSS 1.0).
 *
 * @param string $url URL or path passed to simplexml_load_file().
 * @return array Empty array when the document cannot be loaded or no known
 *               structure is found; otherwise
 *               array('type' => string, 'channel' => array(title, link, description),
 *                     'item' => list of array(title, link, description, content, date)).
 */
function feed_load_file($url)
{
    $xml = simplexml_load_file($url);

    // simplexml_load_file() returns false on I/O or parse failure; bail out
    // before any property access on a non-object.
    if ($xml === false) {
        return array();
    }

    if ($xml->entry) {
        // Atom: the root element itself carries the feed-level metadata.
        $type = 'atom';
        $channel = $xml;
        $items = $xml->entry;
    } elseif ($xml->channel->item) {
        // RSS 2.0: items are nested inside <channel>.
        $type = 'rss2';
        $channel = $xml->channel;
        $items = $xml->channel->item;
    } elseif ($xml->item) {
        // RSS 1.0: items are siblings of <channel>.
        $type = 'rss1';
        $channel = $xml->channel;
        $items = $xml->item;
    } else {
        return array();
    }

    // An RSS 1.0-style document without a usable <channel> is reported as "no feed".
    if (!$channel || !$items) {
        return array();
    }

    $feed = array(
        'type' => $type,
        'channel' => array(
            'title' => feed_title($channel),
            'link' => feed_link($channel),
            'description' => feed_description($channel)
        ),
        // Always present so callers can iterate without checking for the key.
        'item' => array()
    );

    foreach ($items as $item) {
        $feed['item'][] = array(
            'title' => feed_title($item),
            'link' => feed_link($item),
            'description' => feed_description($item),
            'content' => feed_content($item),
            'date' => feed_date($item)
        );
    }

    return $feed;
}
/**
 * Return the text of the <title> child (same element name in Atom, RSS 2.0 and RSS 1.0).
 *
 * @param SimpleXMLElement $xml Feed-level or entry-level element.
 * @return string Title text; an empty string when there is no <title> child.
 */
function feed_title($xml)
{
    $titleNode = $xml->title;

    return (string)$titleNode;
}
/**
 * Return the link of a feed or entry element.
 *
 * Atom expresses links as <link href="..." rel="..."/>; a link without a rel
 * attribute is equivalent to rel="alternate". RSS 2.0 / RSS 1.0 put the URL in
 * the element text instead.
 *
 * @param SimpleXMLElement $xml Feed-level or entry-level element.
 * @return string|null The href of the first alternate link (Atom), otherwise
 *                     the text of the first <link> (RSS); null when there is
 *                     no <link> child at all.
 */
function feed_link($xml)
{
    if (!isset($xml->link)) {
        return null;
    }

    foreach ($xml->link as $link) {
        $href = (string)$link['href'];
        if ($href === '') {
            // No href attribute: not an Atom-style link, nothing to return from it.
            continue;
        }

        // A missing rel defaults to "alternate" in Atom.
        $rel = (string)$link['rel'];
        if ($rel === '' || $rel === 'alternate') {
            return $href;
        }
    }

    // RSS 2.0 / RSS 1.0: the URL is the element's text content.
    return (string)$xml->link;
}
/**
 * Return the short description of a feed or entry element.
 *
 * The first child that exists, in this order, wins:
 * summary, subtitle, tagline (Atom), then description (RSS 2.0 / RSS 1.0).
 *
 * @param SimpleXMLElement $xml Feed-level or entry-level element.
 * @return string|null Text of the first matching child, or null when none exists.
 */
function feed_description($xml)
{
    $candidates = array('summary', 'subtitle', 'tagline', 'description');

    foreach ($candidates as $name) {
        if (isset($xml->{$name})) {
            return (string)$xml->{$name};
        }
    }

    return null;
}
/**
 * Return the full body of an entry.
 *
 * An Atom <content> child takes precedence; otherwise the <encoded> element
 * from the RSS content module namespace is used.
 *
 * @param SimpleXMLElement $xml Entry-level element.
 * @return string|null Body text, or null when neither element is available.
 */
function feed_content($xml)
{
    // Atom
    if (isset($xml->content)) {
        return (string)$xml->content;
    }

    // RSS content module (content:encoded)
    $encoded = $xml->children('http://purl.org/rss/1.0/modules/content/')->encoded;

    return $encoded ? (string)$encoded : null;
}
/**
 * Return the date of a feed or entry element as "Y-m-d H:i:s".
 *
 * The first non-empty value among these children is used, in order:
 * created, issued, published, updated, modified (Atom variants),
 * pubDate, lastBuildDate (RSS 2.0), then dc:date (RSS 1.0).
 * The result is formatted in PHP's default timezone.
 *
 * @param SimpleXMLElement $xml Feed-level or entry-level element.
 * @return string|null Formatted date, or null when no date element is present
 *                     or its value cannot be parsed by strtotime(). (Without
 *                     this check the missing case would be formatted as the
 *                     Unix epoch, which looks like a real date.)
 */
function feed_date($xml)
{
    $candidates = array(
        'created',       // Atom
        'issued',        // Atom
        'published',     // Atom
        'updated',       // Atom
        'modified',      // Atom
        'pubDate',       // RSS 2.0
        'lastBuildDate'  // RSS 2.0
    );

    $date = '';
    foreach ($candidates as $name) {
        // A missing child casts to '', so absent and empty elements are both skipped.
        $value = trim((string)$xml->{$name});
        if ($value !== '') {
            $date = $value;
            break;
        }
    }

    if ($date === '') {
        // RSS 1.0: Dublin Core date lives in its own namespace.
        $date = trim((string)$xml->children('http://purl.org/dc/elements/1.1/')->date);
    }

    if ($date === '') {
        return null;
    }

    $timestamp = strtotime($date);
    if ($timestamp === false) {
        return null;
    }

    return date('Y-m-d H:i:s', $timestamp);
}
?>