#!/usr/bin/perl
use strict;
use warnings;

use Date::Manip;
use DBI;
use LWP::Simple;
use RPC::XML;
use RPC::XML::Client;
use XML::RSS::Parser;
« Broken Lawnmower | Main | Tech Podcasting Has Jumped The Shark »
April 03, 2006
Turning RSS Feeds into Movable Type Entries
A while ago, I created the site PsychicProgrammer.com as a place to gather up programming-related stories from various corners of the Internet. I whipped up some code to automatically gather RSS feeds from various programming-related websites and pull the stories to turn them into Movable Type postings. I've had a few people ask how I did this, so I thought I'd post the code with some explanation of what's going on.
Some of the modules the script uses:
XML::RSS::Parser - a great module for dealing with RSS feeds.
RPC::XML - used to communicate with Movable Type via RPC.
LWP::Simple - used to retrieve the RSS feed document.
# Feeds to aggregate. Each entry is a hash reference with these keys:
#   url      - location of the RSS document to download
#   name     - label printed in debug output and stored as the source of
#              every article recorded in the "seen" table
#   category - name of the Movable Type category new posts are filed under
#   datetype - which element holds the item date:
#              1 = <pubDate>, 2 = <dc:date>
my @info = (
    {
        url      => 'http://www.oreillynet.com/pub/feed/16?format=rss2',
        name     => 'Perl.com',
        category => 'Perl.com',
        datetype => 2,
    },
    {
        url      => 'http://www.digg.com/rss/indexprogramming.xml',
        name     => 'Digg.com',
        category => 'Digg.com',
        datetype => 1,
    },
    {
        url      => 'http://www.oreillynet.com/pub/feed/20?format=rss2',
        name     => 'Xml.com',
        category => 'Xml.com',
        datetype => 2,
    },
    {
        url      => 'http://www.dotnetjunkies.com/WebLog/saasheim/rss.aspx',
        name     => q{Steinar Aasheim's Blog},
        category => q{Steinar Aasheim's Blog},
        datetype => 1,
    },
    {
        url      => 'http://tomcopeland.blogs.com/juniordeveloper/rss.xml',
        name     => 'Junior Developer',
        category => 'Junior Developer',
        datetype => 1,
    },
    {
        url      => 'http://programming.newsforge.com/programming.rss',
        name     => 'Newsforge.com',
        category => 'Newsforge.com',
        datetype => 2,
    },
);
# Credentials for the Movable Type XML-RPC interface.
my $username = 'user';
my $password = 'password';

# Set to a true value to print progress messages while the script runs.
# The debug statements further down test this flag, so it must be declared
# for the script to compile under "use strict".
my $DEBUG = 0;

# Shared state used by the rest of the script.
my %category;     # Movable Type category name => category id
my $i;            # loop variable for the category list
my $dbh;          # database handle
my $sth;          # current statement handle
my $q;            # current SQL text
my $seencount;    # number of "seen" rows matching the current article
my $feed;         # parsed RSS feed for the current site
my $site;         # entry of @info being processed
my $xmldoc;       # raw RSS document for the current site

# Set up database connection
$dbh = DBI->connect("dbi:Pg:dbname=p", "psy", "password")
    or die "Can't open database: $DBI::errstr";

# Set up XML-RPC interface
my $cli = RPC::XML::Client->new('http://www.psychicprogrammer.com/mt/mt-xmlrpc.cgi');
# RPC::XML::Client->new hands back an error string instead of an object
# when it cannot build the client.
ref $cli
    or die "Can't create XML-RPC client: $cli";

# Set up XML parser
my $p = XML::RSS::Parser->new;
# Get category list: ask Movable Type for the categories it knows about
# (the '1' argument is passed as the blog id) and remember each
# category's id under its name in %category.
my $req  = RPC::XML::request->new('mt.getCategoryList', '1', $username, $password);
my $resp = $cli->simple_request($req);

# simple_request yields undef on a transport error (reason in
# $RPC::XML::ERROR); an XML-RPC fault is expected to come back as a hash
# with a faultString. Anything but an array means there is no list.
unless (ref($resp) eq 'ARRAY') {
    my $why = (ref($resp) eq 'HASH' && defined $resp->{faultString})
            ? $resp->{faultString}
            : $RPC::XML::ERROR;
    $why = 'unknown error' unless defined $why;
    die "Can't fetch category list: $why\n";
}

foreach my $cat (@$resp) {
    $category{ $cat->{categoryName} } = $cat->{categoryId};
}

if ($DEBUG) {
    # Iterate over the keys only; looping over the bare hash would also
    # visit every value and look it up as if it were a category name.
    foreach my $name (sort keys %category) {
        # print, not printf: a '%' in a category name must not be
        # interpreted as a format directive.
        print "$category{$name} $name\n";
    }
}
# Main loop: download every configured feed, and for each item that has
# not been posted before, create a Movable Type entry, file it under the
# feed's category and record its link in the "seen" table.

# Describe why an XML-RPC call failed, given what simple_request returned.
sub _rpc_error {
    my ($resp) = @_;
    return $resp->{faultString}
        if ref($resp) eq 'HASH' && defined $resp->{faultString};
    return defined $RPC::XML::ERROR ? $RPC::XML::ERROR : 'unknown error';
}

# Element that carries an item's timestamp, keyed by the feed's 'datetype'.
my %date_element_for = (1 => 'pubDate', 2 => 'dc:date');

# Prepare both statements once and bind the values on each execute,
# instead of rebuilding and re-preparing the SQL text for every item.
my $seen_sth = $dbh->prepare('SELECT count(source) FROM seen WHERE index=?')
    or die "Can't prepare seen lookup: " . $dbh->errstr . "\n";
my $mark_sth = $dbh->prepare('INSERT INTO seen (index, source) VALUES (?, ?)')
    or die "Can't prepare seen insert: " . $dbh->errstr . "\n";

SITE:
foreach my $site (@info) {
    printf("*** Processing for site %s\n\n", $site->{'name'}) if $DEBUG;

    # A feed that cannot be fetched or parsed must not abort the run for
    # the remaining feeds.
    my $xmldoc = get($site->{'url'});
    unless (defined $xmldoc) {
        warn "Can't fetch $site->{'url'}; skipping $site->{'name'}\n";
        next SITE;
    }
    my $feed = eval { $p->parse($xmldoc) };
    unless ($feed) {
        my $why = $@ || "unparsable feed\n";
        warn "Can't parse $site->{'url'}; skipping $site->{'name'}: $why";
        next SITE;
    }

    my $date_element = defined $site->{'datetype'}
                     ? $date_element_for{ $site->{'datetype'} }
                     : undef;

    ITEM:
    foreach my $item ($feed->query('//item')) {
        my $titlenode = $item->query('title');
        my $linknode  = $item->query('link');
        my $descnode  = $item->query('description');

        # The link identifies the article in the "seen" table, so an item
        # without one cannot be de-duplicated and is skipped.
        unless ($linknode) {
            warn "Skipping item without a link in $site->{'name'}\n";
            next ITEM;
        }
        my $link  = $linknode->text_content;
        my $title = $titlenode ? $titlenode->text_content : '';
        my $desc  = $descnode  ? $descnode->text_content  : '';

        # Convert the feed's date to the ISO-style stamp sent as
        # dateCreated. $date stays undefined when the element is missing
        # or the datetype is not one of the known values.
        my $date;
        if (defined $date_element) {
            my $datenode = $item->query($date_element);
            $date = UnixDate($datenode->text_content, "%Y-%m-%dT%H:%M:%S")
                if $datenode;
        }

        my $dd = $desc .
            "<br>Link: <a href=\"" . $link . "\">" .
            $link . "</a>";

        # Check to see if we've seen this one yet. Without a working
        # lookup every item would be re-posted, so a failure is fatal.
        $seen_sth->execute($link)
            or die "Can't query seen table: " . $seen_sth->errstr . "\n";
        my ($seencount) = $seen_sth->fetchrow_array;
        $seen_sth->finish;
        next ITEM if $seencount;

        # Post article
        printf("Posting %s\n", $title) if $DEBUG;
        my %post = (
            'title'           => RPC::XML::string->new($title),
            'description'     => RPC::XML::string->new($dd),
            'mt_tb_ping_urls' => RPC::XML::array->new($link),
        );
        # Leave dateCreated out rather than send an undefined value.
        $post{'dateCreated'} = RPC::XML::string->new($date)
            if defined $date && length $date;

        my $req = RPC::XML::request->new('metaWeblog.newPost',
            '1',
            $username,
            $password,
            RPC::XML::struct->new(%post),
            RPC::XML::boolean->new(1)
        );
        my $post_id = $cli->simple_request($req);

        # Only a plain scalar is a usable post id. On failure the item is
        # NOT marked as seen, so the next run retries it.
        if (!defined $post_id || ref $post_id) {
            warn "Can't post '$title' from $site->{'name'}: "
                . _rpc_error($post_id) . "\n";
            next ITEM;
        }

        # Change category
        my $category_id = $category{ $site->{'category'} };
        if (defined $category_id) {
            $req = RPC::XML::request->new('mt.setPostCategories',
                $post_id,
                $username,
                $password,
                RPC::XML::array->new(
                    RPC::XML::struct->new(
                        'categoryId' => $category_id,
                        'isPrimary'  => RPC::XML::boolean->new(1)
                    )
                )
            );
            my $set = $cli->simple_request($req);
            warn "Can't set category for '$title': " . _rpc_error($set) . "\n"
                if !defined $set || ref $set;
        }
        else {
            warn "No Movable Type category named '$site->{'category'}'; "
                . "'$title' left uncategorized\n";
        }

        # The post exists now, so record the link even if the category
        # step failed; otherwise the next run would post a duplicate.
        $mark_sth->execute($link, $site->{'name'})
            or warn "Can't record $link as seen: " . $mark_sth->errstr . "\n";
    }
}

# Close database
$dbh->disconnect();
That's it! I run this code from a crontab entry every hour or so. As soon as new articles are discovered in the RSS feeds, they will be magically turned into postings on a Movable Type blog, thanks to the wonders of XML-RPC.
I'd appreciate any comments or feedback if you decide to use this code in your own projects. Have fun!
Tags: perl | xml | xml-rpc | rpc | rss |
Posted by Ian at April 3, 2006 02:21 PM
Trackback Pings
TrackBack URL for this entry:
http://www.broadbandpig.com/mt-tb.cgi/70
Comments
This is really interesting. It could be useful for a blog that aggregates other blogs written by one author; that's what interests me about it. I wondered if this would work when you publish more than one blog with your MT installation. Sorry, I'm really a novice at perl.
Posted by: Terry Heath at April 7, 2006 06:52 PM
Terry, it certainly can be used to aggregate a bunch of MT blogs. Just point the code to the RSS page of each of your blogs. Just make sure you don't point to the RSS feed of the blog you're creating with other feeds, otherwise you'll have an endless loop.
Posted by: Ian McLaughlin at April 8, 2006 08:21 AM