#!/usr/bin/perl -w
#
# Builds the "Classical MP3 Portal Music News" RSS 0.91 feed.
#
# The script downloads the news listing page from news.scena.org, walks its
# table cells, and for every cell that contains a redirect link of the form
# "...lnk=http://..." followed by descriptive text, adds an item (title,
# description, link) to the feed. Each accepted link is echoed to STDOUT and
# the finished feed is written to $output_file.

use strict;
use XML::RSS;
use DBI;
use HTML::TokeParser;
use LWP::Simple;
use POSIX qw(strftime);

# NOTE(review): unused database settings (DSN, user and a plain-text password)
# used to be declared here. Nothing in this script opens a database
# connection, so they were dropped; the password that was embedded in this
# file should be treated as exposed and rotated.

my $source_url  = "http://news.scena.org/WEBNEWS/WEBNEWS.ASP?LAN=2";
my $output_file = "/big/dom/xparnasse/www/ca/html/classicalmusic.rss";

# Timestamp for pubDate/lastBuildDate, always expressed in GMT.
# %d is used for the day of month because it is part of the ANSI C set of
# strftime conversions, whereas %e is not available on every platform.
my $today = strftime("%a, %d %b %Y %H:%M:%S", gmtime) . " GMT";

my $rss = XML::RSS->new( version => '0.91' );

$rss->channel(
    title          => "Classical MP3 Portal Music News",
    link           => "http://parnasse.com/ca/html",
    language       => "en-us",
    copyright      => "Copyright 2003, Jeffrey Harrington",
    pubDate        => $today,
    lastBuildDate  => $today,
    managingEditor => "editor\@parnasse.com",
    webMaster      => "webmaster\@parnasse.com",
    category       => "Classical Music News",
    description    => "News for the classical music community including news about chamber and orchestral music and opera. Classical Music News Provided by The Classical MP3 Portal.",
);

# Download the listing page into memory. LWP::Simple::get returns undef on
# failure and does not set $!, so the message names the URL instead.
my $raw_html = get($source_url);
die "Could not download $source_url\n"
    unless defined $raw_html && length $raw_html;

my $parser = HTML::TokeParser->new(\$raw_html)
    or die "Could not create an HTML parser for $source_url\n";

# Each iteration consumes: the next <td>, the next <a> after it, the anchor
# text up to </a>, and the remaining cell text up to </td>. All three reads
# happen before any filtering so the parser position advances the same way
# whether or not the cell ends up in the feed.
while ($parser->get_tag("td")) {
    # No anchors left means there is nothing more to harvest.
    my $anchor = $parser->get_tag("a") or last;

    my $href        = $anchor->[1]{href};
    my $title       = $parser->get_trimmed_text("/a");
    my $description = $parser->get_trimmed_text("/td");

    # The cell text may start with a ", " separator right after the link.
    $description =~ s/^, //;

    # The real target is carried in the "lnk=" query parameter. Only use the
    # capture when the match succeeded, so a stale $1 can never leak in.
    next unless defined $href && $href =~ /lnk=(http.*)$/;
    my $link = $1;

    # Only plain http:// targets are published.
    next unless substr($link, 0, 7) eq "http://";
    next if $title eq "" || $description eq "";

    print $link . "\n";
    $rss->add_item(
        title       => $title,
        description => $description,
        link        => $link,
    );
}

$rss->save($output_file);

exit(0);