#!/usr/bin/perl
# OPML Burner
use strict;
use lib "./lib/";
use Encode;
use CGI::Carp qw(fatalsToBrowser);
use CGI qw(escapeHTML unescapeHTML);
use Cache::FileCache;
use LWP::Simple qw($ua);
my $debug = 0;
my $version = '0.01';
my $q = CGI->new;
my %lwp_cache_opt = (
'namespace' => 'lwp-cache',
'cache_root' => ".cache",
'default_expires_in' => 60*60*24 );
use Data::Dumper;
$ua->agent(qq{OPML Burner/$version (http://la.ma.la/opmlburner/)});
#print Dumper$ua;
my %format = (
livedoor => 'http://reader.livedoor.com/user/%s/opml',
bloglines => 'http://www.bloglines.com/export?id=%s',
hatena => 'http://r.hatena.ne.jp/%s/opml',
);
my $input = get_input();
$input = {
livedoor => [qw(mala__ cho45 nipotan brasil_ higeorange tokyoenvious)],
bloglines => [qw(horaguchi)],
hatena => [qw(higepon naoya)],
} if $debug;
my $threshold = int $q->param("threshold") || 0;
$threshold = 5 if $debug;
sub get_input{
my $input = {};
for(qw(livedoor bloglines hatena)){
$input->{$_} = anzen_ni_suru($q->param($_));
}
return $input;
}
sub anzen_ni_suru{
return [] unless defined $_[0];
my @ids = split(/\s+/, $_[0]);
@ids = grep {m/\w+/} @ids;
return \@ids;
}
my %count_for_url;
my %url2obj;
while(my($key,$value) = each %{$input}){
for my $id(@{$value}){
$id = "$key:$id";
my $opml = get_opml("$id");
my @subs = opml2obj($opml, $id);
for(@subs){
my $url = $_->{feedlink};
$count_for_url{$url}++;
if(!exists $url2obj{$url}){
$url2obj{$url} = $_;
$_->{subscribers} = [];
}
push @{$url2obj{$url}->{subscribers}}, $id;
}
}
}
my @feedlinks = sort {
$count_for_url{$b} - $count_for_url{$a}
} keys %url2obj;
my @hot_feed = grep {$count_for_url{$_} >= $threshold} @feedlinks;
#for(@feedlinks){
# if($count_for_url{$_} > 4){
# print Dumper $url2obj{$_}
# }
#}
print $q->header(
-type=>'text/xml',
-charset=>'utf-8',
);
print output_opml(@hot_feed);
sub opml2obj{
my ($opml,$id) = @_;
$opml = Encode::decode_utf8($opml);
$opml = Encode::encode_utf8($opml);
$opml =~s/\n//g;
$opml =~s{>}{>\n}g;
my @lines = split("\n", $opml);
my @result;
my $i=0;
for(@lines){
next unless m{type="rss"};
$i++;
my $hash = {
title => do{m{title="(.*?)"}; utf8::decode($1); $1||""},
link => m{htmlUrl="(.*?)"},
feedlink => m{xmlUrl="(.*?)"},
};
push @result, $hash;
#print Dumper @result if $i==1;
#print Dumper $_ if $i==1;
}
return @result;
}
sub get_opml{
my($service, $id) = split(":",$_[0]);
return unless exists $format{$service};
my $url = sprintf($format{$service} ,$id);
my $cache = Cache::FileCache->new(\%lwp_cache_opt);
my $obj = $cache->get($url);
return $obj if defined $obj;
my $res = LWP::Simple::get($url);
$cache->set($url, $res);
return $res;
}
sub output_opml{
my @feedlinks = @_;
sub html_escape{
my $str = shift;
utf8::decode($str);
$str = unescapeHTML($str);
$str = escapeHTML($str);
utf8::encode($str);
$str;
}
sub format_item{
my $obj = shift;
my $title = html_escape($obj->{title});
# print $title;
my $link = html_escape($obj->{link});
my $feedlink = html_escape($obj->{feedlink});
return qq{};
}
my $curr;
my $old;
my @buf;
for (@feedlinks){
$curr = $count_for_url{$_};
if(defined $old && $old ne $curr){push @buf, qq{}}
if($old ne $curr){push @buf, qq{}}
push @buf, format_item($url2obj{$_});
$old = $curr;
}
if(defined $old){push @buf, qq{}}
my $loop = join("\n", @buf);
my $head = <<"_TMPL_";
generated by opml burner
_TMPL_
my $foot = <<"_TMPL_";
_TMPL_
return $head.$loop.$foot;
}