#!/usr/bin/perl # OPML Burner use strict; use lib "./lib/"; use Encode; use CGI::Carp qw(fatalsToBrowser); use CGI qw(escapeHTML unescapeHTML); use Cache::FileCache; use LWP::Simple qw($ua); my $debug = 0; my $version = '0.01'; my $q = CGI->new; my %lwp_cache_opt = ( 'namespace' => 'lwp-cache', 'cache_root' => ".cache", 'default_expires_in' => 60*60*24 ); use Data::Dumper; $ua->agent(qq{OPML Burner/$version (http://la.ma.la/opmlburner/)}); #print Dumper$ua; my %format = ( livedoor => 'http://reader.livedoor.com/user/%s/opml', bloglines => 'http://www.bloglines.com/export?id=%s', hatena => 'http://r.hatena.ne.jp/%s/opml', ); my $input = get_input(); $input = { livedoor => [qw(mala__ cho45 nipotan brasil_ higeorange tokyoenvious)], bloglines => [qw(horaguchi)], hatena => [qw(higepon naoya)], } if $debug; my $threshold = int $q->param("threshold") || 0; $threshold = 5 if $debug; sub get_input{ my $input = {}; for(qw(livedoor bloglines hatena)){ $input->{$_} = anzen_ni_suru($q->param($_)); } return $input; } sub anzen_ni_suru{ return [] unless defined $_[0]; my @ids = split(/\s+/, $_[0]); @ids = grep {m/\w+/} @ids; return \@ids; } my %count_for_url; my %url2obj; while(my($key,$value) = each %{$input}){ for my $id(@{$value}){ $id = "$key:$id"; my $opml = get_opml("$id"); my @subs = opml2obj($opml, $id); for(@subs){ my $url = $_->{feedlink}; $count_for_url{$url}++; if(!exists $url2obj{$url}){ $url2obj{$url} = $_; $_->{subscribers} = []; } push @{$url2obj{$url}->{subscribers}}, $id; } } } my @feedlinks = sort { $count_for_url{$b} - $count_for_url{$a} } keys %url2obj; my @hot_feed = grep {$count_for_url{$_} >= $threshold} @feedlinks; #for(@feedlinks){ # if($count_for_url{$_} > 4){ # print Dumper $url2obj{$_} # } #} print $q->header( -type=>'text/xml', -charset=>'utf-8', ); print output_opml(@hot_feed); sub opml2obj{ my ($opml,$id) = @_; $opml = Encode::decode_utf8($opml); $opml = Encode::encode_utf8($opml); $opml =~s/\n//g; $opml =~s{>}{>\n}g; my @lines = split("\n", $opml); my @result; my $i=0; for(@lines){ next unless m{type="rss"}; $i++; my $hash = { title => do{m{title="(.*?)"}; utf8::decode($1); $1||""}, link => m{htmlUrl="(.*?)"}, feedlink => m{xmlUrl="(.*?)"}, }; push @result, $hash; #print Dumper @result if $i==1; #print Dumper $_ if $i==1; } return @result; } sub get_opml{ my($service, $id) = split(":",$_[0]); return unless exists $format{$service}; my $url = sprintf($format{$service} ,$id); my $cache = Cache::FileCache->new(\%lwp_cache_opt); my $obj = $cache->get($url); return $obj if defined $obj; my $res = LWP::Simple::get($url); $cache->set($url, $res); return $res; } sub output_opml{ my @feedlinks = @_; sub html_escape{ my $str = shift; utf8::decode($str); $str = unescapeHTML($str); $str = escapeHTML($str); utf8::encode($str); $str; } sub format_item{ my $obj = shift; my $title = html_escape($obj->{title}); # print $title; my $link = html_escape($obj->{link}); my $feedlink = html_escape($obj->{feedlink}); return qq{}; } my $curr; my $old; my @buf; for (@feedlinks){ $curr = $count_for_url{$_}; if(defined $old && $old ne $curr){push @buf, qq{}} if($old ne $curr){push @buf, qq{}} push @buf, format_item($url2obj{$_}); $old = $curr; } if(defined $old){push @buf, qq{}} my $loop = join("\n", @buf); my $head = <<"_TMPL_"; generated by opml burner _TMPL_ my $foot = <<"_TMPL_"; _TMPL_ return $head.$loop.$foot; }