Plagger::Plugin::Subscription::Toranoanaの通販対応版

とらナビだとその日の更新がおおむね一覧できることが判明したのでやってみた。あまりに数が多い場合はあふれるんだろーけど、20ページある日でも大丈夫そうなのでとりあえず。初出以降のPlaggerの新機能でもっとシンプルに書けるところがあったりしそうだな…。しかしとりあえず動いたので寝る。

Toranoana.pm

package Plagger::Plugin::Subscription::Toranoana;
use utf8;
use strict;
use warnings;

use base qw( Plagger::Plugin );

use Plagger::Util;
use Plagger::Feed;
use Plagger::Entry;

use URI;
use HTML::TreeBuilder::XPath;

sub register {
    my ($self, $context) = @_;
    $context->register_hook(
        $self,
        'subscription.load' => \&load,
    );
}

sub load {
    my($self, $context) = @_;

    my @tora_newka_shops = @{$self->conf->{shop}};
    
    for my $shop (@tora_newka_shops) {
        my $feed = Plagger::Feed->new;
        if ($shop eq 'mailorder') {
            $feed->aggregator( sub { $self->aggregate_mailorder($context); });
        }
        else {
            $feed->aggregator( sub { $self->aggregate($context, $shop); });
        }
        $context->subscription->add($feed);
    }
}

sub aggregate {
    my($self, $context, $shop) = @_;
    
    my @t = localtime(time);
    my $day = $self->conf->{day} || sprintf("%02d%02d", $t[4]+1, $t[3]);
    
    my $booknodes = $self->_uri2nodes(
        "http://www.toranoana.jp/shop/newka/${day}/${shop}_all.html",
        "//body/div[3]/table"
    );
    
    my $feed = Plagger::Feed->new;
    $feed->title("Toranoana\@$shop: $day");
    for my $book (@$booknodes) {
        $self->_add_entry($context, $feed, {
            html => $book,
            mailorder => 0
        });
    }
    $context->update->add($feed);
}

sub aggregate_mailorder {
    my($self, $context) = @_;
    
    my @t = localtime(time);
    my $day = $self->conf->{day} || sprintf("%02d%02d", $t[4]+1, $t[3]);
    my $mon = substr($day, 0, 2);
    my $year = 1900 + $t[5];
    
    my $booknodes = $self->_uri2nodes(
        "http://www.toranoana.jp/cgi-bin/navi.cgi\?nvl=/mailorder/cot/newly/$year/$mon/$year$day.nvl\&n=1",
        "//table"
    );
    
    my $feed = Plagger::Feed->new;
    $feed->title("Toranoana mailorder: $year$day");
    for my $book (@$booknodes) {
        $self->_add_entry($context, $feed, {
            html => $book,
            mailorder => 1
        });
    }
    $context->update->add($feed);
}

sub _uri2nodes {
    my ($self, $url, $xpath) = @_;
    
    my $data = Plagger::Util::load_uri(URI->new($url));
    
    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse($data);
    
    my $booknodes;
    eval {
        $booknodes = $tree->findnodes($xpath);
    } or die $@;
    
    return $booknodes;
}

sub _add_entry {
    my($self, $context, $feed, $hash) = @_;
    
    if (my $book_ref = ($hash->{mailorder}) ? $self->_extract_mailorder($context, $hash->{html})
                                            : $self->_extract($context, $hash->{html})) {
        my $entry = Plagger::Entry->new;
        while (my ($key, $value) = each %$book_ref) {
            $entry->{$key} = $value;
        }
        $feed->add_entry($entry);
    }
}

sub _extract {
    my($self, $context, $book) = @_;
    
    my $html = $book->as_HTML('<>&');
    
    if (my @table = ($html =~ m|<td[^>]*>([^<]*)</td>|g)) {
        my $book_id = $table[0];
        
        if ($self->_is_yaoi($html, 'color\: \#FF99FF')) {
            $context->log(info => "$book_id is yaoi");
            return;
        }
        
        my $dir = join('/', ($book_id =~ /^(\d{2})(\d{4})(\d{2})(\d{2})/)) . $book_id;
        my $link = "http://www.toranoana.jp/mailorder/article/$dir.html";

        my $result = {
            title => $table[2],
            link => $link,
            body => _gen_body($dir, $link),
            author => $table[1],
            tags => $table[3]
        };
        
        return $result;
    }
}

sub _extract_mailorder {
    my($self, $context, $book) = @_;
    
    my $html = $book->as_HTML('<>&');
    
    $html =~ m|(\d{2}/\d{4}/\d{2}/\d{2}/\d{12})|m;
    my $dir = $1;
    
    if ($dir) {
        if ($self->_is_yaoi($html, 'navi_icon_bl\.gif')) {
            $context->log(info => "$dir is yaoi");
            return;
        }
        
        my $link = "http://www.toranoana.jp/mailorder/article/$dir.html";
        
        $html =~ /\>([^\<]+)\<\/a/m;
        
        my $result = {
            title => $1,
            link => $link,
            body => _gen_body($dir, $link)
        };
        
        if ($html =~ /<td class\=\"td_block_no_color\">\s+([^<]*)<.*<td class\=\"td_block_color\">\s+([^<]*)</m) {
            $result->{tags} = $1;
            $result->{author} = $2;
        }
        
        return $result;
    }
}

sub _is_yaoi {
    my ($self, $target, $regexp) = @_;
    if (
        ($target =~ /$regexp/)
        and ($self->conf->{yaoi_block})
        )
    {
        return 1;
    }
    
    return undef;
}

sub _gen_body {
    my ($dir, $link) = @_;
    
    my $result =  join '', map {
        "<img src=\"http://img.toranoana.jp/img/${dir}-${_}.gif\" />"
    } (1..3);
    $result = "<a href=\"$link\">$result</a>";
    
    return $result;
}

1;

config.yaml

plugins:
  - module: Subscription::Toranoana
    config:
      shop:
        - mailorder
      yaoi_block: 1

  - module: Publish::Gmail

ガメシャ