リファクタリングした

d:id:sawat:20060607「RSSから条件に一致するエントリーだけを抽出するフィルター」の続き

#! ruby 
 
require 'rss/1.0'
require 'rss/content'
require 'rss/trackback'
require 'rss/dublincore'
require 'rss/syndication'
require 'rss/maker'
 
# Filters the entries of an RSS feed and rebuilds an RSS 1.0 document that
# contains only the entries accepted by every registered filter.
#
# Typical use: register filters with #add_new_filter, call #parse with the
# feed source, call #restruct, then read the result from #rss.
class Grfeed
  # The feed object produced by the last #parse, replaced by the rebuilt
  # feed after #restruct. nil until #parse has produced one.
  attr_reader :rss

  def initialize
    @rss = nil
    @filters = []
  end

  # Parses +rss_source+ with RSS::Parser and stores the result in #rss.
  # +false+ is handed to the parser as its second argument (the validation
  # switch of the rss library).
  def parse(rss_source)
    @rss = RSS::Parser.parse(rss_source, false)
  end

  # Rebuilds the parsed feed as RSS 1.0, keeping only the items for which
  # #test is true, and stores the rebuilt feed in #rss.
  #
  # Returns the rebuilt feed, or nil when nothing has been parsed yet.
  def restruct
    return nil unless @rss

    rebuilt = RSS::Maker.make("1.0") do |maker|
      copy_channel(maker)
      copy_image(maker)

      @rss.items.each do |entry|
        next unless test(entry)

        copy_item(maker, entry)
      end
    end
    @rss = rebuilt
  end

  # Registers an object responding to +test(item)+ as an additional filter.
  def add_filter(filter)
    @filters.push(filter)
  end

  # Builds a Filter from the given arguments and registers it.
  #
  # tagname:: name of the item accessor to inspect (e.g. "title")
  # pattern:: regular expression source matched against that value
  # inc::     true keeps matching items, false drops matching items
  # ignore::  truthy makes the match case-insensitive
  def add_new_filter(tagname, pattern, inc, ignore=nil)
    # The flag has to be forwarded as-is; writing `ignore=nil` at the call
    # site would reassign it to nil and silently disable case-insensitivity.
    add_filter(Filter.new(tagname, pattern, inc, ignore))
  end

  # True when +item+ is accepted by every registered filter (and therefore
  # also when no filter is registered).
  def test(item)
    @filters.all? { |filter| filter.test(item) }
  end

  # A single include/exclude rule: a regular expression applied to one
  # accessor of a feed item.
  class Filter
    # tagname:: name of the item accessor to inspect
    # pattern:: regular expression source
    # inc::     true to accept matching items, false to accept non-matching ones
    # ignore::  truthy for a case-insensitive match
    def initialize(tagname, pattern, inc, ignore=nil)
      @tagname = tagname
      @regexp = Regexp.new(pattern, ignore ? Regexp::IGNORECASE : 0)
      @include = inc
    end

    # Returns true when +item+ passes this rule.
    def test(item)
      text = item.method(@tagname).call

      # When the item class also offers a plural accessor ("dc_subjects" for
      # "dc_subject"), match against all values, sorted and space-joined.
      plural = "#{@tagname}s"
      if item.class.method_defined?(plural)
        text = item.method(plural).call.collect { |tg| tg.value.to_s }.sort.join(" ")
      end

      matched = !(@regexp =~ text).nil?
      @include ? matched : !matched
    end
  end

  private

  # Copies the channel metadata; about/title/description fall back to "none".
  def copy_channel(maker)
    source = @rss.channel
    maker.channel.about = (source.about || "none")
    maker.channel.title = (source.title || "none")
    maker.channel.description = (source.description || "none")
    maker.channel.link = source.link
    maker.channel.dc_date = source.dc_date if source.dc_date
  end

  # Copies the feed image title and URL when the source feed has an image.
  def copy_image(maker)
    return unless @rss.image

    maker.image.title = @rss.image.title
    maker.image.url = @rss.image.url
  end

  # Appends a copy of +entry+ to the feed being built.
  def copy_item(maker, entry)
    item = maker.items.new_item
    item.link = entry.link
    item.title = entry.title
    item.description = entry.description
    item.content_encoded = entry.content_encoded
    item.dc_creator = entry.dc_creator
    item.dc_subject = entry.dc_subject
    # The plural accessor is only present in some versions of the rss
    # library, so it is probed before use.
    if entry.class.method_defined?(:dc_subjects)
      entry.dc_subjects.each do |subj|
        item.dc_subjects.new_subject.value = subj.value
      end
    end
    item.dc_date = entry.dc_date
  end
end 

主な改善点は、呼び出しインターフェースの改善、メソッド名などの変更、ruby 1.8.4/1.8.2 の両方で動くようにする互換化、そしてこれまでコピーしていなかったタグのコピーの追加。


呼び出し元CGIはこんな感じ

#!ruby
 
require 'open-uri'
require 'cgi'
require 'grfeed.rb'
 
# CGI entry point: fetches the feed named by the "url" parameter, applies the
# filters described by the query parameters, and prints the rebuilt feed.
# Any failure is reported as an HTML error page.
cgi = CGI.new

# Only absolute http(s) URLs are accepted; anything else falls back to the
# default feed. \A is required here: ^ matches at every line start, which
# would let a value such as "|cmd\nhttp://x" through.
url = cgi['url']
url = "http://b.hatena.ne.jp/sawat/favorite?mode=rss" unless /\Ahttps?:/ =~ url

begin
  ignore_case = (cgi['i'] == 'on')
  grfeed = Grfeed.new

  # Item accessor => query parameter prefix. "<prefix>p" carries the pattern,
  # "<prefix>i" is 'ex' for an exclude rule (anything else means include).
  [
    ["title",       "t"],
    ["link",        "l"],
    ["dc_subject",  "s"],
    ["dc_creator",  "c"],
    ["description", "d"]
  ].each do |tagname, prefix|
    pattern = cgi["#{prefix}p"]
    next if pattern.empty?

    grfeed.add_new_filter(tagname, pattern, cgi["#{prefix}i"] != 'ex', ignore_case)
  end

  # Open through the parsed URI rather than Kernel#open so the value can only
  # ever be treated as a URL, never as a local path or a pipe command.
  URI.parse(url).open do |f|
    grfeed.parse(f.read)
    grfeed.restruct

    raise "RSS解析エラー" if grfeed.rss.nil?

    # CGI prepends "charset=" itself, so only the bare charset name is given.
    cgi.out({"status" => "OK",
             "type" => (f.content_type || "application/xml"),
             "charset" => (f.charset || "utf-8") }) { grfeed.rss.to_s }
  end
rescue StandardError => e
  # NOTE(review): the page is sent with status 200 although it announces a
  # 500; the original author left the real 500 status commented out, so this
  # looks deliberate -- confirm before switching it back on.
  cgi.out({
            "status" => "200 OK",
#            "status" => "500 Internal Server Error",
            "type" => "text/html",
            "charset" => "utf-8"
      }) do
<<EOF
<html><head><title>500 Internal Server Error</title></head>
<body>
<h1>500 Internal Server Error</h1>
<h2>処理に失敗しました・・・。</h2>
<p>#{CGI.escapeHTML(e.message)}
</p></body>
</html>
EOF
  end
end