リファクタリングした
d:id:sawat:20060607 「RSSから条件に一致するエントリーだけを抽出するフィルター」の続き
#! ruby
require 'rss/1.0'
require 'rss/content'
require 'rss/trackback'
require 'rss/dublincore'
require 'rss/syndication'
require 'rss/maker'

# Rebuilds an RSS feed as RSS 1.0, keeping only the entries that pass every
# registered filter.
#
# Typical use: register filters with #add_new_filter, load a feed with
# #parse, call #restruct, then read the result from #rss.
class Grfeed
  # The feed produced by the last #parse or #restruct call (nil before #parse).
  attr_reader :rss

  def initialize
    @rss = nil
    @filters = []
  end

  # Parses +rss_source+ and stores the result in #rss.
  # The second argument (false) is passed to RSS::Parser.parse unchanged.
  def parse(rss_source)
    @rss = RSS::Parser.parse(rss_source, false)
  end

  # Builds a new RSS 1.0 feed from the parsed one, copying the channel, the
  # optional image, and every entry for which #test is true, then replaces
  # #rss with it.
  #
  # Returns the rebuilt feed, or nil when nothing has been parsed yet.
  def restruct
    return nil unless @rss

    source = @rss
    rebuilt = RSS::Maker.make("1.0") do |maker|
      copy_channel(maker, source)
      copy_image(maker, source)
      source.items.each do |entry|
        next unless test(entry)
        copy_item(maker, entry)
      end
    end
    @rss = rebuilt
  end

  # Registers an object responding to test(item) as an additional filter.
  def add_filter(filter)
    @filters.push(filter)
  end

  # Builds a Filter and registers it.
  #
  # tagname:: accessor name read from each entry (e.g. "title")
  # pattern:: regular expression source
  # inc::     true keeps matching entries, false keeps non-matching ones
  # ignore::  truthy makes the match case-insensitive
  def add_new_filter(tagname, pattern, inc, ignore=nil)
    # Forward +ignore+ as given; the earlier `ignore=nil` at this call site
    # reset it to nil, so the case-insensitive option never took effect.
    add_filter Filter.new(tagname, pattern, inc, ignore)
  end

  # True when +item+ passes every registered filter (also true when no filter
  # is registered).
  def test(item)
    @filters.all? { |filter| filter.test(item) }
  end

  # One include/exclude rule: a regular expression applied to one accessor of
  # an entry.
  class Filter
    def initialize(tagname, pattern, inc, ignore=nil)
      @tagname = tagname
      @regexp = Regexp.new(pattern, ignore ? Regexp::IGNORECASE : 0)
      @include = inc
    end

    # True when +item+ should be kept by this rule.
    #
    # The text matched is the value of the accessor named by the tag. When the
    # entry's class also defines the pluralised accessor ("<tag>s"), the
    # values of all its elements are sorted and joined with spaces instead.
    def test(item)
      text = item.send(@tagname)
      plural = "#{@tagname}s"
      if item.class.method_defined?(plural)
        text = item.send(plural).collect { |tg| tg.value.to_s }.sort.join(" ")
      end
      matched = !(@regexp =~ text).nil?
      # Include rules keep matches; exclude rules keep everything else.
      matched == (@include ? true : false)
    end
  end

  private

  # Copies the channel fields, substituting "none" for a missing about,
  # title or description.
  def copy_channel(maker, source)
    channel = source.channel
    maker.channel.about = (channel.about || "none")
    maker.channel.title = (channel.title || "none")
    maker.channel.description = (channel.description || "none")
    maker.channel.link = channel.link
    maker.channel.dc_date = channel.dc_date if channel.dc_date
  end

  # Copies the feed image title and URL when the source feed has an image.
  def copy_image(maker, source)
    return unless source.image

    maker.image.title = source.image.title
    maker.image.url = source.image.url
  end

  # Appends a copy of +entry+ to the feed being built.
  def copy_item(maker, entry)
    item = maker.items.new_item
    item.link = entry.link
    item.title = entry.title
    item.description = entry.description
    item.content_encoded = entry.content_encoded
    item.dc_creator = entry.dc_creator
    item.dc_subject = entry.dc_subject
    # Only some versions of the RSS library define dc_subjects, hence the check.
    # NOTE(review): the first subject may be emitted twice when both
    # dc_subject= and new_subject are used - confirm against the rss library.
    if entry.class.method_defined?(:dc_subjects)
      entry.dc_subjects.each do |subj|
        item.dc_subjects.new_subject.value = subj.value
      end
    end
    item.dc_date = entry.dc_date
  end
end
主な改善点は、呼び出しインターフェースの改善、メソッド名などの変更、Ruby 1.8.4 / 1.8.2 の両方で動くようにしたこと、そしてこれまでコピーしていなかったタグもコピーするようにしたことです。
呼び出し元CGIはこんな感じ
#!ruby
require 'open-uri'
require 'cgi'
require 'grfeed.rb'

# CGI front end for Grfeed: fetches the feed named by the "url" parameter,
# applies the filters described by the query parameters, and returns the
# rebuilt feed.
#
# Query parameters read below:
#   url             feed to fetch (a default feed is used when it is not an
#                   http/https URL)
#   i               "on" makes every pattern case-insensitive
#   tp/lp/sp/cp/dp  pattern for title / link / dc_subject / dc_creator /
#                   description (an empty pattern adds no filter)
#   ti/li/si/ci/di  "ex" turns the corresponding filter into an exclude rule

cgi = CGI.new

url = cgi['url']
# \A and \z anchor the whole value. With ^ and $ a multi-line value such as
# "|cmd\nhttp://x" passed the check, and Kernel#open treats a leading "|" as
# a command to run.
url = "http://b.hatena.ne.jp/sawat/favorite?mode=rss" unless /\Ahttps?:.*\z/ =~ url

begin
  ignore_case = (cgi['i'] == 'on')
  grfeed = Grfeed.new

  # [tag name, pattern parameter, include/exclude parameter]
  [
    ["title",       'tp', 'ti'],
    ["link",        'lp', 'li'],
    ["dc_subject",  'sp', 'si'],
    ["dc_creator",  'cp', 'ci'],
    ["description", 'dp', 'di']
  ].each do |tagname, pattern_key, mode_key|
    pattern = cgi[pattern_key]
    next if pattern.empty?

    grfeed.add_new_filter(tagname, pattern, cgi[mode_key] != 'ex', ignore_case)
  end

  # Open through the parsed URI rather than Kernel#open so the value can only
  # ever be fetched as a URL.
  # NOTE(review): any http/https URL the client supplies is still fetched
  # from this server - consider restricting the allowed hosts.
  URI.parse(url).open do |f|
    grfeed.parse(f.read)
    grfeed.restruct
    raise "RSS解析エラー" if grfeed.rss.nil?

    # CGI#out adds the "charset=" prefix itself, so pass only the charset name.
    cgi.out({
      "status" => "OK",
      "type" => (f.content_type || "application/xml"),
      "charset" => (f.charset || "utf-8")
    }) { grfeed.rss.to_s }
  end
rescue => e
  # The error page is sent with status 200; the 500 alternative is kept
  # commented out as in the original script.
  cgi.out({
    "status" => "200 OK",
    # "status" => "500 Internal Server Error",
    "type" => "text/html",
    "charset" => "utf-8"
  }) do
    <<EOF
<html><head><title>500 Internal Server Error</title></head>
<body>
<h1>500 Internal Server Error</h1>
<h2>処理に失敗しました・・・。</h2>
<p>#{CGI.escapeHTML(e.message)} </p></body>
</html>
EOF
  end
end