Create  Edit  Diff  Phillro Industries  Index  Search  Changes  History  Source  RSS  Note  wikifarm  Login

ap-hiki-cache.rb

ap-hiki-cache.rb

   1|#!/bin/env ruby
   2|# ap-hiki-cache.rb - Apollo Hiki の src を cache する
   3|# author: Kazuhiro Yoshida <moriq@moriq.com>
   4|
   5|require 'cgi'
   6|require 'nkf'
   7|require 'open-uri'
   8|require 'fileutils'
   9|
  10|@cache_dir = "ap-hiki-cache"
  11|
  12|FileUtils.mkdir_p @cache_dir
  13|
  14|def cache_filename(raw_page)
  15|  "#@cache_dir/#{raw_page}.hiki"
  16|end
  17|
  18|def get_src(raw_page)
  19|  uri = "http://wiki.fdiary.net/apollo/?c=src;p=#{raw_page}"
  20|  o = open(cache_filename(raw_page), "w")
  21|  open(uri) do |f|
  22|    11.times { f.gets }
  23|    while line = f.gets
  24|      if line =~ /</
  25|        o.print $`
  26|        break 
  27|      end
  28|      o.print NKF.nkf('-Es', CGI.unescapeHTML(line))
  29|    end
  30|  end
  31|  o.close
  32|  nil
  33|end
  34|
  35|def get_cache_mtime(raw_page)
  36|  File.stat(cache_filename(raw_page)).mtime rescue nil
  37|end
  38|
  39|@index_uri = "http://wiki.fdiary.net/apollo/?c=index"
  40|@index_cache_filename = "ap-hiki-index.html"
  41|
  42|@rss_uri = "http://wiki.fdiary.net/apollo/?c=rss"
  43|
  44|def setup_dc_date
  45|  print "RSS を読んでいます… "
  46|  dc_date_text = nil
  47|  open(@rss_uri) do |f|
  48|    while line = f.gets
  49|      if line =~ %r[<dc:date>(.*?)</dc:date>]n
  50|        dc_date_text = $1
  51|        break
  52|      end
  53|    end
  54|  end
  55|  if dc_date_text.nil?
  56|    raise "dc:date cannot found"
  57|  end
  58|  @dc_date = Time.gm(*dc_date_text.unpack("A4xA2xA2xA2xA2xA2"))
  59|  puts "完了。"
  60|  @dc_date
  61|end
  62|
  63|setup_dc_date
  64|
  65|def update_index_file
  66|  print "index file を更新しています… "
  67|  open(@index_cache_filename, "w") do |o|
  68|    open(@index_uri) do |f|
  69|      while line = f.gets
  70|        o.print NKF.nkf('-Es', line)
  71|      end
  72|    end
  73|  end
  74|  puts "完了。"
  75|  nil
  76|end
  77|
  78|def get_index_mtime
  79|  File.stat(@index_cache_filename).mtime rescue nil
  80|end
  81|
  82|index_mtime = get_index_mtime
  83|
  84|if index_mtime.nil? || index_mtime < @dc_date
  85|  update_index_file
  86|else
  87|  puts "index file は更新済みです。"
  88|end
  89|
  90|def parse_li_datetimestr(datetimestr)
  91|  datetimestr =~ /\((.*?)\)/n
  92|  date, wday, time = $`, $1, $'
  93|  year, mon, day = date.unpack('A4xA2xA2')
  94|  hour, min, sec = time.unpack('A2xA2xA2')
  95|  datetime = Time.local(year, mon, day, hour, min, sec)
  96|  datetime
  97|end
  98|
  99|def index_cache_each_page
 100|  li_line = nil
 101|  open(@index_cache_filename) do |f|
 102|    33.times{ f.gets }
 103|    li_line = f.gets
 104|  end
 105|  unless li_line =~ /^ *<li>/
 106|    File.delete @index_cache_filename
 107|    raise "index file has broken. deleted."
 108|  end
 109|  li_line.scan(%r[<li>(.*?)</li>]) do |li_node_text, |
 110|    li_node_text =~ %r[<a href="(.*?)">(.*?)</a>: (.*)]n
 111|    href, caption, datetimestr = $1, $2, $3
 112|    href =~ %r[\./\?(.*)]
 113|    raw_page = $1||'FrontPage'
 114|    datetime = parse_li_datetimestr(datetimestr)
 115|    yield raw_page, caption, datetime
 116|  end
 117|end
 118|
 119|index_cache_each_page do |raw_page, caption, datetime|
 120|  cache_mtime = get_cache_mtime(raw_page)
 121|  if cache_mtime.nil? || cache_mtime < datetime
 122|    cmd = cache_mtime.nil? ? 'create' : 'update'
 123|    page = NKF.nkf('-Es', CGI.unescape(raw_page))
 124|    puts "#{cmd} #{page}"
 125|    get_src(raw_page)
 126|  end
 127|end
 128|
 129|puts "更新完了。"

Last modified:2004/12/07 12:20:36
Keyword(s):
References:[SupportScripts]