Pixivのタイトル、タグ検索から一定数以上のブックマークがある画像だけを保存するスクリプト2
ページ取得処理を明確に分離しました。
- 構成物
  - pixiv.rb
  - main.rb
  - config.yaml
pixiv.rb
#! ruby -Ku
require 'net/http'
require 'uri'
require 'kconv'

# Small pixiv client. It logs in once to obtain session cookies, then fetches
# search-result pages and image files with those cookies attached.
class AccessPixiv
  # Results per search page, as assumed by the original page arithmetic
  # (total / 20).
  IMAGES_PER_PAGE = 20

  # Hit count and page count parsed from the most recent search.
  # Both are nil until the first search has been attempted.
  attr_reader :total_images, :total_pages

  # Logs in and builds the request headers used by every later request.
  #
  # NOTE: the result of the login attempt cannot be returned from here,
  # because `AccessPixiv.new` always returns the instance. When login fails
  # the headers stay unset and later requests are sent without a cookie.
  #
  # @param pixiv_id   [String] login ID
  # @param pixiv_pass [String] login password
  # @param user_agent [String] value for the User-Agent header
  # @param referer    [String] value for the Referer header
  def initialize(pixiv_id, pixiv_pass, user_agent, referer)
    @header = nil
    get_cookie(pixiv_id, pixiv_pass, user_agent, referer)
  end

  # Fetches the resource at +uri+ using the login headers.
  #
  # @param uri [String] absolute URL
  # @return [String, false] the raw response body, or false when the server
  #   answered with a 4xx/5xx status
  def get_file(uri)
    # Random pause of 2-10 seconds before every request.
    sleep 2 * (rand(5) + 1)
    puts "get #{uri}"
    site = URI.parse(uri)
    # Use the port and scheme of the URL itself instead of assuming port 80.
    Net::HTTP.start(site.host, site.port, :use_ssl => (site.scheme == 'https')) do |http|
      response = http.get(site.request_uri, @header)
      return disp_error(response) ? response.body : false
    end
  end

  # Runs a search and refreshes #total_images / #total_pages from the result.
  #
  # @param word     [String] search term
  # @param s_mode   [String] search mode ('s_tag' or 's_tc')
  # @param page_num [Integer] result page to fetch
  # @return [String, false] the page as UTF-8 text, or false when the fetch
  #   failed (previously parsed totals are kept; unset totals become 0)
  def search(word, s_mode, page_num)
    query = "word=#{URI.encode_www_form_component(word)}&s_mode=#{s_mode}&p=#{page_num}"
    body = get_file("http://www.pixiv.net/search.php?#{query}")
    unless body
      @total_images ||= 0
      @total_pages ||= 0
      return false
    end

    page = utf8_text(body)
    @total_images = page[/検索結果:(\d+)件/, 1].to_i
    # Ceiling division: 40 hits are 2 pages, not 3.
    @total_pages = (@total_images + IMAGES_PER_PAGE - 1) / IMAGES_PER_PAGE
    page
  end

  # Tag search.
  def search_tag(word, page_num = 1)
    search(word, 's_tag', page_num)
  end

  # Title / caption search.
  def search_title(word, page_num = 1)
    search(word, 's_tc', page_num)
  end

  # Writes +data+ to +filename+ byte-for-byte.
  #
  # @param data     [String] file contents (typically binary image data)
  # @param filename [String] destination path
  def save_file(data, filename)
    puts "save #{filename}"
    # write, not puts: puts would append a newline byte to the image.
    File.open(filename, 'wb') { |f| f.write(data) }
  end

  # Creates ./name unless it already exists.
  #
  # @param name [String] directory name
  # @return [String] path of the directory
  def make_dir(name)
    save_dir =
      if /mswin(?!ce)|mingw|cygwin|bccwin/ =~ RUBY_PLATFORM.downcase
        # On Windows platforms the name is converted to Shift_JIS first.
        "./#{name.tosjis}"
      else
        "./#{name}"
      end

    if File.exist?(save_dir)
      puts "Directory exist"
    else
      puts "Create Directory"
      Dir.mkdir(save_dir)
    end
    save_dir
  end

  private

  # Logs in and stores the request headers (User-Agent, Referer, Cookie).
  #
  # @return [Boolean] true when cookies were obtained, false otherwise
  def get_cookie(pixiv_id, pixiv_pass, user_agent, referer)
    # encode_www_form escapes '&', '=' and non-ASCII characters in credentials.
    form = URI.encode_www_form([['mode', 'login'], ['pixiv_id', pixiv_id], ['pass', pixiv_pass]])
    Net::HTTP.start('www.pixiv.net', 80) do |http|
      response = http.post('/index.php', form, 'User-Agent' => user_agent)
      return false unless disp_error(response)

      cookie = cookie_header(response.get_fields('Set-Cookie'))
      return false unless cookie

      @header = {
        'User-Agent' => user_agent,
        'Referer' => referer,
        'Cookie' => cookie
      }
      return true
    end
  end

  # Builds a Cookie request-header value from Set-Cookie response fields,
  # keeping only the leading name=value pair of each field. Splitting the
  # combined header on ',' is unreliable because expires= dates contain commas.
  #
  # @param fields [Array<String>, nil] Set-Cookie header values
  # @return [String, nil] e.g. "a=1; b=2", or nil when there are no cookies
  def cookie_header(fields)
    return nil if fields.nil? || fields.empty?

    fields.map { |field| field.split(';').first.to_s.strip }.join('; ')
  end

  # Returns +body+ re-tagged as UTF-8 so the UTF-8 regexp in #search can be
  # applied to it; Net::HTTP hands bodies back as binary strings.
  # NOTE(review): assumes the search page is served as UTF-8 - confirm.
  def utf8_text(body)
    text = body.dup.force_encoding('UTF-8')
    text.valid_encoding? ? text : text.scrub('?')
  end

  # Reports HTTP error responses.
  #
  # @param response [Net::HTTPResponse]
  # @return [Boolean] false (after printing "Error <code>") for any 4xx/5xx
  #   response, true otherwise; redirects still count as success
  def disp_error(response)
    case response
    when Net::HTTPClientError, Net::HTTPServerError
      puts "Error #{response.code}"
      false
    else
      true
    end
  end
end
main.rb
#! ruby -Ku
# Downloads every image found by a pixiv tag search whose bookmark ("users")
# count is at least a given threshold.
#
# Usage: ruby main.rb CONFIG_FILE SEARCH_WORD THRESHOLD_USERS
require 'yaml'
require 'kconv'
# require_relative: the script directory is not on the load path in Ruby 1.9.2+.
require_relative 'pixiv'

abort "Usage: ruby #{$0} CONFIG_FILE SEARCH_WORD THRESHOLD_USERS" if ARGV.size < 3

CONFIG_FILE = ARGV[0]
SEARCH_WORD = ARGV[1].toutf8
THRESHOLD_USERS = ARGV[2].to_i
START_PAGE = 1

# Captures, per search hit: thumbnail URL, thumbnail file name ("<id>_s.<ext>")
# and the number printed in front of " users".
THUMBNAIL_PATTERN = /"(http:\/\/.+\.pixiv\.net\/img\/.+\/(\d+_s\..{3}))".+?\s.+?\s.+?(\d+) users/
# The "_s" marker directly in front of the extension at the end of a name.
THUMBNAIL_SUFFIX = /_s(\..{3})\z/

config = YAML.load_file(CONFIG_FILE)
account = config['pixiv']
pixiv = AccessPixiv.new(account['id'], account['pass'], account['user_agent'], account['referer'])

save_dir = pixiv.make_dir(SEARCH_WORD)

# This first search is issued only to learn the page total.
pixiv.search_tag(SEARCH_WORD)

# to_i keeps the range finite even if no total could be determined.
(START_PAGE..pixiv.total_pages.to_i).each do |page_num|
  puts "Total Images: #{pixiv.total_images}"
  puts "Total Pages : #{pixiv.total_pages}"

  page = pixiv.search_tag(SEARCH_WORD, page_num)
  # search_tag returns false when the page could not be fetched; skip it.
  next unless page

  page.scan(THUMBNAIL_PATTERN) do |uri, id, user|
    next if user.to_i < THRESHOLD_USERS

    # Strip only the trailing "_s" marker; an "_s" elsewhere in the URL
    # (for example inside a directory name) must be left alone.
    image_uri = uri.sub(THUMBNAIL_SUFFIX, '\1')
    image_name = id.sub(THUMBNAIL_SUFFIX, '\1')

    data = pixiv.get_file(image_uri)
    pixiv.save_file(data, "#{save_dir}/#{user}_#{image_name}") if data
  end
end

puts 'Done!'
config.yaml
# Settings loaded by main.rb and handed to AccessPixiv.new.
pixiv:
  # Login ID sent as pixiv_id in the login request (placeholder value).
  id : 'UserID'
  # Login password sent as pass in the login request (placeholder value).
  pass: 'PassWord'
  # User-Agent header sent with the login request and every later request.
  user_agent: 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
  # Referer header sent with every request made after login.
  referer: 'http://www.pixiv.net/mypage.php'