Advertisement
sinulucky

task scrape

Feb 7th, 2017
431
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Rails 1.64 KB | None | 0 0
  1. namespace :scraper do
  2.   desc "Fetch the data from scraper"
  3.   task scrape: :environment do
  4.     require 'nokogiri'
  5. require 'open-uri'
  6.  
  7.  
  8. urltext = 'http://oploverz.in/page/%d'
  9.  
  10. links = []
  11. (1..3).each do |i|
  12.     page = Nokogiri::HTML(open(urltext % i))
  13.     page.css('.dtl h2 a[href]').each do |line|
  14.         links << line.attr(:href)
  15.     end
  16. end
  17.  
  18. names = []
  19. links.each do |link|
  20.   page2 = Nokogiri::HTML(open(links))
  21.   page2.css('.dtl h1').each do |name|
  22.     names << name.text.strip
  23.   end
  24. end
  25.  
  26. sinops = []
  27. links.each do |link|
  28.   page3 = Nokogiri::HTML(open(links))
  29.   page3.css('.sinop p').each do |sinop|
  30.     sinops << sinop.text.strip
  31.   end
  32. end
  33.  
  34.  
  35. smalls = []
  36. links.each do |link|
  37.   page4 = Nokogiri::HTML(open(links))
  38.   link_1 = page4.css('.soraddl.op-download')[0]
  39.   list_d = link_1.css('.soraurl.list-download')[0]
  40.   linkb = list_d.css('span strong a[href]')[0..3].each do |small|
  41.     smalls << small.attr(:href).strip.split
  42.   end
  43. end
  44.  
  45. oploverzs = []
  46.  
  47. mediums = []
  48. links.each do |link|
  49.   page5 = Nokogiri::HTML(open(links))
  50.   link_2 = page5.css('.soraddl.op-download')[0]
  51.   lista = link_2.css('.soraurl.list-download')[1]
  52.   linkb = lista.css('span strong a[href]')[4..7].each do |medium|
  53.     mediums << medium.attr(:href).strip.split
  54.   end
  55. end
  56.  
  57. oploverzs << {
  58.   title: names,
  59.   sinops: sinops,
  60.   small: smalls,
  61.   medium: mediums
  62. }
  63.  
  64. oploverzs.each do |oploverz|
  65.   @post = Post.new
  66.   @post.title = oploverz[names]
  67.   @post.sinops = oploverz[sinops]
  68.   @post.small = oploverz[smalls]
  69.   @post.medium = oploverz[mediums]
  70.  
  71.   @post.save
  72. end
  73.   end
  74.  
  75.   desc "TODO"
  76.   task destroy_all_posts: :environment do
  77.   end
  78. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement