Advertisement
hocikto19

dubravy parser

Sep 8th, 2016
358
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 3.35 KB | None | 0 0
  1. require 'rubygems'
  2. require 'nokogiri'
  3. require 'open-uri'
  4.  
  5. class ParsingRobots
  6.   #returns array of hashes {:apartment =>, :project_property =>, :outside_area =>, :attachments =>}
  7.   def parse_dubravy_apartments(url, block)
  8.     doc = Nokogiri::HTML open(url)
  9.     apartments = []
  10.     urls=[]
  11.  
  12.     #download links to estates for block given in argument
  13.     doc.css('table.sortable')[4].css('tr').each do |tr|
  14.       id = tr.css('td')[1]
  15.       urls.push "http://dubravy.eu/jednoizbovy-byt/#{id.content}" unless id.nil? or id.content[0]!=block
  16.     end
  17.  
  18.     #parsing concrete estates
  19.     urls.each do |url|
  20.       doc = Nokogiri::HTML open(url)
  21.       #f name is fully deserved
  22.       f_table = doc.css('div.floor-info div')[0].content.tr("\t", '').split("\n").select { |line| line =~ /[0-1a-z]+/i }
  23.       #appartment overlay table
  24.       overlay_table = doc.css('div.floor-info table tr')
  25.       #nope zone
  26.       app = {
  27.           project_property: {
  28.               name: f_table[0],
  29.               csl_category: 'Byty',
  30.               price: f_table.select { |line| line =~ /.*Cena:.*/ }[0].split(':')[-1].split(' ')[0..1].join.to_i,
  31.               area: f_table.select { |line| line =~ /.*Interi.r:.*/ }[0].split(' ')[1].to_i,
  32.               property_type: 'apartment',
  33.               rooms: f_table.select { |line| line =~ /[1-4] izb[ay]/ }[0].split(' ')[0].to_i,
  34.               status: dubravy_match_status(f_table[6]),
  35.               identification: "dubrava-#{block}-#{f_table[0]}",
  36.               offer_type_code: 'sale',
  37.               orientation: dubravy_orientation(doc.css('div.floor-info img.compas').attr('class').value.
  38.                   split(' ')[1]&.tr('^0-9', '').to_i)
  39.           },
  40.           apartment: {
  41.               construction_code: 'reinforced_concrete',
  42.               floor: f_table[0].split(' ')[1][1].to_i,
  43.               floor_count: 7,
  44.               state_code: 'approved',
  45.               garden_area: 0
  46.           },
  47.           outside_area: {
  48.               balconies: overlay_table.select { |row| row.css('td')[1].content =~ /balk.n/i unless row.css('td')[1].nil? }.
  49.                   map { |row| row.css('td')[2].content.to_i },
  50.               cellars: overlay_table.select { |row| row.css('td')[1].content =~ /komora/i unless row.css('td')[1].nil? }.
  51.                   map { |row| row.css('td')[2].content.to_i }
  52.           },
  53.           attachments: {
  54.               blueprint: 'http://dubravy.eu' + doc.css('div.floor-info a').
  55.                   select { |link| link.attr('href') =~ /.pdf$/ }[0].attr('href')
  56.           }
  57.       }
  58.       apartments.push app
  59.     end
  60.     apartments
  61.   end
  62.  
  63.   def dubravy_match_status(status)
  64.     case status
  65.       when /rezervovan./
  66.         rv = 'reserved'
  67.       when /predrezervovan./
  68.         rv = 'prereserved'
  69.       when /vo.n./
  70.         rv = 'free'
  71.       when /predan./
  72.         rv = 'sold'
  73.       else
  74.         rv = nil
  75.     end
  76.     rv
  77.   end
  78.  
  79.   def dubravy_orientation(compass_rotation)
  80.     case compass_rotation
  81.       when 0..5
  82.         rv = 'N'
  83.       when 6...85
  84.         rv = 'NE'
  85.       when 85..95
  86.         rv = 'E'
  87.       when 96...175
  88.         rv = 'ES'
  89.       when 175..185
  90.         rv = 'S'
  91.       when 186...265
  92.         rv = 'SW'
  93.       when 266..275
  94.         rv = 'W'
  95.       when 276...355
  96.         rv = 'WN'
  97.       when 356..360
  98.         rv = 'N'
  99.       else
  100.         rv = nil
  101.     end
  102.     rv
  103.   end
  104. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement