Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'rubygems'
- require 'nokogiri'
- require 'open-uri'
# Scrapes apartment listings from the dubravy.eu project site and converts
# each apartment detail page into a nested attribute hash.
class ParsingRobots
  # Site root used to build detail-page and blueprint URLs.
  DUBRAVY_BASE_URL = 'http://dubravy.eu'.freeze

  # Collects every apartment of one block from the listing page at +url+.
  #
  # The fifth 'table.sortable' on the listing page is scanned; a row is kept
  # when the first character of its second cell equals +block+. Each kept id
  # is turned into a detail-page URL, which is downloaded and parsed.
  #
  # @param url [String] address of the listing page
  # @param block [String] one-character block identifier to filter by
  # @return [Array<Hash>] hashes of the form
  #   {:project_property =>, :apartment =>, :outside_area =>, :attachments =>}
  def parse_dubravy_apartments(url, block)
    listing = fetch_html(url)

    detail_urls = listing.css('table.sortable')[4].css('tr').map do |row|
      id_cell = row.css('td')[1]
      next if id_cell.nil? || id_cell.content[0] != block

      "#{DUBRAVY_BASE_URL}/jednoizbovy-byt/#{id_cell.content}"
    end.compact

    detail_urls.map { |detail_url| parse_dubravy_apartment(fetch_html(detail_url), block) }
  end

  # Translates the status text shown on the site into a status code.
  #
  # The longer 'predrezervovan.' pattern is tested before 'rezervovan.'
  # because the shorter pattern also matches the longer word.
  #
  # @param status [String, nil] status text taken from the detail page
  # @return [String, nil] 'prereserved', 'reserved', 'free', 'sold',
  #   or nil when nothing matches
  def dubravy_match_status(status)
    case status
    when /predrezervovan./ then 'prereserved'
    when /rezervovan./ then 'reserved'
    when /vo.n./ then 'free'
    when /predan./ then 'sold'
    end
  end

  # Converts a compass rotation in degrees into an orientation code.
  #
  # Values within 5 degrees of 0/360, 90, 180 and 270 map to N, E, S and W;
  # everything in between maps to the intermediate code. The branches are
  # evaluated in order and adjacent ranges share their boundary value, so no
  # value between 0 and 360 is left unmapped.
  #
  # NOTE(review): 'ES' and 'WN' are kept exactly as the original emitted them
  # (rather than 'SE'/'NW') because consumers may depend on these codes.
  #
  # @param compass_rotation [Numeric] rotation in degrees
  # @return [String, nil] orientation code, or nil outside 0..360
  def dubravy_orientation(compass_rotation)
    case compass_rotation
    when 0..5, 355..360 then 'N'
    when 5...85 then 'NE'
    when 85..95 then 'E'
    when 95...175 then 'ES'
    when 175..185 then 'S'
    when 185...265 then 'SW'
    when 265..275 then 'W'
    when 275...355 then 'WN'
    end
  end

  private

  # Downloads +url+ and parses it as HTML. URI.open is used instead of
  # Kernel#open, and the block form closes the stream after parsing.
  def fetch_html(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Builds the attribute hash for a single apartment detail page.
  def parse_dubravy_apartment(doc, block)
    # Text lines of the first info <div>, with tabs removed.
    # NOTE(review): the character class is 0-1, possibly meant to be 0-9. It
    # is left unchanged because positional lookups below (index 0 for the
    # name, index 6 for the status) depend on which lines survive the filter.
    info_lines = doc.css('div.floor-info div')[0].content.tr("\t", '').split("\n")
                    .select { |line| line =~ /[0-1a-z]+/i }
    overlay_rows = doc.css('div.floor-info table tr')
    name = info_lines[0]

    {
      project_property: {
        name: name,
        csl_category: 'Byty',
        # Joins the first two whitespace-separated tokens after the last colon
        # before converting to an integer.
        price: info_lines.find { |line| line =~ /.*Cena:.*/ }
                         .split(':')[-1].split(' ')[0..1].join.to_i,
        area: info_lines.find { |line| line =~ /.*Interi.r:.*/ }.split(' ')[1].to_i,
        property_type: 'apartment',
        rooms: info_lines.find { |line| line =~ /[1-4] izb[ay]/ }.split(' ')[0].to_i,
        status: dubravy_match_status(info_lines[6]),
        identification: "dubrava-#{block}-#{name}",
        offer_type_code: 'sale',
        orientation: dubravy_orientation(dubravy_compass_rotation(doc))
      },
      apartment: {
        construction_code: 'reinforced_concrete',
        # Second character of the second word of the name.
        floor: name.split(' ')[1][1].to_i,
        floor_count: 7,
        state_code: 'approved',
        garden_area: 0
      },
      outside_area: {
        balconies: dubravy_overlay_areas(overlay_rows, /balk.n/i),
        cellars: dubravy_overlay_areas(overlay_rows, /komora/i)
      },
      attachments: {
        blueprint: dubravy_blueprint_url(doc)
      }
    }
  end

  # Reads the digits of the second CSS class of the compass image as degrees.
  # A missing second class yields 0 because nil.to_i is 0.
  def dubravy_compass_rotation(doc)
    doc.css('div.floor-info img.compas').attr('class').value.
      split(' ')[1]&.tr('^0-9', '').to_i
  end

  # Integer values from the third cell of every overlay row whose second cell
  # matches +label_pattern+. Rows without a second cell are skipped.
  def dubravy_overlay_areas(rows, label_pattern)
    matching = rows.select do |row|
      label_cell = row.css('td')[1]
      label_cell && label_cell.content =~ label_pattern
    end
    matching.map { |row| row.css('td')[2].content.to_i }
  end

  # Absolute URL of the first link ending in '.pdf', or nil when the page has
  # no such link (the original raised NoMethodError in that case).
  def dubravy_blueprint_url(doc)
    pdf_link = doc.css('div.floor-info a').find { |link| link.attr('href') =~ /\.pdf$/ }
    pdf_link && "#{DUBRAVY_BASE_URL}#{pdf_link.attr('href')}"
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement