Advertisement
furas

Python - Scrapy in standalone script with saving in CSV

Dec 2nd, 2017
278
0
Never
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
Python 0.95 KB | None | 0 0
  1. import scrapy
  2.  
  3. class QuotesSpider(scrapy.Spider):
  4.  
  5.     name = "quotes"
  6.     start_urls = [
  7.         'http://quotes.toscrape.com/page/1/',
  8.     ]
  9.  
  10.     def parse(self, response):
  11.         for quote in response.css('div.quote'):
  12.             yield {
  13.                 'text': quote.css('span.text::text').extract_first(),
  14.                 'author': quote.css('small.author::text').extract_first(),
  15.                 "tags": quote.css('div.tags a.tag::text').extract(),
  16.             }
  17.  
  18.         next_page = response.css('li.next a::attr(href)').extract_first()
  19.         if next_page is not None:
  20.             next_page = response.urljoin(next_page)
  21.             yield scrapy.Request(next_page, callback=self.parse)
  22.  
  23. # --- run it ---
  24.  
  25. import scrapy.crawler
  26.  
  27. c = scrapy.crawler.CrawlerProcess({
  28.     'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
  29.     'FEED_FORMAT': 'csv',
  30.     'FEED_URI': 'data.csv'
  31. })
  32. c.crawl(QuotesSpider)
  33. c.start()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement