Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
class AuthorSpider(scrapy.Spider):
    """Crawl quotes.toscrape.com and emit one item per quote.

    The crawl starts at the first listing page and keeps following the
    "next" pagination link for as long as the page provides one. Every
    emitted item holds the quote text, its author, its tags and the ordinal
    of the listing page (in parse order) on which the quote was found.
    """

    name = "quotes"
    start_urls = [
        'http://quotes.toscrape.com/page/1/',
    ]

    # Number of listing pages parsed so far. The class attribute is only the
    # initial value: ``parse`` keeps the running count on the instance, so
    # separate spider instances (or repeated runs in the same process) never
    # share or inherit one another's counters.
    page = 0

    def parse(self, response):
        """Extract every quote on a listing page, then follow the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            One dict per quote with the keys ``content``, ``author``,
            ``tags`` and ``page``, followed by a ``scrapy.Request`` for the
            next listing page when a "next" link is present.
        """
        # Count on the instance rather than on ``AuthorSpider`` itself; the
        # first ``+=`` reads the class default (0) and stores the result on
        # ``self``.
        self.page += 1
        # Snapshot the counter: this is a generator, so items are built
        # lazily and must not pick up a value changed by a later call.
        page = self.page

        for quote in response.css('div.quote'):
            yield {
                'content': quote.css('span.text::text').get(),
                'author': quote.css('small.author::text').get(),
                'tags': quote.css('a.tag::text').getall(),
                'page': page,
            }

        next_page = response.css('li.next a::attr(href)').get()
        if next_page is not None:
            # The href is relative to the current page; resolve it first.
            next_page = response.urljoin(next_page)
            yield scrapy.Request(next_page, callback=self.parse)
Advertisement
Add Comment
Please sign in to add a comment
Advertisement