Python scrapy爬虫

Scrapy API 文档 | 开源项目（GitHub）

1. demo

1
2
3
4
5
6
7
8
9
10
11
12
13
14
import scrapy

class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quotes tagged "humor" from quotes.toscrape.com.

    Each item yielded by :meth:`parse` is a plain dict with two keys:
    ``author`` and ``text``.
    """

    # Identifier for this spider (used to select it, e.g. `scrapy crawl quotes`).
    name = 'quotes'

    # Pages fetched when the crawl starts; their responses are handed to parse().
    start_urls = [
        'http://quotes.toscrape.com/tag/humor/',
    ]

    def parse(self, response):
        """Yield one ``{'author', 'text'}`` dict per quote on the page.

        Args:
            response: The downloaded page; it must expose ``.css()`` returning
                selector objects that support ``.xpath()``, ``.css()`` and
                ``.get()``.

        Yields:
            dict: ``author`` is the text of the ``<small>`` element nested in
            a ``<span>`` directly under the quote; ``text`` is the text of the
            ``span.text`` element. ``.get()`` returns the first match, or
            ``None`` when the selector matches nothing.
        """
        # Every quote on the page lives in its own <div class="quote">.
        for quote in response.css('div.quote'):
            yield {
                'author': quote.xpath('span/small/text()').get(),
                'text': quote.css('span.text::text').get(),
            }