class QuotesSpider(scrapy.Spider):
    """Scrape quotes from quotes.toscrape.com, following pagination links.

    Yields one dict per quote with keys: 'authorname', 'text', 'tags'.
    """

    name = 'quotes'
    # BUG FIX: allowed_domains entries must be bare domain names — no scheme,
    # no path. The original value 'toscrap.com/page/' was both misspelled and
    # contained a path, so OffsiteMiddleware silently dropped every follow-up
    # Request (start_urls bypass the filter), which is why the spider stopped
    # after scraping a single page.
    allowed_domains = ['toscrape.com']
    start_urls = ['http://quotes.toscrape.com/page/1/']

    def parse(self, response):
        """Extract every quote on the page, then follow the 'next' link.

        Args:
            response: the scrapy Response for the page being parsed.

        Yields:
            dict items for each quote, and a scrapy.Request for the next page
            (if one exists) with this same method as callback.
        """
        self.log("I just visited " + response.url)

        for quote in response.css('div.quote'):
            yield {
                'authorname': quote.css('small.author::text').extract_first(),
                'text': quote.css('span.text::text').extract_first(),
                'tags': quote.css('a.tag::text').extract(),
            }

        # Pagination: resolve the relative href against the current URL and
        # re-enter parse() on the next page.
        next_page_url = response.css('li.next>a::attr(href)').extract_first()
        if next_page_url:
            yield scrapy.Request(url=response.urljoin(next_page_url),
                                 callback=self.parse)
The code above shows no error. But the problem is that the callback in the yielded Request is never invoked again. I have checked that next_page_url works fine and gives the correct URL for the next page, yet after scraping a single page the spider stops scraping.
0 Votes
0 Comments
Login to post a comment