Skip to content

Commit

Permalink
Merge pull request rmax#15 from nside/master
Browse files Browse the repository at this point in the history
avoid waiting for the spider to be idle before popping the redis queue
  • Loading branch information
rmax committed Dec 2, 2013
2 parents 48e22c2 + fef6b3c commit a295b18
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions scrapy_redis/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def setup_redis(self):
# idle signal is called when the spider has no requests left,
# that's when we will schedule new requests from redis queue
self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
self.log("Reading URLs from redis list '%s'" % self.redis_key)

def next_request(self):
Expand All @@ -29,13 +30,21 @@ def next_request(self):
if url:
return self.make_requests_from_url(url)

def spider_idle(self):
"""Schedules a request if available, otherwise waits."""
def schedule_next_request(self):
    """Hand the next pending request, if any, to the crawler engine.

    Asks ``self.next_request()`` for a request. When it yields nothing
    (a falsy value) this method does nothing; otherwise the request is
    passed to ``self.crawler.engine.crawl`` with this spider as owner.
    """
    request = self.next_request()
    if not request:
        # Nothing available right now; leave the engine untouched.
        return
    self.crawler.engine.crawl(request, spider=self)

def spider_idle(self):
    """Handle the idle signal: try to schedule work, then refuse to close.

    Calls ``self.schedule_next_request()`` once and then unconditionally
    raises ``DontCloseSpider``, whether or not a request was scheduled.

    Raises:
        DontCloseSpider: always; per its name, this asks the engine to
            keep the spider open so it can wait for more work.
    """
    self.schedule_next_request()
    # Raised on every call, even when nothing was scheduled.
    raise DontCloseSpider()

def item_scraped(self, *args, **kwargs):
    """Schedule the next request as soon as an item has been scraped.

    This lets new requests be scheduled without waiting for the spider
    to become idle. Any positional or keyword arguments supplied by the
    caller are accepted and ignored.
    """
    # The arguments are deliberately unused; only the event itself matters.
    self.schedule_next_request()


class RedisSpider(RedisMixin, BaseSpider):
"""Spider that reads urls from redis queue when idle."""
Expand Down

0 comments on commit a295b18

Please sign in to comment.