Skip to content

Commit 472a8a4

Browse files
committed
Change name, log once
1 parent ec1c615 commit 472a8a4

File tree

4 files changed

+20
-4
lines changed

4 files changed

+20
-4
lines changed

docs/topics/jobs.rst

+2
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,6 @@ But this will::
9696
somearg = response.meta['somearg']
9797
print "the argument passed is:", somearg
9898

99+
If you wish to log the requests that couldn't be serialized, you can set the ``SCHEDULER_DEBUG`` setting to ``True`` in the project's settings module. It is ``False`` by default.
100+
99101
.. _pickle: http://docs.python.org/library/pickle.html

docs/topics/settings.rst

+10
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,16 @@ Default: ``'scrapy.core.scheduler.Scheduler'``
10261026

10271027
The scheduler to use for crawling.
10281028

1029+
.. setting:: SCHEDULER_DEBUG
1030+
1031+
SCHEDULER_DEBUG
1032+
---------------
1033+
1034+
Default: ``False``
1035+
1036+
Setting to ``True`` will log the first unserializable request encountered.
1037+
Stats are collected for all unserializable requests, whether logged or not.
1038+
10291039
.. setting:: SPIDER_CONTRACTS
10301040

10311041
SPIDER_CONTRACTS

scrapy/core/scheduler.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def from_crawler(cls, crawler):
3030
pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
3131
dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
3232
mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
33-
logunser = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS')
33+
logunser = settings.getbool('SCHEDULER_DEBUG')
3434
return cls(dupefilter, jobdir=job_dir(settings), logunser=logunser,
3535
stats=crawler.stats, pqclass=pqclass, dqclass=dqclass, mqclass=mqclass)
3636

@@ -86,9 +86,13 @@ def _dqpush(self, request):
8686
self.dqs.push(reqd, -request.priority)
8787
except ValueError as e: # non serializable request
8888
if self.logunser:
89-
logger.error("Unable to serialize request: %(request)s - reason: %(reason)s",
90-
{'request': request, 'reason': e},
89+
msg = ("Unable to serialize request: %(request)s - reason: %(reason)s"
90+
" - no more unserializable requests will be logged"
91+
" (stats being collected)")
92+
logger.error(msg, {'request': request, 'reason': e},
9193
exc_info=True, extra={'spider': self.spider})
94+
self.logunser = False
95+
self.stats.inc_value('scheduler/unserializable', spider=self.spider)
9296
return
9397
else:
9498
return True

scrapy/settings/default_settings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@
191191
LOG_LEVEL = 'DEBUG'
192192
LOG_FILE = None
193193

194-
LOG_UNSERIALIZABLE_REQUESTS = False
194+
SCHEDULER_DEBUG = False
195195

196196
LOGSTATS_INTERVAL = 60.0
197197

0 commit comments

Comments
 (0)