@@ -30,7 +30,7 @@ def from_crawler(cls, crawler):
30
30
pqclass = load_object (settings ['SCHEDULER_PRIORITY_QUEUE' ])
31
31
dqclass = load_object (settings ['SCHEDULER_DISK_QUEUE' ])
32
32
mqclass = load_object (settings ['SCHEDULER_MEMORY_QUEUE' ])
33
- logunser = settings .getbool ('LOG_UNSERIALIZABLE_REQUESTS' )
33
+ logunser = settings .getbool ('LOG_UNSERIALIZABLE_REQUESTS' , settings . getbool ( 'SCHEDULER_DEBUG' ) )
34
34
return cls (dupefilter , jobdir = job_dir (settings ), logunser = logunser ,
35
35
stats = crawler .stats , pqclass = pqclass , dqclass = dqclass , mqclass = mqclass )
36
36
@@ -84,11 +84,16 @@ def _dqpush(self, request):
84
84
try :
85
85
reqd = request_to_dict (request , self .spider )
86
86
self .dqs .push (reqd , - request .priority )
87
- except ValueError as e : # non serializable request
87
+ except ValueError as e : # non serializable request
88
88
if self .logunser :
89
- logger .error ("Unable to serialize request: %(request)s - reason: %(reason)s" ,
90
- {'request' : request , 'reason' : e },
89
+ msg = ("Unable to serialize request: %(request)s - reason:"
90
+ " %(reason)s - no more unserializable requests will be"
91
+ " logged (stats being collected)" )
92
+ logger .error (msg , {'request' : request , 'reason' : e },
91
93
exc_info = True , extra = {'spider' : self .spider })
94
+ self .logunser = False
95
+ self .stats .inc_value ('scheduler/unserializable' ,
96
+ spider = self .spider )
92
97
return
93
98
else :
94
99
return True
0 commit comments