Skip to content

Commit

Permalink
use status pack send back _on_get_info
Browse files Browse the repository at this point in the history
  • Loading branch information
binux committed May 27, 2015
1 parent 932673f commit bff5611
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 19 deletions.
8 changes: 2 additions & 6 deletions pyspider/libs/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,8 @@ def _on_cronjob(self, response, task):
function = cronjob.__get__(self, self.__class__)
self._run_func(function, response, task)

@not_send_status
def _on_get_info(self, response, task):
"""Sending runtime infomation about this script."""
result = {}
assert response.save
for each in response.save:
for each in response.save or []:
if each == 'min_tick':
result[each] = self._min_tick
self.crawl('data:,on_get_info', save=result)
self.save[each] = self._min_tick
23 changes: 11 additions & 12 deletions pyspider/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def _update_project(self, project):
self.task_queue[project['name']].rate = project['rate']
self.task_queue[project['name']].burst = project['burst']

# update project runtime info from processing by sending a on_get_info request,
# result is catched by on_request
# update project runtime info from processor by sending a _on_get_info
# request, result is in status_page.track.save
self.on_select_task({
'taskid': '_on_get_info',
'project': project['name'],
'url': 'data:,_on_get_info',
'status': self.taskdb.ACTIVE,
'status': self.taskdb.SUCCESS,
'fetch': {
'save': ['min_tick', ],
},
Expand Down Expand Up @@ -208,7 +208,14 @@ def _check_task_done(self):
try:
while True:
task = self.status_queue.get_nowait()
if not self.task_verify(task):
# check _on_get_info result here
if task.get('taskid') == '_on_get_info' and 'project' in task and 'track' in task:
self.projects[task['project']].update(task['track'].get('save', {}))
logger.info(
'%s on_get_info %r', task['project'], task['track'].get('save', {})
)
continue
elif not self.task_verify(task):
continue
self.on_task_status(task)
cnt += 1
Expand Down Expand Up @@ -236,14 +243,6 @@ def _check_request(self):
if not self.task_verify(task):
continue

# check _on_get_info result here
if task['url'] == 'data:,on_get_info':
self.projects[task['project']].update(task['fetch'].get('save', {}))
logger.info(
'%s on_get_info %r', task['project'], task['fetch'].get('save', {})
)
continue

if task['taskid'] in self.task_queue[task['project']]:
if not task.get('schedule', {}).get('force_update', False):
logger.debug('ignore newtask %(project)s:%(taskid)s %(url)s', task)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def test_20_get_info(self):
fetch_result['save'] = task['fetch']['save']

ret = self.instance.run_task(self.module, task, fetch_result)
self.assertEqual(len(ret.follows), 1, ret.logstr())
self.assertEqual(len(ret.save), 1, ret.logstr())
for each in ret.follows:
self.assertEqual(each['url'], 'data:,on_get_info')
self.assertEqual(each['fetch']['save']['min_tick'], 10)
Expand Down

0 comments on commit bff5611

Please sign in to comment.