Skip to content

Commit

Permalink
fix issue crawlab-team#12
Browse files Browse the repository at this point in the history
  • Loading branch information
Marvin Zhang committed Apr 23, 2019
1 parent 4dff16e commit db1cd7e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
4 changes: 4 additions & 0 deletions crawlab/db/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,5 +175,9 @@ def aggregate(self, col_name: str, pipelines, **kwargs):
col = self.db[col_name]
return col.aggregate(pipelines, **kwargs)

def create_index(self, col_name: str, keys: dict, **kwargs):
    """
    Create an index on a collection.
    :param col_name: name of the collection to index
    :param keys: index specification; either a dict mapping field name to
        direction (e.g. ``{'task_id': 1}``) or a list of
        ``(field, direction)`` pairs
    :param kwargs: extra options forwarded unchanged to the collection's
        ``create_index`` call
    :return: the value returned by the collection's ``create_index``
    """
    col = self.db[col_name]
    # PyMongo 3.x rejects a dict index spec with TypeError and expects a list
    # of (key, direction) pairs, so normalise dicts before handing them over.
    if isinstance(keys, dict):
        keys = list(keys.items())
    return col.create_index(keys=keys, **kwargs)


db_manager = DbManager()
21 changes: 21 additions & 0 deletions crawlab/tasks/spider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from datetime import datetime
from time import sleep

from bson import ObjectId
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH
Expand All @@ -10,6 +11,17 @@
from utils.log import other as logger


def get_task(id: str):
    """
    Fetch a task document, retrying while it is not yet available.
    Looks the task up in the 'tasks' collection up to five times, pausing
    one second after every unsuccessful attempt.
    :param id: task id
    :return: the task object, or None if every attempt came back empty
    """
    for _ in range(5):
        found = db_manager.get('tasks', id=id)
        if found is not None:
            return found
        # not there yet; wait before the next lookup
        sleep(1)
    return None


@celery_app.task(bind=True)
def execute_spider(self, id: str, params: str = None):
"""
Expand All @@ -26,6 +38,12 @@ def execute_spider(self, id: str, params: str = None):
if params is not None:
command += ' ' + params

# get task object and return if not found
task = get_task(task_id)
if task is None:
return

# current working directory
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))

# log info
Expand Down Expand Up @@ -69,6 +87,9 @@ def execute_spider(self, id: str, params: str = None):
if spider.get('col'):
env['CRAWLAB_COLLECTION'] = spider.get('col')

# create an index on task_id to speed up retrieval of results data
db_manager.create_index(spider.get('col'), {'task_id': 1})

# start process
cmd_arr = command.split(' ')
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
Expand Down

0 comments on commit db1cd7e

Please sign in to comment.