Skip to content

Commit

Permalink
it之家热评 优化逻辑,添加注释
Browse files Browse the repository at this point in the history
  • Loading branch information
Ehco1996 committed Aug 25, 2017
1 parent 4ff8f0a commit 7f888d4
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 9 deletions.
6 changes: 4 additions & 2 deletions ithome/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
'''
MongoDB configuration file.
'''

# Database URL
MONGO_URL = 'localhost'
# Database name
MONGO_DB = 'ithome'
# NOTE(review): MONGO_TABLE is assigned twice below; the later assignment
# ('hotcomment_it') is the one that takes effect at runtime.
MONGO_TABLE = 'hotcomment_network'
# Database table
MONGO_TABLE = 'hotcomment_it'
1 change: 1 addition & 0 deletions ithome/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
client = MongoClient(MONGO_URL, connect=True)
db = client[MONGO_DB]

# Write the record to the database
def save_to_mongo(result):
if db[MONGO_TABLE].insert(result):
print('存储成功', result)
Expand Down
13 changes: 6 additions & 7 deletions ithome/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,8 @@ def parse_news_id(categoryid, page_start):


import time
# 写了一个检测函数运行时间的装饰器


# A decorator that measures a function's running time
def clock(func):
def clocked(*args):
t0 = time.perf_counter()
Expand All @@ -109,10 +108,10 @@ def clocked(*args):

#@clock
def main(page_start):
# 苹果分类的id
APPLE_ID = '32'
# 新闻分类的id
ID = '31'
# 建立苹果新闻分类对象
apple = parse_news_id(APPLE_ID, page_start)
apple = parse_news_id(ID, page_start)

# 利用迭代器抓取热评
for newsid in apple:
Expand All @@ -131,9 +130,9 @@ def main(page_start):

# Enable multiprocessing mode
# NOTE(review): the consecutive near-duplicate assignments below (pool, groups)
# look like removed/added pairs from a diff view; as written, the later
# assignment of each name is the one in effect -- confirm which is intended.
from multiprocessing import Pool
pool = Pool()
pool = Pool()
# Process pool: each process crawls the hot comments for 10 pages of news
groups = ([x for x in range(1, 31,10)])
# Start pages 111, 121, ..., 181 -- one value per call to main
groups = ([x for x in range(111, 191,10)])
# Run main once for every start page in groups across the pool's workers
pool.map(main, groups)
# Stop accepting new tasks, then wait for the worker processes to exit
pool.close()
pool.join()
Expand Down

0 comments on commit 7f888d4

Please sign in to comment.