Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
pakoo committed Jul 18, 2013
1 parent 64881ff commit 10a92e6
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def save_item(data):
save_item_log(data)
print '============================'

def searchcrawler(url):
def searchcrawler(url,keyword=''):
"""
tb搜索页爬虫
"""
Expand All @@ -211,7 +211,7 @@ def searchcrawler(url):
item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
print item_url
print item_id
judge_site(item_url)
judge_site(item_url,keyword)
items_col = soup.findAll('div',{'class':'col item icon-datalink'})
if items_col:
print '=======================row search col=========================='
Expand All @@ -223,7 +223,7 @@ def searchcrawler(url):
item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
print item_url
print item_id
judge_site(item_url)
judge_site(item_url,keyword)


def itemcrawler(iid,source='tb'):
Expand Down Expand Up @@ -402,7 +402,7 @@ def getTaobaoItemInfo(iid,keyword=''):
iteminfo['location'] = quantity_info['location']
return iteminfo

def judge_site(url):
def judge_site(url,keyword=''):
"""
判断物品是tb还是tm
"""
Expand All @@ -413,13 +413,13 @@ def judge_site(url):
try:
if url_info[1] == 'detail.tmall.com':
print 'it is a tm item'
data = getTmallItemInfo(iid)
data = getTmallItemInfo(iid,keyword)
elif urlkey.get('cm_id'):
print 'it is a tm item'
data = getTmallItemInfo(iid)
data = getTmallItemInfo(iid,keyword)
else:
print 'it is a tb item'
data = getTaobaoItemInfo(iid)
data = getTaobaoItemInfo(iid,keyword)
except Exception ,e:
print traceback.print_exc()
return
Expand Down Expand Up @@ -528,6 +528,11 @@ def runcrawler():
for k in keyword:
searchcrawler(url%k,keyword=k)

def cleandata():
    """Drop the ``item``, ``itemlog`` and ``shop`` attributes of ``db``.

    Calls ``.drop()`` on ``db.item``, ``db.itemlog`` and ``db.shop``, in
    that order. ``db`` is a module-level name defined outside this
    function.

    NOTE(review): ``db`` is presumably a MongoDB database handle, which
    would make this an irreversible wipe of the crawler's stored data --
    confirm before calling.
    """
    for collection_name in ('item', 'itemlog', 'shop'):
        getattr(db, collection_name).drop()

if __name__ == "__main__":
pass
#print '*******************************************'
Expand Down

0 comments on commit 10a92e6

Please sign in to comment.