forked from ShilongLee/Crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b1592e3
commit 9e68a1e
Showing
14 changed files
with
320 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,6 +67,8 @@ | |
- 添加淘宝账号 | ||
- 过期淘宝账号 | ||
- 获取淘宝账号列表 | ||
- 获取淘宝商品详情 | ||
- 获取淘宝商品评论 | ||
- 淘宝搜索结果获取 | ||
|
||
淘宝:[API 文档](api/taobao/taobao.md) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,6 @@ | |
import urllib.parse | ||
import re | ||
import random | ||
import json | ||
|
||
HOST = 'https://www.douyin.com' | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from .search import request_search | ||
from .detail import request_detail | ||
from .comments import request_comments |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from .common import COMMON_HEADERS, APPKEY, HOST, get_token, sign | ||
from lib.logger import logger | ||
from lib import requests | ||
from urllib.parse import quote | ||
import time | ||
import asyncio | ||
|
||
async def request_comments(id: str, cookie: str, offset: int = 0, limit: int = 20) -> dict:
    """
    Fetch a window of comments for a Taobao item.

    The window [offset, offset + limit) is mapped onto the fixed-size pages
    served by request_page. Every page the window touches is requested
    concurrently and the concatenated result is trimmed back to the window.

    Args:
        id: Item identifier, forwarded to request_page.
        cookie: Account cookie, forwarded to request_page.
        offset: Zero-based index of the first comment wanted. Negative
            values are treated as 0.
        limit: Maximum number of comments to return. Values <= 0 yield an
            empty result without issuing any request.

    Returns:
        A dict ``{'comments': list, 'total': int}``. ``total`` is the last
        positive total reported by any fetched page, or 0 if none was.
    """
    # Must stay in sync with the "pageSize" hard-coded in request_page's payload.
    page_size = 20

    offset = max(offset, 0)
    if limit <= 0:
        # Nothing requested: avoid hitting the network for an empty window.
        return {'comments': [], 'total': 0}

    # Integer floor division keeps the page arithmetic exact (the float
    # division + int() truncation it replaces rounds toward zero and loses
    # precision for very large values).
    first_page = offset // page_size + 1
    last_page = (offset + limit - 1) // page_size + 1

    pages = await asyncio.gather(
        *(request_page(id, cookie, page) for page in range(first_page, last_page + 1))
    )

    comments = []
    total = 0
    # NOTE(review): a page that fails contributes an empty list, so later
    # pages shift forward in the concatenation and the final slice may not
    # line up with the requested window.
    for page_comments, page_total in pages:
        comments.extend(page_comments)
        if page_total > 0:
            total = page_total

    # Position of `offset` inside the first fetched page.
    start = offset % page_size
    return {'comments': comments[start:start + limit], 'total': total}
|
||
async def request_page(id: str, cookie: str, page: int) -> tuple[list, int]:
    """
    Fetch one page (20 entries) of reviews for a Taobao item.

    Builds the signed query for the
    ``mtop.alibaba.review.list.for.new.pc.detail`` endpoint, sends it with
    the account cookie and extracts the review list and count from the
    response.

    Args:
        id: Item identifier, embedded as "itemId" in the request payload.
        cookie: Account cookie; used both as a request header and as the
            input to get_token.
        page: 1-based page number, embedded as "pageNum".

    Returns:
        ``(reviews, count)`` where ``reviews`` is the response's
        ``data.module.reviewVOList`` and ``count`` is
        ``data.module.foldFlagCount`` as an int. Returns ``([], 0)`` when
        the request does not return HTTP 200 or the body is not a JSON
        object.
    """
    str_data = f'{{"itemId":"{id}","bizCode":"ali.china.tmall","channel":"pc_detail","pageSize":20,"pageNum":{page}}}'
    quote_data = quote(str_data, 'utf-8')
    token = get_token(cookie)
    # Millisecond epoch timestamp. Slicing the digits of str(time.time())
    # yields fewer than 13 digits whenever the float prints with fewer than
    # three decimals, so compute it arithmetically instead.
    timestamp = str(int(time.time() * 1000))
    # The signature covers the unquoted payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    param = {
        'jsv': '2.6.1',
        'appKey': APPKEY,
        't': timestamp,
        'sign': sgn,
        'api': 'mtop.alibaba.review.list.for.new.pc.detail',
        'v': '1.0',
        'isSec': '0',
        'ecode': '0',
        'timeout': '20000',
        'ttid': '2022@taobao_litepc_9.17.0',
        'AntiFlood': 'true',
        'AntiCreep': 'true',
        'dataType': 'json',
        'valueType': 'string',
        'preventFallback': 'true',
        'type': 'json',
        'data': quote_data
    }
    headers = {'cookie': cookie}
    headers.update(COMMON_HEADERS)
    url = f'{HOST}/h5/mtop.alibaba.review.list.for.new.pc.detail/1.0/'
    logger.info(f'请求商品评论, url: {url}, params: {param}')
    resp = await requests.get(url, headers=headers, params=param)

    # Decode the body once and tolerate non-JSON error pages, so that a bad
    # response is reported through the status check instead of raising from
    # inside the log call.
    # NOTE(review): assumes the response's json() raises a ValueError
    # subclass on invalid JSON - confirm against lib.requests.
    try:
        body = resp.json()
    except ValueError:
        body = None
    logger.info(f'请求商品评论, code: {resp.status_code}, body: {body}, url: {url}, params: {param}')
    if resp.status_code != 200:
        logger.error(f'请求商品评论失败, status_code: {resp.status_code}')
        return [], 0
    if not isinstance(body, dict):
        return [], 0

    # "data" / "module" may be present but null, hence `or {}` rather than a
    # .get() default.
    module = (body.get('data') or {}).get('module') or {}
    return module.get('reviewVOList') or [], int(module.get('foldFlagCount') or 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from .common import COMMON_HEADERS, APPKEY, HOST, get_token, sign | ||
from lib.logger import logger | ||
from lib import requests | ||
from urllib.parse import quote | ||
import time | ||
|
||
async def request_detail(id: str, cookie: str) -> dict:
    """
    Fetch the detail data of a Taobao item.

    Builds the signed query for the ``mtop.taobao.pcdetail.data.get``
    endpoint, sends it with the account cookie and returns the ``data``
    member of the JSON response.

    Args:
        id: Item identifier, embedded in the request payload.
        cookie: Account cookie; used both as a request header and as the
            input to get_token.

    Returns:
        The response's ``data`` value, or ``{}`` when the request does not
        return HTTP 200, the body is not a JSON object, or ``data`` is
        missing/empty.
    """
    str_data = f'{{"id":"{id}","detail_v":"3.3.2","exParams":"{{\\"id\\":\\"{id}\\",\\"item_type\\":\\"ad\\",\\"ali_refid\\":\\"a3_431358_1007:15572659:H:15572659_0_11770813889:c0a3730368049201836ac06218cb19dd\\",\\"ali_trackid\\":\\"296_c0a3730368049201836ac06218cb19dd\\",\\"spm\\":\\"a21bo.jianhua/a.201876.d5\\",\\"scm\\":\\"1007.40986.397407.0\\",\\"queryParams\\":\\"ali_refid=a3_431358_1007%3A15572659%3AH%3A15572659_0_11770813889%3Ac0a3730368049201836ac06218cb19dd&ali_trackid=296_c0a3730368049201836ac06218cb19dd&id={id}&item_type=ad&scm=1007.40986.397407.0&spm=a21bo.jianhua%2Fa.201876.d5\\",\\"domain\\":\\"https://item.taobao.com\\",\\"path_name\\":\\"/item.htm\\"}}"}}'
    quote_data = quote(str_data, 'utf-8')
    token = get_token(cookie)
    # Millisecond epoch timestamp. Slicing the digits of str(time.time())
    # yields fewer than 13 digits whenever the float prints with fewer than
    # three decimals, so compute it arithmetically instead.
    timestamp = str(int(time.time() * 1000))
    # The signature covers the unquoted payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    param = {
        'jsv': '2.6.1',
        'appKey': APPKEY,
        't': timestamp,
        'sign': sgn,
        'api': 'mtop.taobao.pcdetail.data.get',
        'v': '1.0',
        'isSec': '0',
        'ecode': '0',
        'timeout': '10000',
        'ttid': '2022@taobao_litepc_9.17.0',
        'AntiFlood': 'true',
        'AntiCreep': 'true',
        'dataType': 'json',
        'valueType': 'string',
        'preventFallback': 'true',
        'type': 'json',
        'data': quote_data
    }
    headers = {'cookie': cookie}
    headers.update(COMMON_HEADERS)
    url = f'{HOST}/h5/mtop.taobao.pcdetail.data.get/1.0/'
    logger.info(f'请求商品详情, url: {url}, params: {param}')
    resp = await requests.get(url, headers=headers, params=param)

    # Decode the body once and tolerate non-JSON error pages, so that a bad
    # response is reported through the status check instead of raising from
    # inside the log call.
    # NOTE(review): assumes the response's json() raises a ValueError
    # subclass on invalid JSON - confirm against lib.requests.
    try:
        body = resp.json()
    except ValueError:
        body = None
    logger.info(f'请求商品详情, code: {resp.status_code}, body: {body}, url: {url}, params: {param}')
    if resp.status_code != 200:
        logger.error(f'请求商品详情失败, status_code: {resp.status_code}')
        return {}
    if not isinstance(body, dict):
        return {}

    # Normalise a missing/null "data" to {} so the declared return type holds
    # and callers can rely on a single "empty means failure" convention.
    return body.get('data') or {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from utils.error_code import ErrorCode | ||
from utils.reply import reply | ||
from ..models import accounts | ||
from lib.logger import logger | ||
from ..logic import request_comments | ||
import random | ||
|
||
async def comments(id: str, offset: int = 0, limit: int = 20):
    """
    Route handler: get the comments of an item.

    Loads the stored accounts, shuffles them, and tries each non-expired
    account in turn until request_comments yields a non-empty result.

    Args:
        id: Item identifier.
        offset: Zero-based index of the first comment wanted.
        limit: Maximum number of comments to return.

    Returns:
        ``reply(ErrorCode.OK, ...)`` carrying the result of the first
        successful account, or ``reply(ErrorCode.NO_ACCOUNT, ...)`` when no
        account produced a result.
    """
    _accounts = await accounts.load()
    # Randomise the order in which accounts are tried.
    random.shuffle(_accounts)
    for account in _accounts:
        if account.get('expired', 0) == 1:
            continue
        account_id = account.get('id', '')
        res = await request_comments(id, account.get('cookie', ''), offset, limit)
        # Treat any empty result (None as well as {}) as a failure.
        # NOTE(review): request_comments always returns a dict with
        # 'comments' and 'total' keys, so this branch is currently not taken
        # even when the upstream request failed - confirm intended contract.
        if not res:
            logger.error(f'get comments failed, account: {account_id}, id: {id}, offset: {offset}, limit: {limit}, res: {res}')
            continue
        logger.info(f'get comments success, account: {account_id}, id: {id}, offset: {offset}, limit: {limit}, res: {res}')
        return reply(ErrorCode.OK, '成功', res)
    logger.warning(f'get comments failed. id: {id}, offset: {offset}, limit: {limit}')
    return reply(ErrorCode.NO_ACCOUNT, '请先添加账号')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from utils.error_code import ErrorCode | ||
from utils.reply import reply | ||
from ..models import accounts | ||
from lib.logger import logger | ||
from ..logic import request_detail | ||
import random | ||
|
||
# route | ||
async def detail(id: str):
    """
    Route handler: get the detail of an item.

    Loads the stored accounts, shuffles them, and tries each non-expired
    account in turn until request_detail yields a non-empty result.

    Args:
        id: Item identifier.

    Returns:
        ``reply(ErrorCode.OK, ...)`` carrying the detail data from the first
        successful account, or ``reply(ErrorCode.NO_ACCOUNT, ...)`` when no
        account produced a result.
    """
    _accounts = await accounts.load()
    # Randomise the order in which accounts are tried.
    random.shuffle(_accounts)
    for account in _accounts:
        if account.get('expired', 0) == 1:
            continue
        account_id = account.get('id', '')
        res = await request_detail(id, account.get('cookie', ''))
        # request_detail can hand back None (response without "data") as well
        # as {}; both mean this account failed, so move on to the next one
        # instead of replying OK with an empty payload.
        if not res:
            logger.error(f'get item detail failed, account: {account_id}, id: {id}, res: {res}')
            continue
        logger.info(f'get item detail success, account: {account_id}, id: {id}, res: {res}')
        return reply(ErrorCode.OK, '成功', res)
    logger.warning(f'get item detail failed. id: {id}')
    return reply(ErrorCode.NO_ACCOUNT, '请先添加账号')
Oops, something went wrong.