Skip to content

Commit

Permalink
ADD:增加淘宝接口
Browse files Browse the repository at this point in the history
  • Loading branch information
ShilongLee committed Jul 22, 2024
1 parent b1592e3 commit 9e68a1e
Show file tree
Hide file tree
Showing 14 changed files with 320 additions and 27 deletions.
49 changes: 48 additions & 1 deletion docs/api/taobao/taobao.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,54 @@
| ut | true | int | 更新时间戳 |
| expired | true | int | 0: 有效 1: 过期 (请求失败时自动设为过期) |

### 获取商品详情

- **URL**

`/taobao/detail`

- **Method**

`GET`

- **URL Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| id | true | string | 商品id |

- **Success Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | true | struct | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

### 获取商品评论

- **URL**

`/taobao/comments`

- **Method**

`GET`

- **URL Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| id | true | string | 商品id |
| offset | false | int | 评论起始偏移量,默认 0 |
| limit | false | int | 返回评论数量,默认 20 |

- **Success Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | true | struct | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

### 关键词搜索商品

- **URL**
Expand All @@ -118,4 +166,3 @@
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | true | struct | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

2 changes: 2 additions & 0 deletions docs/doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
- 添加淘宝账号
- 过期淘宝账号
- 获取淘宝账号列表
- 获取淘宝商品详情
- 获取淘宝商品评论
- 淘宝搜索结果获取

淘宝:[API 文档](api/taobao/taobao.md)
Expand Down
8 changes: 6 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
| 哔哩哔哩 |||||||
| 小红书 |||||||
| 微博 |||||||
| 淘宝 || | | || |
| 淘宝 || | | || |
| 京东 |||||||

- 快手、抖音、哔哩哔哩、小红书、淘宝、京东、微博平台的爬虫接口
Expand Down Expand Up @@ -61,6 +61,10 @@

日志默认存放在`.log/`目录下,crawler.log为爬虫日志。

5. 本地测试

`test/cookie.py`中添加自己的cookie,然后运行`make test module=douyin`进行单元测试,不加module参数则运行所有测试用例。

### 使用docker

1. 一键启动
Expand Down Expand Up @@ -98,7 +102,7 @@ API文档:[API 文档](docs/doc.md)
- [x] 微博
- [x] 抖音全面更新a_bogus
- [x] 更新异步框架,多请求并行提高响应速度
- [ ] 淘宝详情、评论、用户信息功能
- [x] 淘宝详情、评论、用户信息功能
- [ ] 京东详情、评论、用户信息功能

## star 趋势图
Expand Down
1 change: 0 additions & 1 deletion service/douyin/logic/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import urllib.parse
import re
import random
import json

HOST = 'https://www.douyin.com'

Expand Down
2 changes: 2 additions & 0 deletions service/taobao/logic/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .search import request_search
from .detail import request_detail
from .comments import request_comments
60 changes: 60 additions & 0 deletions service/taobao/logic/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from .common import COMMON_HEADERS, APPKEY, HOST, get_token, sign
from lib.logger import logger
from lib import requests
from urllib.parse import quote
import time
import asyncio

async def request_comments(id: str, cookie: str, offset: int = 0, limit: int = 20) -> dict:
    """
    Fetch a window of comments for a Taobao product.

    The upstream API is paged in blocks of 20, so every page overlapping
    ``[offset, offset + limit)`` is requested concurrently and the merged
    list is then sliced down to the requested window.

    Args:
        id: Product id, forwarded to request_page.
        cookie: Raw cookie header string, forwarded to request_page.
        offset: Index of the first comment to return.
        limit: Maximum number of comments to return.

    Returns:
        ``{'comments': [...], 'total': int}`` where ``total`` is the last
        positive count reported by any of the fetched pages (0 if none).
    """
    page_size = 20
    first_page = int(offset / page_size) + 1
    last_page = int((offset + limit - 1) / page_size) + 1

    # One request per page; gather returns results in page order.
    pages = await asyncio.gather(
        *(request_page(id, cookie, number) for number in range(first_page, last_page + 1))
    )

    merged = []
    total = 0
    for items, page_total in pages:
        merged += items
        if page_total > 0:
            total = page_total

    # The merged list starts at the first fetched page, so the window
    # begins at the offset's position inside that page.
    begin = offset % page_size
    return {'comments': merged[begin:begin + limit], 'total': total}

async def request_page(id: str, cookie: str, page: int) -> tuple[list, int]:
    """
    Request one page (20 items) of comments for a product.

    Args:
        id: Product id placed in the payload's "itemId" field.
        cookie: Raw cookie header string; sent with the request and used to
            derive the signing token via get_token.
        page: Page number sent as "pageNum".

    Returns:
        A ``(reviews, count)`` tuple: the "reviewVOList" list and the
        "foldFlagCount" value (as int) read from the response's
        ``data.module`` object. ``([], 0)`` is returned when the HTTP status
        is not 200 or when those fields are absent.
    """
    str_data = f'{{"itemId":"{id}","bizCode":"ali.china.tmall","channel":"pc_detail","pageSize":20,"pageNum":{page}}}'
    # quote()'s second positional argument is `safe`, not an encoding;
    # safe='' makes explicit that '/' is percent-encoded as well.
    quote_data = quote(str_data, safe='')
    token = get_token(cookie)
    # Millisecond timestamp. Slicing str(time.time()) yields a short, wrong
    # value whenever the float repr has fewer than three fractional digits.
    timestamp = str(int(time.time() * 1000))
    # The signature is computed over the raw (unquoted) payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    param = {
        'jsv': '2.6.1',
        'appKey': APPKEY,
        't': timestamp,
        'sign': sgn,
        'api': 'mtop.alibaba.review.list.for.new.pc.detail',
        'v': '1.0',
        'isSec': '0',
        'ecode': '0',
        'timeout': '20000',
        'ttid': '2022@taobao_litepc_9.17.0',
        'AntiFlood': 'true',
        'AntiCreep': 'true',
        'dataType': 'json',
        'valueType': 'string',
        'preventFallback': 'true',
        'type': 'json',
        # NOTE(review): this value is already percent-encoded; confirm that
        # lib.requests does not encode params a second time.
        'data': quote_data
    }
    headers = {'cookie': cookie}
    headers.update(COMMON_HEADERS)
    url = f'{HOST}/h5/mtop.alibaba.review.list.for.new.pc.detail/1.0/'
    logger.info(f'请求商品评论, url: {url}, params: {param}')
    resp = await requests.get(url, headers=headers, params=param)
    # Check the status before parsing: an error response is not guaranteed
    # to carry a JSON body.
    if resp.status_code != 200:
        logger.error(f'请求商品评论失败, status_code: {resp.status_code}')
        return [], 0
    body = resp.json()
    logger.info(f'请求商品评论, code: {resp.status_code}, body: {body}, url: {url}, params: {param}')
    # `or {}` also covers explicit nulls in the response.
    data = (body.get('data') or {}).get('module') or {}
    return data.get('reviewVOList', []), int(data.get('foldFlagCount', 0))
23 changes: 3 additions & 20 deletions service/taobao/logic/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from urllib.parse import quote
from lib.logger import logger

# Base URL of the H5 API host; request URLs are built as f'{HOST}/h5/<api>/<version>/'.
HOST = 'https://h5api.m.taobao.com'

COMMON_HEADERS = {
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
Expand All @@ -16,34 +18,15 @@
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
}

# API name of the search endpoint; also sent as the `api` query parameter.
SEARCH_API = 'mtop.relationrecommend.wirelessrecommend.recommend'
# Full search endpoint URL; the query string from pack_search_query is appended to it.
SEARCH_URL = f'https://h5api.m.taobao.com/h5/{SEARCH_API}/2.0/'
# Application key sent as `appKey` and included in the string hashed by sign().
APPKEY = '12574478'
# Value of the `jsv` query parameter for search requests.
SEARCH_JSV = '2.7.2'
# Value of the `v` query parameter for search requests.
SEARCH_V = '2.0'

def pack_search_query(cookie, keyword, page):
    """
    Build the signed query string for the search endpoint.

    Args:
        cookie: Raw cookie header string; the signing token is read from it
            via get_token.
        keyword: Search keyword; percent-encoded before being embedded in
            the payload's "q" field.
        page: Page number interpolated into the payload's "page" field.

    Returns:
        A query string starting with '?' that carries jsv, appKey, t, sign,
        api, v, type, dataType, callback and the percent-encoded payload.
    """
    # quote()'s second positional argument is `safe`, not an encoding;
    # safe='' makes explicit that '/' is percent-encoded as well.
    quote_keyword = quote(keyword, safe='')
    str_data = f'{{"appId":"34385","params":"{{\\"device\\":\\"HMA-AL00\\",\\"isBeta\\":\\"false\\",\\"grayHair\\":\\"false\\",\\"from\\":\\"nt_history\\",\\"brand\\":\\"HUAWEI\\",\\"info\\":\\"wifi\\",\\"index\\":\\"4\\",\\"rainbow\\":\\"\\",\\"schemaType\\":\\"auction\\",\\"elderHome\\":\\"false\\",\\"isEnterSrpSearch\\":\\"true\\",\\"newSearch\\":\\"false\\",\\"network\\":\\"wifi\\",\\"subtype\\":\\"\\",\\"hasPreposeFilter\\":\\"false\\",\\"prepositionVersion\\":\\"v2\\",\\"client_os\\":\\"Android\\",\\"gpsEnabled\\":\\"false\\",\\"searchDoorFrom\\":\\"srp\\",\\"debug_rerankNewOpenCard\\":\\"false\\",\\"homePageVersion\\":\\"v7\\",\\"searchElderHomeOpen\\":\\"false\\",\\"search_action\\":\\"initiative\\",\\"sugg\\":\\"_4_1\\",\\"sversion\\":\\"13.6\\",\\"style\\":\\"list\\",\\"ttid\\":\\"600000@taobao_pc_10.7.0\\",\\"needTabs\\":\\"true\\",\\"areaCode\\":\\"CN\\",\\"vm\\":\\"nw\\",\\"countryNum\\":\\"156\\",\\"m\\":\\"pc\\",\\"page\\":{page},\\"n\\":48,\\"q\\":\\"{quote_keyword}\\",\\"tab\\":\\"all\\",\\"pageSize\\":48,\\"sourceS\\":\\"0\\",\\"sort\\":\\"_coefp\\",\\"bcoffset\\":\\"\\",\\"ntoffset\\":\\"\\",\\"filterTag\\":\\"\\",\\"service\\":\\"\\",\\"prop\\":\\"\\",\\"loc\\":\\"\\",\\"start_price\\":null,\\"end_price\\":null,\\"startPrice\\":null,\\"endPrice\\":null,\\"itemIds\\":null,\\"p4pIds\\":null,\\"categoryp\\":\\"\\"}}"}}'
    quote_data = quote(str_data, safe='')
    # Millisecond timestamp. Slicing str(time.time()) yields a short, wrong
    # value whenever the float repr has fewer than three fractional digits.
    timestamp = str(int(time.time() * 1000))
    token = get_token(cookie)
    # The signature is computed over the raw (unquoted) payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    search_query = f'?jsv={SEARCH_JSV}&appKey={APPKEY}&t={timestamp}&sign={sgn}&api={SEARCH_API}&v={SEARCH_V}&type=jsonp&dataType=jsonp&callback=mtopjsonp2&data={quote_data}'
    logger.info(f'keyword: {keyword}, page: {page}, search_query: {search_query}')
    return search_query

def sign(token, tme, appKey, data):
    """
    Return the request signature for a payload.

    The four string arguments are joined with '&' in the order
    token, tme, appKey, data, and the hexadecimal MD5 digest of the
    UTF-8 encoded result is returned.
    """
    payload = "&".join((token, tme, appKey, data))
    return hashlib.md5(payload.encode("utf-8")).hexdigest()

def convert_cookies_to_dict(cookies):
    """
    Parse a raw ``Cookie`` header string into a name -> value dict.

    Pairs are separated by ';' with optional surrounding whitespace. Only the
    first '=' in a pair splits name from value, so values may contain '='.
    Empty segments (for example from a trailing ';') and segments without
    '=' are skipped instead of raising. When a name repeats, the last
    occurrence wins.

    Args:
        cookies: Cookie header string such as ``"a=1; b=2"``.

    Returns:
        dict mapping cookie names to their values; empty for an empty string.
    """
    parsed = {}
    for segment in cookies.split(';'):
        name, sep, value = segment.strip().partition('=')
        if sep:
            parsed[name] = value
    return parsed

def get_token(cookie):
    """
    Extract the signing token from a raw cookie header string.

    The token is the part of the ``_m_h5_tk`` cookie value before the first
    '_'. Cookie pairs may be separated by ';' with or without a following
    space; empty or malformed segments are ignored.

    Args:
        cookie: Cookie header string, e.g. ``"a=1; _m_h5_tk=<token>_<suffix>"``.

    Returns:
        The token string.

    Raises:
        KeyError: If the cookie string has no ``_m_h5_tk`` entry.
    """
    pairs = {}
    for segment in cookie.split(';'):
        name, sep, value = segment.strip().partition('=')
        if sep:
            pairs[name] = value
    # A missing cookie surfaces as KeyError, as it did before.
    return pairs['_m_h5_tk'].split('_')[0]
44 changes: 44 additions & 0 deletions service/taobao/logic/detail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from .common import COMMON_HEADERS, APPKEY, HOST, get_token, sign
from lib.logger import logger
from lib import requests
from urllib.parse import quote
import time

async def request_detail(id: str, cookie: str) -> dict:
    """
    Request the detail data of a Taobao product.

    Args:
        id: Product id, interpolated into the request payload.
        cookie: Raw cookie header string; sent with the request and used to
            derive the signing token via get_token.

    Returns:
        The ``data`` object of the JSON response, or ``{}`` when the HTTP
        status is not 200 or the response carries no ``data``.
    """
    str_data = f'{{"id":"{id}","detail_v":"3.3.2","exParams":"{{\\"id\\":\\"{id}\\",\\"item_type\\":\\"ad\\",\\"ali_refid\\":\\"a3_431358_1007:15572659:H:15572659_0_11770813889:c0a3730368049201836ac06218cb19dd\\",\\"ali_trackid\\":\\"296_c0a3730368049201836ac06218cb19dd\\",\\"spm\\":\\"a21bo.jianhua/a.201876.d5\\",\\"scm\\":\\"1007.40986.397407.0\\",\\"queryParams\\":\\"ali_refid=a3_431358_1007%3A15572659%3AH%3A15572659_0_11770813889%3Ac0a3730368049201836ac06218cb19dd&ali_trackid=296_c0a3730368049201836ac06218cb19dd&id={id}&item_type=ad&scm=1007.40986.397407.0&spm=a21bo.jianhua%2Fa.201876.d5\\",\\"domain\\":\\"https://item.taobao.com\\",\\"path_name\\":\\"/item.htm\\"}}"}}'
    # quote()'s second positional argument is `safe`, not an encoding;
    # safe='' makes explicit that '/' is percent-encoded as well.
    quote_data = quote(str_data, safe='')
    token = get_token(cookie)
    # Millisecond timestamp. Slicing str(time.time()) yields a short, wrong
    # value whenever the float repr has fewer than three fractional digits.
    timestamp = str(int(time.time() * 1000))
    # The signature is computed over the raw (unquoted) payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    param = {
        'jsv': '2.6.1',
        'appKey': APPKEY,
        't': timestamp,
        'sign': sgn,
        'api': 'mtop.taobao.pcdetail.data.get',
        'v': '1.0',
        'isSec': '0',
        'ecode': '0',
        'timeout': '10000',
        'ttid': '2022@taobao_litepc_9.17.0',
        'AntiFlood': 'true',
        'AntiCreep': 'true',
        'dataType': 'json',
        'valueType': 'string',
        'preventFallback': 'true',
        'type': 'json',
        # NOTE(review): this value is already percent-encoded; confirm that
        # lib.requests does not encode params a second time.
        'data': quote_data
    }
    headers = {'cookie': cookie}
    headers.update(COMMON_HEADERS)
    url = f'{HOST}/h5/mtop.taobao.pcdetail.data.get/1.0/'
    logger.info(f'请求商品详情, url: {url}, params: {param}')
    resp = await requests.get(url, headers=headers, params=param)
    # Check the status before parsing: an error response is not guaranteed
    # to carry a JSON body.
    if resp.status_code != 200:
        logger.error(f'请求商品详情失败, status_code: {resp.status_code}')
        return {}
    body = resp.json()
    logger.info(f'请求商品详情, code: {resp.status_code}, body: {body}, url: {url}, params: {param}')
    # Normalise a missing or null `data` to {} so the declared dict return
    # type holds and callers comparing against {} detect the failure.
    return body.get('data') or {}
17 changes: 15 additions & 2 deletions service/taobao/logic/search.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .common import pack_search_query, SEARCH_URL, COMMON_HEADERS
from .common import COMMON_HEADERS, APPKEY, HOST, sign, get_token
from lib.logger import logger
from lib import requests
from urllib.parse import quote
import json
import time
import re
import asyncio

Expand All @@ -24,11 +26,22 @@ async def request_search(keyword: str, cookie: str, offset: int = 0, limit: int
ret = {'total': total, 'results': results[(offset % page_size):(offset % page_size + limit)]}
return ret

def pack_search_query(cookie, keyword, page):
    """
    Build the signed query string for the search endpoint.

    Args:
        cookie: Raw cookie header string; the signing token is read from it
            via get_token.
        keyword: Search keyword; percent-encoded before being embedded in
            the payload's "q" field.
        page: Page number interpolated into the payload's "page" field.

    Returns:
        A query string starting with '?' that carries jsv, appKey, t, sign,
        api, v, type, dataType, callback and the percent-encoded payload.
    """
    # quote()'s second positional argument is `safe`, not an encoding;
    # safe='' makes explicit that '/' is percent-encoded as well.
    quote_keyword = quote(keyword, safe='')
    str_data = f'{{"appId":"34385","params":"{{\\"device\\":\\"HMA-AL00\\",\\"isBeta\\":\\"false\\",\\"grayHair\\":\\"false\\",\\"from\\":\\"nt_history\\",\\"brand\\":\\"HUAWEI\\",\\"info\\":\\"wifi\\",\\"index\\":\\"4\\",\\"rainbow\\":\\"\\",\\"schemaType\\":\\"auction\\",\\"elderHome\\":\\"false\\",\\"isEnterSrpSearch\\":\\"true\\",\\"newSearch\\":\\"false\\",\\"network\\":\\"wifi\\",\\"subtype\\":\\"\\",\\"hasPreposeFilter\\":\\"false\\",\\"prepositionVersion\\":\\"v2\\",\\"client_os\\":\\"Android\\",\\"gpsEnabled\\":\\"false\\",\\"searchDoorFrom\\":\\"srp\\",\\"debug_rerankNewOpenCard\\":\\"false\\",\\"homePageVersion\\":\\"v7\\",\\"searchElderHomeOpen\\":\\"false\\",\\"search_action\\":\\"initiative\\",\\"sugg\\":\\"_4_1\\",\\"sversion\\":\\"13.6\\",\\"style\\":\\"list\\",\\"ttid\\":\\"600000@taobao_pc_10.7.0\\",\\"needTabs\\":\\"true\\",\\"areaCode\\":\\"CN\\",\\"vm\\":\\"nw\\",\\"countryNum\\":\\"156\\",\\"m\\":\\"pc\\",\\"page\\":{page},\\"n\\":48,\\"q\\":\\"{quote_keyword}\\",\\"tab\\":\\"all\\",\\"pageSize\\":48,\\"sourceS\\":\\"0\\",\\"sort\\":\\"_coefp\\",\\"bcoffset\\":\\"\\",\\"ntoffset\\":\\"\\",\\"filterTag\\":\\"\\",\\"service\\":\\"\\",\\"prop\\":\\"\\",\\"loc\\":\\"\\",\\"start_price\\":null,\\"end_price\\":null,\\"startPrice\\":null,\\"endPrice\\":null,\\"itemIds\\":null,\\"p4pIds\\":null,\\"categoryp\\":\\"\\"}}"}}'
    quote_data = quote(str_data, safe='')
    # Millisecond timestamp. Slicing str(time.time()) yields a short, wrong
    # value whenever the float repr has fewer than three fractional digits.
    timestamp = str(int(time.time() * 1000))
    token = get_token(cookie)
    # The signature is computed over the raw (unquoted) payload.
    sgn = sign(token, timestamp, APPKEY, str_data)
    search_query = f'?jsv=2.7.2&appKey={APPKEY}&t={timestamp}&sign={sgn}&api=mtop.relationrecommend.wirelessrecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp2&data={quote_data}'
    logger.info(f'keyword: {keyword}, page: {page}, search_query: {search_query}')
    return search_query

async def search(keyword: str, cookie: str, page: int) -> dict:
headers = {'cookie': cookie}
headers.update(COMMON_HEADERS)
query = pack_search_query(cookie, keyword, page)
url = f'{SEARCH_URL}{query}'
url = f'{HOST}/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/{query}'
try:
logger.info(f'request url: {url}')
resp = await requests.get(url, headers=headers)
Expand Down
4 changes: 3 additions & 1 deletion service/taobao/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
# Account management endpoints.
router.add_api_route('/add_account', views.add_account, methods=['POST'])
router.add_api_route('/expire_account', views.expire_account, methods=['POST'])
router.add_api_route('/account_list', views.account_list, methods=['GET'])
# Data endpoints; each path is registered exactly once.
router.add_api_route('/search', views.search, methods=['GET'])
router.add_api_route('/detail', views.detail, methods=['GET'])
router.add_api_route('/comments', views.comments, methods=['GET'])
2 changes: 2 additions & 0 deletions service/taobao/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
from .expire_account import expire_account
from .add_account import add_account
from .search import search
from .detail import detail
from .comments import comments
25 changes: 25 additions & 0 deletions service/taobao/views/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from utils.error_code import ErrorCode
from utils.reply import reply
from ..models import accounts
from lib.logger import logger
from ..logic import request_comments
import random

async def comments(id: str, offset: int = 0, limit: int = 20):
    """
    Fetch product comments, trying the stored accounts in random order.

    Accounts flagged as expired are skipped. The first account whose request
    yields a non-empty result determines the response.

    Args:
        id: Product id, forwarded to request_comments.
        offset: Forwarded to request_comments as the window start.
        limit: Forwarded to request_comments as the window size.

    Returns:
        ``reply(ErrorCode.OK, ...)`` carrying the result of the first
        successful account, or ``reply(ErrorCode.NO_ACCOUNT, ...)`` when no
        account produced a result.
    """
    _accounts = await accounts.load()
    # Shuffle in place so requests are spread across accounts.
    random.shuffle(_accounts)
    for account in _accounts:
        if account.get('expired', 0) == 1:
            continue
        account_id = account.get('id', '')
        res = await request_comments(id, account.get('cookie', ''), offset, limit)
        # Treat any empty result (None as well as {}) as a failure.
        # NOTE(review): request_comments appears to always return a dict with
        # 'comments' and 'total' keys, so this branch may never trigger;
        # confirm how upstream failures should be detected.
        if not res:
            logger.error(f'get comments failed, account: {account_id}, id: {id}, offset: {offset}, limit: {limit}, res: {res}')
            continue
        logger.info(f'get comments success, account: {account_id}, id: {id}, offset: {offset}, limit: {limit}, res: {res}')
        return reply(ErrorCode.OK, '成功', res)
    logger.warning(f'get comments failed. id: {id}, offset: {offset}, limit: {limit}')
    return reply(ErrorCode.NO_ACCOUNT, '请先添加账号')
26 changes: 26 additions & 0 deletions service/taobao/views/detail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from utils.error_code import ErrorCode
from utils.reply import reply
from ..models import accounts
from lib.logger import logger
from ..logic import request_detail
import random

# route
async def detail(id: str):
    """
    Fetch product detail, trying the stored accounts in random order.

    Accounts flagged as expired are skipped. The first account whose request
    yields a non-empty result determines the response.

    Args:
        id: Product id, forwarded to request_detail.

    Returns:
        ``reply(ErrorCode.OK, ...)`` carrying the detail data of the first
        successful account, or ``reply(ErrorCode.NO_ACCOUNT, ...)`` when no
        account produced a result.
    """
    _accounts = await accounts.load()
    # Shuffle in place so requests are spread across accounts.
    random.shuffle(_accounts)
    for account in _accounts:
        if account.get('expired', 0) == 1:
            continue
        account_id = account.get('id', '')
        res = await request_detail(id, account.get('cookie', ''))
        # Treat any empty result as a failure: request_detail can hand back
        # None (response without 'data') as well as {}.
        if not res:
            logger.error(f'get item detail failed, account: {account_id}, id: {id}, res: {res}')
            continue
        logger.info(f'get item detail success, account: {account_id}, id: {id}, res: {res}')
        return reply(ErrorCode.OK, '成功', res)
    logger.warning(f'get item detail failed. id: {id}')
    return reply(ErrorCode.NO_ACCOUNT, '请先添加账号')
Loading

0 comments on commit 9e68a1e

Please sign in to comment.