Skip to content
This repository has been archived by the owner on Jul 29, 2023. It is now read-only.

Commit

Permalink
升级接口及签名
Browse files Browse the repository at this point in the history
  • Loading branch information
never615 committed Jan 30, 2020
1 parent 2888550 commit 98b50e0
Show file tree
Hide file tree
Showing 28 changed files with 784 additions and 522 deletions.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified .images/dashboard.png
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file modified DuTracker/__init__.py
100644 → 100755
Empty file.
Empty file modified DuTracker/db.py
100644 → 100755
Empty file.
Empty file modified DuTracker/items.py
100644 → 100755
Empty file.
Empty file modified DuTracker/middlewares.py
100644 → 100755
Empty file.
Empty file modified DuTracker/pipelines.py
100644 → 100755
Empty file.
Empty file modified DuTracker/settings.py
100644 → 100755
Empty file.
Empty file modified DuTracker/sign/__init__.py
100644 → 100755
Empty file.
Empty file modified DuTracker/sign/sign.js
100644 → 100755
Empty file.
Empty file modified DuTracker/sign/sign.py
100644 → 100755
Empty file.
Empty file modified DuTracker/spiders/__init__.py
100644 → 100755
Empty file.
135 changes: 68 additions & 67 deletions DuTracker/spiders/brand.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -10,82 +10,83 @@
from DuTracker.utils.log import log, handle_parse_exception
from DuTracker.items import ProductInfo
from DuTracker.utils.urls import get_brand_page_url as page_url
from DuTracker.utils.urls import get_headers as headers


class BrandSpider(scrapy.Spider):
    """Discover brands from the poizon.com category API, then enumerate
    every product id belonging to the brands the user selects.

    Flow: ``start_requests`` -> ``parse_brandList`` (collect brand ids,
    prompt for a selection unless ``auto``) -> ``parse_brandInfo`` (read the
    product total, fan out one request per 20-item page) ->
    ``parse_productId`` (yield one ``ProductInfo`` per product).
    """

    name = 'brand'
    allowed_domains = ['app.poizon.com']
    start_urls = [
        'https://app.poizon.com/api/v1/h5/product/fire/search/getCategoryDetail?catId=0&sign=4ff93b98af1253fe192ff1328ed09081'
    ]
    custom_settings = {
        'ITEM_PIPELINES': {
            'DuTracker.pipelines.SaveProductId': 300,
        }

    }
    # NOTE(review): class-level mutable containers are shared across all
    # instances of this spider; fine for a one-shot CLI crawl, but worth
    # confirming if spiders are ever instantiated more than once.
    brandIds = {}   # brand unionId (int) -> brand display name
    Ids = []        # preselected brand ids, consumed when auto is True
    auto = False    # True skips the interactive prompt and uses self.Ids

    def start_requests(self):
        # Kick off one request per start URL; signing headers come from
        # the shared get_headers() helper.
        log.info('获取品牌列表')
        for url in self.start_urls:
            yield Request(url, dont_filter=True, callback=self.parse_brandList, meta={
                'dont_retry': True
            }, headers=headers())

    @handle_parse_exception
    def parse_brandList(self, response):
        """Record every brand id/name pair, then request the first page of
        each brand the user (or ``self.Ids``) selected."""
        brandList = json.loads(response.body_as_unicode())['data']['list']
        for brand in brandList:
            unionId = brand['brand']['goodsBrandId']
            name = brand['brand']['brandName']
            self.brandIds[unionId] = name
            log.success(f'品牌:{name} 编号:{unionId}')

        if not self.auto:
            ids = prompt('输入需要爬取的品牌编号', default='').strip().split(' ')
            # Empty input means "crawl nothing": just end the generator.
            # (The old code did `return IgnoreRequest()`; inside a generator
            # that value is attached to StopIteration and silently discarded,
            # so a bare return is equivalent and clearer.)
            if ids == ['']:
                return
        else:
            ids = self.Ids
            if not ids:
                return

        for unionId in ids:
            log.info(f'unionId: {unionId}')
            # Prompt input arrives as strings, but brandIds is keyed by the
            # integer ids taken from the JSON payload.
            unionId = int(unionId)
            yield Request(page_url(unionId), callback=self.parse_brandInfo, meta={
                'unionId': unionId,
                'name': self.brandIds[unionId]
            }, headers=headers())

    @handle_parse_exception
    def parse_brandInfo(self, response):
        """Read the brand's product total and fan out one paged request per
        20-item page to ``parse_productId``."""
        data = json.loads(response.body_as_unicode())['data']
        unionId = response.meta.get('unionId')
        name = response.meta.get('name')

        num = data['total']
        page = math.ceil(num / 20)  # API pages hold 20 products each
        log.success(f'品牌:{name} 编号:{unionId} 商品总数:{num} 页面数:{page}')

        # Loop variable renamed so it no longer shadows the page *count*.
        for page_no in range(1, page + 1):
            yield Request(page_url(unionId, page_no), callback=self.parse_productId, meta={
                'unionId': unionId,
                'name': self.brandIds[unionId]
            }, headers=headers())

    @handle_parse_exception
    def parse_productId(self, response):
        """Yield one ProductInfo item per product on this result page."""
        productList = json.loads(response.body_as_unicode())['data']['productList']
        name = response.meta.get('name')  # same for every product on the page
        for product in productList:
            yield ProductInfo(
                id=product['productId'],
                title=product['title'],
                name=name,
            )
134 changes: 81 additions & 53 deletions DuTracker/spiders/product.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,71 +3,99 @@
from scrapy.http import Request
import json
import math
import execjs

from DuTracker.items import ProductItem
from DuTracker.sign.sign import sign
from DuTracker.utils.log import log, handle_parse_exception
from DuTracker.db import *
from DuTracker.utils.urls import get_headers as headers


def get_product_info_url(productId):
    """Build the signed product-detail API URL for *productId*.

    The request signature is produced by the ``getSign`` function inside
    ``DuTracker/sign/sign.js``: the signed string is the query parameters
    concatenated with an app-secret suffix, matching what the poizon.com
    endpoint expects.

    Returns the fully-qualified ``app.poizon.com`` detail URL as a str.
    """
    # Compile sign.js once and memoize the JS context on the function
    # object: reading and recompiling the script for every product URL
    # is pure per-call overhead.
    ctx = getattr(get_product_info_url, '_js_ctx', None)
    if ctx is None:
        with open('DuTracker/sign/sign.js', 'r', encoding='utf-8') as f:
            ctx = execjs.compile(f.read())
        get_product_info_url._js_ctx = ctx
    # Named `signature` (not `sign`) so it does not shadow the `sign`
    # module imported at the top of this file.
    signature = ctx.call(
        'getSign',
        'productId{}productSourceNamewx19bc545a393a25177083d4a748807cc0'.format(productId))

    # Example:
    # https://app.poizon.com/api/v1/h5/index/fire/flow/product/detail?productId=26850&productSourceName=wx&sign=0e145c5543d9751497a2e700bbea1e4c
    return ('https://app.poizon.com/api/v1/h5/index/fire/flow/product/detail?'
            'productId={}&productSourceName=wx&sign={}'.format(productId, signature))

class ProductSpider(scrapy.Spider):
    """Fetch the signed product-detail endpoint for a list of product ids
    and yield one ``ProductItem`` per product.

    Ids come either from ``self.productIds`` (set by the caller) or, when
    ``fromDB`` is true, from every ``Product`` row already in the database.
    """

    name = 'product'
    # allowed_domains intentionally unset: the detail API lives on
    # app.poizon.com, and the old m.poizon.com restriction would filter
    # every request out.
    custom_settings = {
        'ITEM_PIPELINES': {
            'DuTracker.pipelines.SaveProductItem': 300,
        }

    }
    # NOTE(review): class-level mutable list is shared across instances;
    # acceptable for a one-shot crawl, but confirm if reused.
    productIds = []
    fromDB = False

    @db_session
    def start_requests(self):
        log.info('获取商品详情')
        if self.fromDB:
            # Seed the id list from every Product already stored in the DB.
            # (Was a side-effecting list comprehension; extend() says what
            # it means and builds no throwaway list.)
            self.productIds.extend(p.id for p in Product.select())
        for pid in self.productIds:
            log.info(f'获取商品详情request {pid}')
            url = get_product_info_url(pid)
            log.info(f'商品详情request url:{url}')
            log.info("headers ---> {0}".format(headers()))
            yield Request(url, headers=headers())

    @handle_parse_exception
    def parse(self, response):
        """Flatten the detail payload into a single ProductItem."""
        data = json.loads(response.body_as_unicode())['data']
        detail = data['detail']

        yield ProductItem(
            id=detail['productId'],
            url=response.url,
            title=detail['title'],
            soldNum=detail['soldNum'],
            logo=detail['logoUrl'],
            categoryId=detail['categoryId'],
            images=[image['url'] for image in detail['images']],
            sellDate=detail['sellDate'],
            articleNumber=detail['articleNumber'],
            authPrice=detail['authPrice'],
            goodsId=detail['goodsId'],
            sizeList=detail['sizeList'],
            imageAndText=data['imageAndText'],
            # Keep the raw detail dict too, for fields not lifted above.
            detailJson=detail,
        )
Loading

0 comments on commit 98b50e0

Please sign in to comment.