forked from kgspider/crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8b0bac7
commit 7b26081
Showing
3 changed files
with
118 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
var CryptoJS = require('crypto-js'); | ||
|
||
|
||
/**
 * Hash `value` with the digest algorithm named by `type`.
 *
 * @param {string} type - Algorithm name: 'md5', 'sha1' or 'sha256'.
 * @param {string} value - Text to hash.
 * @returns {string} The digest as rendered by the CryptoJS result's
 *     default toString().
 * @throws {Error} If `type` is not one of the supported algorithm names.
 */
function hash(type, value) {
    switch (type) {
        case 'md5':
            return CryptoJS.MD5(value).toString();
        case 'sha1':
            return CryptoJS.SHA1(value).toString();
        case 'sha256':
            return CryptoJS.SHA256(value).toString();
        default:
            // Fail loudly: silently returning undefined here would make every
            // candidate comparison in the caller fail without explanation.
            throw new Error('Unsupported hash type: ' + String(type));
    }
}
|
||
|
||
// Operator lookup table left over from the obfuscated original script.
// Each entry wraps a single binary operator so call sites can write
// _0x2228a0["key"](a, b) instead of using the operator directly.
// The three addition entries are not referenced by the code visible here;
// they are kept so any other consumer of the table keeps working.
var _0x2228a0 = {
    // a < b
    "mLZyz": function (_0x435347, _0x8098d) {
        return _0x435347 < _0x8098d;
    },
    // a + b
    "SsARo": function (_0x286fd4, _0x10b2a6) {
        return _0x286fd4 + _0x10b2a6;
    },
    // a + b
    "jfMAx": function (_0x6b4da, _0x19c099) {
        return _0x6b4da + _0x19c099;
    },
    // a + b
    "HWzBW": function (_0x3b9d7f, _0x232017) {
        return _0x3b9d7f + _0x232017;
    },
    // a == b (loose equality, as in the original)
    "DRnYs": function (_0x4573a2, _0x3855be) {
        return _0x4573a2 == _0x3855be;
    },
    // a - b
    "ZJMqu": function (_0x3af043, _0x1dbbb7) {
        return _0x3af043 - _0x1dbbb7;
    },
};
|
||
|
||
/**
 * Recover the `__jsl_clearance_s` cookie value by brute force.
 *
 * The candidate value is `bts[0] + c1 + c2 + bts[1]`, where c1 and c2 are
 * each one character taken from `chars`. Every pair is tried in order until
 * the candidate's digest (computed by hash() with algorithm `ha`) equals `ct`.
 *
 * @param {Object} params - Challenge parameters.
 * @param {string} params.chars - Characters to try for the two unknown positions.
 * @param {string[]} params.bts - Two-element array: [prefix, suffix].
 * @param {string} params.ha - Hash algorithm name passed through to hash().
 * @param {string} params.ct - Expected digest of the correct candidate.
 * @returns {{__jsl_clearance_s: string}} Object holding the matching candidate.
 * @throws {Error} If no pair of characters produces the expected digest.
 */
function cookies(params) {
    var alphabet = params['chars'];
    var prefix = params['bts'][0];
    var suffix = params['bts'][1];
    var algorithm = params['ha'];
    var expected = params['ct'];
    var count = alphabet['length'];

    for (var first = 0; first < count; first++) {
        for (var second = 0; second < count; second++) {
            var candidate = prefix + alphabet.charAt(first) + alphabet.charAt(second) + suffix;
            if (hash(algorithm, candidate) === expected) {
                return {'__jsl_clearance_s': candidate};
            }
        }
    }

    // Previously this fell through and crashed with an opaque TypeError when
    // indexing an undefined result; report the real cause instead.
    throw new Error('No clearance value matches the expected ' + String(algorithm) + ' digest');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# ======================= | ||
# --*-- coding: utf-8 --*-- | ||
# @Time : 2022/7/27 | ||
# @Author : 微信公众号:K哥爬虫 | ||
# @FileName: jsl.py | ||
# @Software: PyCharm | ||
# ======================= | ||
|
||
|
||
import json | ||
import re | ||
import requests | ||
import execjs | ||
|
||
|
||
# Cookie values shared by every request step; the get_*_cookie functions
# below fill this dict in as each challenge layer is solved.
cookies = {}
# Request headers sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
# Page that is fetched at every step.
url = "https://www.mps.gov.cn/index.html"
|
||
|
||
def get_first_cookie():
    """Solve the first challenge layer and store its cookies.

    Requests ``url``, copies the cookies set by the response into the
    module-level ``cookies`` dict, then evaluates the JavaScript expression
    assigned to ``cookie`` in the response body and stores the resulting
    ``__jsl_clearance_s`` value.

    Raises:
        RuntimeError: If the response body does not contain the expected
            ``cookie=...;location`` challenge expression.
    """
    resp_first = requests.get(url=url, headers=headers)
    # Keep the cookie(s) set by the server (__jsluid_s).
    cookies.update(resp_first.cookies)
    # Extract the first-layer challenge expression (AAEncode-obfuscated JS).
    match = re.search('cookie=(.*?);location', resp_first.text)
    if match is None:
        raise RuntimeError("first-layer cookie challenge not found in response")
    # NOTE(review): this evaluates JavaScript received from the remote server.
    jsl_clearance_s = execjs.eval(match.group(1)).split(';')[0]
    # Store the __jsl_clearance_s value; split only on the first '=' so a
    # value that itself contains '=' is not truncated.
    cookies['__jsl_clearance_s'] = jsl_clearance_s.split("=", 1)[1]
|
||
|
||
def get_second_cookie():
    """Fetch the second challenge layer and return its ``go(...)`` parameters.

    Sends the cookies collected so far (``__jsluid_s`` and
    ``__jsl_clearance_s``) and parses the JSON argument of the ``go(...)``
    call found in the response body.

    Returns:
        The decoded JSON argument of ``go(...)``.

    Raises:
        RuntimeError: If the response body contains no ``;go(...)</script>`` call.
    """
    # Request the second-layer response using the cookies gathered so far.
    resp_second = requests.get(url=url, headers=headers, cookies=cookies)
    # Extract the argument passed to go(); raw string so that \( and \)
    # reach the regex engine as written.
    match = re.search(r';go\((.*?)\)</script>', resp_second.text)
    if match is None:
        raise RuntimeError("second-layer go(...) parameters not found in response")
    params = json.loads(match.group(1))
    return params
|
||
|
||
def get_third_cookie():
    """Compute the final clearance cookie and merge it into ``cookies``.

    Reads ``jsl.js`` (resolved relative to the current working directory),
    obtains the second-layer parameters, and calls the script's ``cookies``
    function with them. The returned mapping is merged into the module-level
    ``cookies`` dict.
    """
    with open('jsl.js', 'r', encoding='utf-8') as f:
        jsl_js = f.read()
    params = get_second_cookie()
    # Pass the parameter dict straight through to the JS function.
    third_cookie = execjs.compile(jsl_js).call('cookies', params)
    cookies.update(third_cookie)
|
||
|
||
def main():
    """Run the cookie steps in order, then fetch and print the page."""
    get_first_cookie()
    # get_third_cookie() calls get_second_cookie() itself.
    get_third_cookie()
    resp_third = requests.get(url=url, headers=headers, cookies=cookies)
    # Decode the body as UTF-8 rather than relying on the detected encoding.
    resp_third.encoding = 'utf-8'
    print(resp_third.text)


if __name__ == '__main__':
    main()