Skip to content

Commit

Permalink
Update dianping
Browse files Browse the repository at this point in the history
  • Loading branch information
lxb321 committed Aug 10, 2019
1 parent 71a77c0 commit 4eb81d1
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 2,807 deletions.
14 changes: 3 additions & 11 deletions FontDianPing/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
@file:constants.py
@time:2019/6/4-9:12
"""
# Zhima proxy (test) -- proxy endpoint URL; empty string by default
PROXY_URL = ""
# Abuyun proxy

# 列表页请求头
HEADERS = {
"Referer": "http://www.dianping.com/",
Expand All @@ -26,7 +22,6 @@
'Upgrade-Insecure-Requests': '1',
}

# Browser User-Agent string (Chrome 75 on 64-bit Windows 10)
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36"
# Random delay range as a (low, high) pair
# NOTE(review): presumably seconds to sleep between comment requests -- confirm against the caller
COMMENTS_SLEEP = (20, 30)

Expand All @@ -37,7 +32,8 @@
# Extract the CSS link from the page body: group 1 is the href value
# that contains "svgtextcss"
PATTERN_SVG_CSS = r'href="([^"]+svgtextcss[^"]+)"'
# Match an SVG <text> element: group 1 is the run of digits directly before
# the closing '">' that follows "y=", group 2 is the element's text content
PATTERN_SVG_TEXT = r'y=.*?(\d+)">(.*?)</text>'

# Extract the custom font link from an @font-face rule: group 1 is the
# font-family name, group 2 is the .woff URL inside the url("...") that
# follows format("embedded-opentype"),
PATTERN_FONT_NAME = r"@font-face{font-family: \"(.*?)\";.*?format\(\"embedded-opentype\"\),url\(\"(.*?\.woff)\"\)"
# Request protocol (URL scheme prefix)
# NOTE(review): presumably prepended to protocol-relative "//..." links -- confirm against the caller
PAGE_PREFIX = "https:"
# 请求资源前缀
Expand All @@ -46,9 +42,9 @@
# NOTE(review): presumably the crawl entry point (a dianping.com listing page) -- confirm against the caller
START_URL = "https://www.dianping.com/hangzhou/ch10/g101"
# Tags that carry encrypted text
DECRYPT_TAGS = ['d', 'e', 'svgmtsi', 'span']

# Span classes inside encrypted text that are not valid and are ignored
IGNORED_SPAN_CLASS = ['info-name', ]

# HTTP methods allowed for network requests
HTTP_METHODS = ['get', 'head', 'post', 'put', 'options']

Expand All @@ -70,7 +66,3 @@
'ERROR': '%(asctime)s %(name)s(%(levelname)s) - %(message)s',
'CRITICAL': '%(asctime)s %(name)s(%(levelname)s) - %(message)s',
}

# Mongodb配置
# MONGO_CLIENT = 'mongodb://localhost:27017'

3 changes: 3 additions & 0 deletions FontDianPing/decrypt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from constants import PATTERN_SVG_TEXT


# ============ WOFF ============
def _decrypt_woff_tag(unitext, dictionary):
"""
解析自定义字体
Expand All @@ -17,9 +18,11 @@ def _decrypt_woff_tag(unitext, dictionary):
for key, value in dictionary.items():
# ef44 in unief44
if unitext in key:

return value


# ============ SVG ============
def _decrypt_text_tag(svg_content, font_size, x_offset, y_offset):
"""
:param svg_content:
Expand Down
Loading

0 comments on commit 4eb81d1

Please sign in to comment.