Skip to content

Commit

Permalink
Update and rename start.py to start01.py
Browse files Browse the repository at this point in the history
  • Loading branch information
voicegao authored Jun 17, 2022
1 parent 65019e7 commit 446f741
Showing 1 changed file with 11 additions and 38 deletions.
49 changes: 11 additions & 38 deletions start.py → start01.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,46 +143,19 @@ def __init__(self, url, pubdate, idx, title):
self.title = title


# Extract article URLs and related info from the JSON files saved by Fiddler.
# NOTE: this was hacked up from the original capture-reading tool:
#   - the timestamp of the hard-coded article is arbitrary;
#   - its article link has to be looked up and written into the code;
#   - its title is an arbitrary name.
# The hard-coded entry only exists so that an official-account article whose
# link is already known can be downloaded.
def GetArticleList(jsondir):
    """Collect the articles to download.

    Every file in ``jsondir`` is read with ``ReadFile`` and parsed as JSON.
    Articles found in its ``general_msg_list`` payload are appended to the
    result, followed by one hard-coded article.

    Args:
        jsondir: Directory containing the saved JSON files.

    Returns:
        A list of ``Article`` objects: the articles parsed from the files
        (in directory-listing order) and the hard-coded article last.

    Raises:
        OSError: If ``jsondir`` cannot be listed. Errors raised while
            processing an individual file are reported and that file is
            skipped.
    """
    ArtList = []

    def add_article(url, pubdate, idx, title):
        # Entries whose URL is incomplete (3 characters or fewer) are dropped.
        art = Article(url, pubdate, idx, title)
        if len(url) > 3:
            ArtList.append(art)
            print(len(ArtList), pubdate, idx, title)

    for file in os.listdir(jsondir):
        try:
            filepath = os.path.join(jsondir, file)
            jsbody = json.loads(ReadFile(filepath))
            # "general_msg_list" is itself a JSON-encoded string: decode again.
            msg_items = json.loads(jsbody["general_msg_list"])["list"]
            for item in msg_items:  # one item may hold several articles
                comm_msg_info = item["comm_msg_info"]
                pubdate = Timestamp2Datetime(comm_msg_info["datetime"])
                # 49 is the ordinary article type; other types are ignored.
                if comm_msg_info["type"] != 49:
                    continue
                app_msg_ext_info = item["app_msg_ext_info"]
                # idx only keeps saved HTML files apart; it does not reflect
                # the real position of the article inside the push.
                add_article(
                    app_msg_ext_info["content_url"],
                    pubdate,
                    1,
                    app_msg_ext_info["title"],
                )
                if app_msg_ext_info["is_multi"] == 1:  # several articles at once
                    sub_articles = app_msg_ext_info["multi_app_msg_item_list"]
                    # Number sub-articles 2, 3, ... so that entries from the
                    # same push do not all share idx 2.
                    for idx, subArt in enumerate(sub_articles, start=2):
                        add_article(
                            subArt["content_url"], pubdate, idx, subArt["title"]
                        )
        except Exception:
            # Best effort: a file that cannot be read or parsed is skipped.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print("跳过,可不用管", file)

    # Hard-coded article: arbitrary timestamp and title, known article link.
    pubstamp = 1845588900
    pubdate = Timestamp2Datetime(pubstamp)
    url = "https://mp.weixin.qq.com/s/j12KabNDpGiWaePoBIs6kQ"  # article link
    idx = 1
    title = "mybest"
    ArtList.append(Article(url, pubdate, idx, title))
    return ArtList


Expand Down

0 comments on commit 446f741

Please sign in to comment.