forked from SilverComet7/yolov5-DNF
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: 爬取计算机各分类书籍 每个分类500本?总共11112本 todo 去重
- Loading branch information
1 parent
b18a312
commit b5c69fc
Showing
13 changed files
with
4,375 additions
and
54 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import time | ||
import random | ||
import requests | ||
import csv | ||
|
||
# 不重复,完整,中断要可续接,进数据库或者excel记录 | ||
|
||
# 书名 | ||
# 分类 | ||
# 最大免费章节 maxFreeChapter | ||
# 推荐值 newRating | ||
# 出版时间 | ||
# 介绍 | ||
# 图片 | ||
# 推荐值 | ||
|
||
|
||
def getWXBooks(booksId, maxPages=554, pageSize=20, delayRange=(1, 3), session=None, timeout=30):
    """Collect the book entries listed under one WeRead category.

    Pages through ``/web/bookListInCategory/<booksId>?maxIndex=<offset>`` and
    gathers the items of each response's ``books`` list until the response's
    ``hasMore`` field is falsy or ``maxPages`` pages have been requested.

    Args:
        booksId: Category id interpolated into the request URL.
        maxPages: Upper bound on the number of pages requested.
        pageSize: Step added to ``maxIndex`` between consecutive pages.
        delayRange: ``(low, high)`` bounds, in whole seconds, of the random
            pause taken before every request.
        session: Optional object exposing ``get(url, timeout=...)`` (for
            example a ``requests.Session``); the ``requests`` module itself
            is used when omitted.
        timeout: Per-request timeout in seconds.

    Returns:
        A list with the raw entries from every fetched page, in page order.

    Raises:
        requests.HTTPError: If a page answers with an error status.
    """
    client = requests if session is None else session
    urlTemplate = 'https://weread.qq.com/web/bookListInCategory/{booksId}?maxIndex={page}'
    bookList = []

    for pageIndex in range(maxPages):
        page = pageIndex * pageSize
        # Random pause so consecutive requests are not fired back to back.
        time.sleep(random.randint(*delayRange))
        res = client.get(urlTemplate.format(booksId=booksId, page=page), timeout=timeout)
        res.raise_for_status()
        payload = res.json()  # parse the body once and reuse it
        books = payload.get('books') or []
        hasMore = payload.get('hasMore', 0)
        print(page, hasMore, len(books))
        # Keep this page's books BEFORE testing hasMore; otherwise the final
        # page of the category would be thrown away.
        bookList.extend(books)
        if not hasMore:
            break
    return bookList
|
||
|
||
|
||
def saveBooksCsv(csvName, books):
    """Write the ``bookInfo`` record of each crawled book to ``<csvName>.csv``.

    Args:
        csvName: File name (without the ``.csv`` suffix) of the output file.
        books: Iterable of entries as returned by ``getWXBooks``; each entry
            must carry a ``bookInfo`` dict.

    Only the columns listed in ``fieldnames`` are written; other keys of
    ``bookInfo`` are ignored and missing keys produce an empty cell.
    """
    fieldnames = ['title', 'publishTime', 'category', 'intro', 'maxFreeChapter',
                  'newRating', 'free', 'price', 'cover']
    with open('{csvName}.csv'.format(csvName=csvName), 'w', encoding='UTF8', newline='') as f:
        # restval is the filler for missing keys: it must be empty, not the
        # literal text 'intro', or absent columns get polluted with "intro".
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
        # Header row first, then one row per book.
        writer.writeheader()
        writer.writerows(book['bookInfo'] for book in books)


# Category ids to crawl; one CSV file is produced per id.
BookId = [700003, 700004, 700005, 700006, 700007]

if __name__ == '__main__':
    for categoryId in BookId:
        print(categoryId)
        saveBooksCsv(categoryId, getWXBooks(categoryId))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Sample global-search request; the keyword parameter is the percent-encoded
# Chinese term for "computer vision":
# https://weread.qq.com/web/search/global?keyword=%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%89&maxIdx=180&fragmentSize=120&count=20
# List of search keywords; it currently holds a single empty string.
searchKeyWord = [''] |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1 @@ | ||
title,publishTime | ||
脑机穿越:脑机接口改变人类未来,2015-03-01 00:00:00 | ||
深度学习:核心技术、工具与案例解析,2018-06-01 00:00:00 | ||
智能制造时代的研发智慧:知识工程2.0,2017-05-01 00:00:00 | ||
第二次机器革命,2016-12-01 00:00:00 | ||
AI速成课:从AI编程到构建智能软件,2020-07-01 00:00:00 | ||
人工智能程序员面试笔试宝典,2019-12-24 00:00:00 | ||
人工智能,2016-09-01 00:00:00 | ||
人工智能:智能制造,2020-12-01 00:00:00 | ||
人工智能会抢哪些工作,2018-05-01 00:00:00 | ||
OpenCV 4机器学习算法原理与编程实战,2021-04-01 00:00:00 | ||
联邦学习技术及实战,2021-03-01 00:00:00 | ||
PaddlePaddle深度学习实战,2018-05-01 00:00:00 | ||
人工智能:理论基础+商业落地+实战场景+案例分析,2021-03-01 00:00:00 | ||
人工智能算法(卷3):深度学习和神经网络,2021-03-01 00:00:00 | ||
TensorFlow学习指南:深度学习系统构建详解,2018-05-01 00:00:00 | ||
机器新脑:我是如何学会停止担忧并爱上AI的,2021-01-21 00:00:00 | ||
人工智能算法Python案例实战,2021-01-01 00:00:00 | ||
传感器技术及应用,2013-02-01 00:00:00 | ||
人人可懂的深度学习,2021-04-01 00:00:00 | ||
title,author,publishTime,category,intro,free,price,cover |