Skip to content

Commit

Permalink
Merge pull request jjeejj#12 from niyalishanda/master
Browse files Browse the repository at this point in the history
1: 解决非法文件名问题
2: 指定文章章节进行下载
  • Loading branch information
jjeejj authored Apr 22, 2020
2 parents 362d644 + 86c9b5c commit 0fc5796
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 10 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module.exports = {
columnBaseUrl: 'https://time.geekbang.org/column/article/', // 该配置项不需要改动
columnName: '玩转VScode', // 专栏名称
firstArticalId: 18053, //专栏第一篇文章的ID
articalIds: [201700,202772,204472,205784], //指定下载的articalId, 优先级更高, 配置后firstArticalId配置将失效
isdownloadVideo: false, // 是否下载音频
isComment: false, // 是否导出评论
cookie: 'cookie'
Expand All @@ -30,6 +31,8 @@ module.exports = {

* `firstArticalId` 建议配置为专栏第一篇文章的 `ID`,这样可以获取专栏的所有文章;若配置的不是第一篇文章的 `ID`,则只会获取该文章及其之后的文章

* `articalIds` 配置为需要下载的文章 `ID` 列表,程序会按列表顺序依次下载;该参数非空时优先级高于 `firstArticalId`,此时 `firstArticalId` 配置将失效

* `cookie` 你在网页版登录后返回的`cookie`信息

### 运行
Expand Down
35 changes: 28 additions & 7 deletions columnArticleList.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,23 @@ const downloadComment = require('./downloadComment.js');
* 执行方法
*/
(async function getColumnArticleList (firstArticalId){
await utils.createDir('geektime_'+config.columnName);
await utils.createDir('geektime_' + config.columnName);
console.log('专栏文章链接开始获取');
let columnArticleUrlList = [];
let type = 0;

//指定id下载
let assignIndex = 1;
if (config.articalIds && config.articalIds.length > 0) {
type = 1;
firstArticalId = config.articalIds[0];
console.log('通过articalIds配置进行文章获取');
} else {
console.log('通过firstArticalId配置进行文章获取');
}

let articalId = firstArticalId;

async function getNextColumnArticleUrl (){
try {
let res = await superagent.post(config.url)
Expand Down Expand Up @@ -56,8 +69,8 @@ const downloadComment = require('./downloadComment.js');
articleInfo.commentsTotal = commentsTotal;
articleInfo.commentsArr = commentsArr;
};
// 替换文章名称的 / 线, 解决路径被分割的问题
let useArticleTtle = columnArticle.article_title.replace(/\//g, '-');
// 替换非法文件名
let useArticleTtle = columnArticle.article_title.replace(/[\/:*?"<>|]/g, '-');
//生成PDF
await generaterPdf(articleInfo,
useArticleTtle + '.pdf',
Expand All @@ -71,18 +84,26 @@ const downloadComment = require('./downloadComment.js');
path.resolve(__dirname, 'geektime_' + config.columnName)
);
};

// 判断是否还有下一篇文章
let neighborRight = columnArticle.neighbors.right;
if (neighborRight && neighborRight.id){
articalId = neighborRight.id;
// Decide which article id to fetch next.
// type 1: walk the configured config.articalIds list in order; assignIndex
//         is the position of the next entry to use.
// otherwise: follow the "right" neighbor reported for the current article.
let nextId;
if (type === 1) {
    nextId = config.articalIds.length > assignIndex ? config.articalIds[assignIndex] : undefined;
    assignIndex++;
} else {
    // neighbors.right is an object describing the next article; only its id
    // must be carried forward. Assigning the whole object would turn
    // articalId into an object and break the next request.
    const rightNeighbor = columnArticle.neighbors.right;
    nextId = rightNeighbor && rightNeighbor.id ? rightNeighbor.id : undefined;
}

if (nextId){
articalId = nextId;
await utils.sleep(1.5);
await getNextColumnArticleUrl();
};
} catch(err){
console.log(`访问 地址 ${config.columnBaseUrl + articalId} err`, err.message);
};
};
await getNextColumnArticleUrl(firstArticalId);
await getNextColumnArticleUrl();
console.log('专栏文章链接获取完成');
utils.writeToFile(`geektime_${config.columnName}`, JSON.stringify(columnArticleUrlList,null,4));
return columnArticleUrlList;
Expand Down
5 changes: 3 additions & 2 deletions config.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ module.exports = {
url: 'https://time.geekbang.org/serv/v1/article',
commentUrl: 'https://time.geekbang.org/serv/v1/comments',
columnBaseUrl: 'https://time.geekbang.org/column/article/',
columnName: '软件工程之美',
firstArticalId: 85730, //专栏第一篇文章的ID
columnName: '分布式协议与算法实战',
firstArticalId: 201700, //专栏第一篇文章的ID
articalIds: [201700,202772,204472,205784], //指定下载的articalId, 优先级更高, 配置后firstArticalId配置将失效
isdownloadVideo: true, // 是否下载音频
isComment: true, // 是否导出评论
cookie: 'cookie'
Expand Down
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0fc5796

Please sign in to comment.