Skip to content

Commit

Permalink
add some examples
Browse files Browse the repository at this point in the history
  • Loading branch information
alphardex committed Dec 8, 2019
1 parent b5a4796 commit ce3f3e0
Show file tree
Hide file tree
Showing 15 changed files with 4,360 additions and 101 deletions.
10 changes: 10 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@

- [bilibili_top_bangumi](bilibili_top_bangumi.py): bilibili番剧排行榜

- [codemyui](codemyui.py): codemyui网站归档

- [dlsite](dlsite.py): DLsite上的黄油,按打分排序

- [douban_books](douban_books.py): 豆瓣上的“计算机”标签的书籍

- [douban_book_archive](douban_book_archive.py): 我看过的书籍在豆瓣上的归档

- [douban_movie_archive](douban_movie_archive.py): 我看过的电影在豆瓣上的归档

- [gen_index](gen_index.py): 用来自动生成这篇README.md

- [hacpai](hacpai.py): 黑客派上“好玩”领域的文章,按阅览数排序
Expand All @@ -38,8 +44,12 @@

- [tieba](tieba.py): 百度贴吧的帖子,用HTML注释反爬,把注释去掉再解析HTML就行

- [uimovement](uimovement.py): uimovement网站归档

- [unsplash](unsplash.py): unsplash上的免费壁纸排行

- [v2ex](v2ex.py): v2ex上的Python板块

- [zhangxinxu](zhangxinxu.py): 知名CSS博主张鑫旭的博客归档

- [zhihu_top](zhihu_top.py): 知乎最高点赞的答案排行
4 changes: 2 additions & 2 deletions examples/bangumi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import re
import looter as lt

# Site root for bangumi.
# NOTE(review): both the http and the https assignment are present in this
# excerpt; the second one is the value in effect at runtime.
domain = 'http://bangumi.tv'
domain = 'https://bangumi.tv'
# presumably the bangumi user whose pages are crawled; usage is outside this
# excerpt, so confirm against the rest of the script.
user_id = '399538'
# presumably the number of listing pages to fetch; usage not visible here.
# NOTE(review): assigned twice; the later value (5) wins.
page_limit = 4
page_limit = 5

def format_date(date):
    """Convert a date such as ``2019年12月8日`` into zero-padded ``2019-12-08``.

    Args:
        date: Date string whose parts are separated by 年/月/日 markers
            (or already by ``-``).

    Returns:
        The parts joined with ``-``; every one-character part is left-padded
        with ``0``.
    """
    # Turn each CJK unit marker into '-', then drop the dash left behind by the
    # final marker. rstrip (instead of slicing off the last character) keeps
    # the last digit intact when the input has no trailing marker.
    dashed = re.sub(r'年|月|日', '-', date).rstrip('-')
    return '-'.join(
        f'0{part}' if len(part) == 1 else part for part in dashed.split('-')
    )

Expand Down
29 changes: 29 additions & 0 deletions examples/codemyui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
codemyui网站归档
"""
import requests
from parsel import Selector
import looter as lt

# Site root; the AJAX listing URLs in the task list are built from it.
domain = 'https://codemyui.com'
# Collects one dict per post across all crawl() calls; written to CSV at the end.
total = []


def crawl(url, timeout=30):
    """Fetch one page of the codemyui AJAX listing and collect its posts.

    The response is parsed as JSON and its ``html`` field is treated as an
    HTML fragment; every ``.alm-layout .details`` node in that fragment
    becomes one record appended to the module-level ``total`` list.

    Args:
        url: Full URL of the listing page to request.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole crawl.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
    """
    response = requests.get(url, headers=lt.DEFAULT_HEADERS, timeout=timeout)
    # Surface the real HTTP error rather than a confusing JSON/KeyError below.
    response.raise_for_status()
    tree = Selector(text=response.json()['html'])
    total.extend(
        {
            'title': item.css('h3 a::text').extract_first(),
            'url': item.css('h3 a::attr(href)').extract_first(),
            'description': item.css('p::text').extract_first(),
        }
        for item in tree.css('.alm-layout .details')
    )


if __name__ == '__main__':
tasklist = [
f'{domain}/wp-admin/admin-ajax.php?id=LoadArticleToo&post_id=0&slug=home&canonical_url=https%3A%2F%2Fcodemyui.com%2F&posts_per_page=120&page={n}&offset=0&post_type=post&repeater=default&seo_start_page=1&preloaded=false&preloaded_amount=0&cta[cta]=true&cta[cta_position]=after:4&cta[cta_repeater]=template_1&cta[cta_theme_repeater]=null&taxonomy_terms=home&order=DESC&orderby=post__in&post__in=28560,28549,28555,28541,27445,28536,28530,28511,28501,27445,28473,28462,28455,17739,27445,28426,28419,28400,25214,27445,28375,19762,25308,197,27445,24874,217,4011,248,27445,24628,23147,24834,242,27445,76,5016,23177,22950,27445,25409,24739,25519,24684,27445,201,330,22918,24952,27445,5230,20146,17596,36,27445,270,230,206,27558,27445,27415,27409,27404,27362,27445,27357,27352,27347,27342,27445,27330,27324,27319,27312,27445,27307,27286,27270,27265,27445,27260,27238,27234,27230,27445,27226,27222,27218,27213,27445,27208,27203,27190,27179,27445,27142,27062,27058,27051,27445,27046,27041,27038,27034,27445,27030,27027,27023,27020,27445,26988,26978,60,26972,27445,26968,26942,26938,26934,27445,26914,26910,26907,26903,27445,26900,26884,26896,26893,27445,26881,26878,26875,26869,27445,26872,26866,26854,26845,27445,26839,26835,26823,26831,27445,26827,26818,26814,26810,27445,26806,26791,26795,26786,27445,26777,26782,26770,26766,27445,26761,26756,26751,26747,27445,26733,26740,26722,26716,27445,26706,26702,26695,26691,27445,26687,26683,26677,26673,27445,26669,26665,26661,26656,27445,26652,26606,26602,26598,27445,26594,26573,26566,26562,27445,26556,26552,26468,26505,27445,26501,26472,26461,26449,27445,26445,26437,26434,26441,27445,26412,26416,26408,26404,27445,26400,26393,26389,26382,27445,26373,25651,25646,25637,27445,25634,25630,25626,25623,27445,25620,25617,25614,25611,27445,25540,25543,25534,25524,27445,25513,25507,25502,25490,27445,25485,25481,25477,25474,27445,25470,25462,25466,25458,27445,25454,25448,25444,25438,27445,25434,25430,25423,25419,27445,25415,25401,25405,25397,27445,25393,25388,25384
,25378,27445,25374,25370,25317,25365,27445,25360,25355,25351,25342,27445,25322,25327,25337,25333,27445,25312,25009,25244,25299,27445,25135,25290,25286,25281,27445,25238,25208,25229,25232,27445,25247,25262,25226,25235,27445,25203,25170,25078,25558,27445,25177,25139,25173,25122,27445,25142,25075,24983,25116,27445,25119,25094,25099,25103,27445,25085,25072,24971,24996,27445,24989,25061,25041,25044,27445,25037,25034,25030,24986,27445,25012,25018,24978,24977,27445,24974,24958,14552,24817,27445,24853,24919,24922,24910,27445,24879,24905,24900,24896,27445,24892,24888,24884,24870,27445,25347,24845,24848,24842,27445,24839,24823,24820,24810,27445,24806,24782,24801,24797,27445,24791,24787,24749,24753,27445,24756,24746,24743,24729,27445,24719,24726,24723,24716,27445,24701,24704,24698,24694,27445,24690,24668,24678,24673,27445,24663,24659,23623,24612,27445,23592,23253,23235,23219,27445,23180,23112,23091,23070,27445,23050,23034,22974,22869,27445,20397,20369,20127,22838,27445,20111,20095,20077,20062,27445,20034,19981,19960,19941,27445,19925,19902,19880,19861,27445,19828,19812,19796,19778,27445,19747,17941,17925,17888,27445,17904,17860,17578,17670,27445,15715,17636,17771,17755,27445,17723,17250,15834,17652,27445,17562,17526,17510,17494,27445,17478,17446,17427,17373,27445,17391,17407,17234,17355,27445,17340,17322,17303,17290,27445,17218,17186,17170,15919,27445,15895,15877,15591,15850,27445,15818,15802,15786,15757,27445,15737,15699,15418,15366,27445,15666,15643,15625,15400,27445,15573,15549,15520,15503,27445,15486,15470,15452,15434,27445,15383,14362,14330,14813,27445,15180,15268,15228,15093,27445,15059,14347,14841,14830,27445,14864,14878,14315,14391,27445,14379,14535,14565,14596,27445,14410,14447,14428,13889,27445,13936,13953,13968,14007,27445,13314,13903,13983,14024,27445,13632,12369,13647,13662,27445,13617,13835,13602,13458,27445,13058,13473,13406,13428,27445,13443,12924,13043,13028,27445,12910,13075,13091,12895,27445,13108,12947,11817,10965,27445,12608,12625,12645,12667,27445,12568,1
2581,12411,12395,27445,12429,12446,12340,11802,27445,12286,12094,12076,12119,27445,12148,12052,12006,7597,27445,6161,6533,11837,10931,27445,10912,10889,10865,6389,27445,6805,6100,7582,6194,27445,6434,6741,6491,6662,27445,6628,6579,6349,6307,27445,6129,6078,6204,5972,27445,4607,5931,5150,5848,27445,5814,5790,5752,5728,27445,5682,5137,5122,5109,27445,5487,5444,5405,5367,27445,5334,5301,5158,5085,27445,5066,5000,4983,4943,27445,4925,4878,4842,3131,27445,4827,4753,4724,4689,27445,4583,4550,4564,4416,27445,4511,4379,4446,4463,27445,4399,4382,4359,4337,27445,4207,4083,4107,4097,27445,4073,4244,4224,4139,27445,3981,4038,3963,3942,27445,3919,3902,3830,3844,27445,3403,3761,3742,3708,27445,3310,3377,3238,3659,27445,3547,3573,3590,3290,27445,3300,3280,3325,3357,27445,3331,3221,3204,3166,27445,3112,3092,2945,3053,27445,3024,3007,1407,2602,27445,2965,2523,2535,2571,27445,2529,2580,2586,2594,27445,2755,2365,1813,2382,27445,2499,2481,2464,2445,27445,2427,2256,2297,2275,27445,2349,2321,2240,2191,27445,2167,2137,2124,2083,27445,1895,1515,1925,1755,27445,1870,1848,1773,1742,27445,1549,1531,1688,1669,27445,1603,1639,1622,1586,27445,1436,1567,1479,1463,27445,1409,1422,1432,1244,27445,1428,1419,1414,1400,27445,1395,1387,1384,1379,27445,1317,1355,1351,1336,27445,1229,1251,1332,1233,27445,1323,1313,1296,1292,27445,1288,1255,1238,1226,27445,1223,1219,1211,929,27445,926,923,917,912,27445,908,905,291,292,27445,16,17,18,19,27445,20,21,22,23,27445,24,25,26,27,27445,28,29,30,31,27445,32,33,34,35,27445,37,38,39,40,27445,41,42,43,44,27445,45,46,47,48,27445,1345,49,50,51,27445,52,53,54,55,27445,56,57,58,59,27445,61,62,63,64,27445,65,66,67,68,27445,69,70,71,72,27445,73,74,75,77,27445,78,79,80,81,27445,82,83,84,85,27445,86,87,88,89,27445,90,91,92,93,27445,94,95,96,97,27445,98,99,100,101,27445,102,103,104,105,27445,106,107,108,109,27445,110,111,112,113,27445,114,115,116,117,27445,118,119,120,121,27445,122,123,124,125,27445,126,127,128,129,27445,130,131,132,133,27445,134,135,136,137,27445,138,139,140,
141,27445,142,143,144,145,27445,146,147,148,149,27445,150,151,152,153,27445,154,155,156,157,27445,158,159,160,161,27445,162,163,164,165,27445,166,167,168,169,27445,170,171,172,173,27445,174,175,176,177,27445,178,179,180,181,27445,182,183,184,185,27445,186,187,188,189,27445,190,191,192,193,27445,194,195,196,198,27445,199,200,202,203,27445,204,205,207,208,27445,209,210,211,212,27445,213,214,215,216,27445,218,219,220,221,27445,222,223,224,225,27445,226,227,228,229,27445,231,232,233,234,27445,235,236,237,238,27445,239,240,241,243,27445,244,245,246,247,27445,249,250,251,252,27445,253,254,255,256,27445,257,258,259,260,27445,261,262,263,264,27445,265,266,267,268,27445,269,271,272,273,27445,274,275,276,277,27445,278,279,280,281,27445,282,283,284,285,27445,286,287,288,289,27445,290&action=alm_get_posts&query_type=standard'
for n in range(0, 9)
]
# Plain loop: crawl() is called only for its side effect of filling `total`,
# so there is no reason to build a throwaway list of its None results.
for task in tasklist:
    crawl(task)
# Persist everything gathered across all pages as a single CSV file.
lt.save(total, name='codemyui.csv')
Loading

0 comments on commit ce3f3e0

Please sign in to comment.