File tree Expand file tree Collapse file tree 1 file changed +38
-0
lines changed Expand file tree Collapse file tree 1 file changed +38
-0
lines changed Original file line number Diff line number Diff line change
1
+ import re
2
+ import requests as rq
3
+
4
# Base address of the pages to scrape; a numeric page id is appended to it.
ROOT_URL = "http://wufazhuce.com/one/"
# Page id passed to main() as the starting point when run as a script.
URL_NUM = 14
6
+
7
def yield_url(ROOT_URL, URL_NUM):
    """Return the address of a single page.

    Args:
        ROOT_URL: Base address. It is used verbatim; no separator is added.
        URL_NUM: Page id. It is converted with ``str`` and appended.

    Returns:
        ``ROOT_URL`` immediately followed by the text form of ``URL_NUM``.
    """
    return ROOT_URL + str(URL_NUM)
9
+
10
def get_html(url, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request can block indefinitely.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx status (via ``raise_for_status``).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = rq.get(url, timeout=timeout)
    # Fail here instead of handing an error page to the HTML parser.
    response.raise_for_status()
    return response.content.decode("utf-8")
12
+
13
# Compiled once at import time; the pattern text is what the page is matched on.
_IMG_URL_REGEX = re.compile(r'<img src="(.*?)" alt="" />')
# re.S lets the quote span several lines inside the <div>.
_CITE_REGEX = re.compile(r'<div class="one-cita">(.*?)</div>', re.S)


def _first_group(pattern, html, what):
    """Return group 1 of the first match of *pattern* in *html*.

    Raises IndexError naming *what* when the pattern does not match.
    """
    match = pattern.search(html)
    if match is None:
        raise IndexError("no {} found in page".format(what))
    return match.group(1)


def get_data(html):
    """Extract the image address and the quote text from a page.

    Args:
        html: Page markup as ``str``.

    Returns:
        A ``(img_url, cite)`` tuple: the ``src`` of the first
        ``<img ... alt="" />`` tag, and the content of the first
        ``<div class="one-cita">`` with surrounding whitespace stripped.

    Raises:
        IndexError: If either element is missing from *html*.
    """
    img_url = _first_group(_IMG_URL_REGEX, html, "image URL")
    cite = _first_group(_CITE_REGEX, html, "quote").strip()
    return img_url, cite
19
+
20
def save_data(img_url, cite, URL_NUM):
    """Save one page's image and quote to the current directory.

    Writes ``./<URL_NUM>.jpg`` with the bytes downloaded from *img_url*
    and ``./cite<URL_NUM>.txt`` with *cite*.

    Args:
        img_url: Address of the image to download.
        cite: Quote text to store.
        URL_NUM: Page id, used in both file names.

    Returns:
        ``URL_NUM + 1``, the id of the next page.

    Raises:
        requests.RequestException: If the image download fails or returns
            a 4xx/5xx status.
        OSError: If either file cannot be written.
    """
    # Download before opening the file so a failed request does not leave
    # an empty .jpg behind.
    response = rq.get(img_url, timeout=10)
    response.raise_for_status()
    with open("./{}.jpg".format(URL_NUM), "wb") as fp:
        fp.write(response.content)
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII quote text.
    with open("./cite{}.txt".format(URL_NUM), "w", encoding="utf-8") as fp:
        fp.write(cite)
    return URL_NUM + 1
26
+
27
def main(ROOT_URL, URL_NUM, number):
    """Fetch and save *number* consecutive pages starting at *URL_NUM*.

    Each iteration builds the page address, downloads the page, extracts
    the image address and quote, and saves both.

    Args:
        ROOT_URL: Base address passed to ``yield_url``.
        URL_NUM: Id of the first page to fetch.
        number: How many pages to process.

    No exception is handled here, so the first failing page ends the loop
    and the error propagates to the caller.
    """
    page_id = URL_NUM
    for _ in range(number):
        html = get_html(yield_url(ROOT_URL, page_id))
        img_url, cite = get_data(html)
        # save_data returns the id to use for the next iteration.
        page_id = save_data(img_url, cite, page_id)
33
+
34
if __name__ == "__main__":
    import sys

    try:
        main(ROOT_URL, URL_NUM, 20)
    except Exception as exc:
        # Still best-effort: a failing page ends the run without a
        # traceback, but the reason is reported instead of being silently
        # dropped. KeyboardInterrupt and SystemExit are no longer swallowed.
        print("scrape stopped: {!r}".format(exc), file=sys.stderr)
You can’t perform that action at this time.
0 commit comments