mythread.py
#coding:utf-8
import Queue
import threading
import urllib
import os
import time
import socket
socket.setdefaulttimeout(30)  # global socket timeout so urllib.urlretrieve downloads cannot hang forever
from mm.mm import Mm
mm = Mm()
img_urls = []  # shared list of image URLs collected by all worker threads
class ThreadUrl(threading.Thread):
""" docstring for ThreadUrl:
ThreadUrl Inherited thread,to run multi-threaded
list_url : this is a list.
"""
def __init__(self, queue,site):
self.queue = queue
self.site = site
threading.Thread.__init__(self)
    def run(self):
        while True:
            try:
                url = self.queue.get(True, 5)  # block up to 5 seconds, then assume the queue is drained
            except Queue.Empty:
                print "%s: %s finished!" % (time.ctime(), self.getName())
                break
            try:
                img_urls.extend(self.site.img_url(url))
            finally:
                self.queue.task_done()  # always mark the item done so queue.join() can return
def mainprocess(website=mm, num=10, limit=None, newdir='pics'):
    """Crawl a site with a pool of worker threads, then download the images.

    website -- a site object exposing detail_url() and img_url(url) (default: Mm)
    num     -- number of worker threads (default: 10)
    limit   -- stop after this many downloads (default: no limit)
    newdir  -- directory the images are saved into (default: 'pics')
    """
    queue = Queue.Queue()
    for i in range(num):
        t = ThreadUrl(queue, website)
        t.setDaemon(True)  # daemon threads will not block interpreter exit
        t.start()
    for url in website.detail_url():
        queue.put(url)
    queue.join()  # wait until every queued URL has been processed
if not os.path.exists(newdir):
os.makedirs(newdir)
os.chdir(newdir)
    i = 1
    for img_url in img_urls:
        filename = '%06d.%s' % (i, img_url.split('.')[-1])
        print "Downloading: %s" % img_url
        try:
            urllib.urlretrieve(img_url, filename)
            i = i + 1
        except IOError:
            continue
        print "Saved to %s as %s" % (newdir, filename)
        if limit and i == limit + 1:
            break
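
# A minimal sketch of the site interface mainprocess appears to expect,
# inferred from the calls above (Mm's real implementation lives in mm/mm.py
# and is not shown here). ExampleSite and its URLs are hypothetical
# placeholders, not part of the original project.
class ExampleSite(object):
    """Example site object: detail_url() returns page URLs and
    img_url(url) returns the image URLs found on one page."""
    def detail_url(self):
        # a real site object would crawl an index and yield each detail-page URL
        return ['http://example.com/page/1', 'http://example.com/page/2']
    def img_url(self, url):
        # a real site object would fetch `url` and parse image links out of the HTML
        return ['http://example.com/img/a.jpg']
# usage: mainprocess(ExampleSite(), num=4, limit=10, newdir='example_pics')
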
if __name__ == "__main__":
mainprocess(mm)