forked from jhao104/proxy_pool
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fetch.py
88 lines (75 loc) · 2.92 KB
/
fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: fetchScheduler
Description :
Author : JHao
date: 2019/8/6
-------------------------------------------------
Change Activity:
2021/11/18: multi-threaded fetching
-------------------------------------------------
"""
__author__ = 'JHao'
from threading import Lock, Thread

from helper.proxy import Proxy
from helper.check import DoValidator
from handler.logHandler import LogHandler
from handler.proxyHandler import ProxyHandler
from fetcher.proxyFetcher import ProxyFetcher
from handler.configHandler import ConfigHandler
class _ThreadFetcher(Thread):
    """Worker thread that drains one ``ProxyFetcher`` source into a shared dict.

    Each instance calls the ``ProxyFetcher`` attribute named ``fetch_source``
    and merges every proxy string it yields into ``proxy_dict``, which is
    shared with the other fetcher threads.
    """

    # Shared by every fetcher thread. The "is it already known? then add a
    # source, else insert" step below is a check-then-act sequence on a dict
    # that several threads mutate; without the lock two threads finding the
    # same proxy at once could both insert, and one source would be lost.
    _proxy_dict_lock = Lock()

    def __init__(self, fetch_source, proxy_dict):
        """
        :param fetch_source: name of the ``ProxyFetcher`` attribute to call
        :param proxy_dict: dict shared between fetcher threads, keyed by the
            whitespace-stripped proxy string
        """
        Thread.__init__(self)
        self.fetch_source = fetch_source
        self.proxy_dict = proxy_dict
        # None when ProxyFetcher has no such attribute; run() then logs the
        # resulting TypeError instead of letting it escape the thread.
        self.fetcher = getattr(ProxyFetcher, fetch_source, None)
        self.log = LogHandler("fetcher")
        # NOTE(review): conf and proxy_handler are not read anywhere in this
        # class; kept so the instance attributes stay unchanged.
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def run(self):
        """Fetch every proxy from the source and merge it into ``proxy_dict``.

        A proxy already present gets this source added via ``add_source``;
        a new one is stored as a fresh ``Proxy``. Any exception raised while
        fetching is logged and ends this source's run without propagating.
        """
        self.log.info("ProxyFetch - {func}: start".format(func=self.fetch_source))
        try:
            for proxy in self.fetcher():
                self.log.info('ProxyFetch - %s: %s ok' % (self.fetch_source, proxy.ljust(23)))
                proxy = proxy.strip()
                # Membership test and write must be one atomic step.
                with self._proxy_dict_lock:
                    if proxy in self.proxy_dict:
                        self.proxy_dict[proxy].add_source(self.fetch_source)
                    else:
                        self.proxy_dict[proxy] = Proxy(
                            proxy, source=self.fetch_source)
        except Exception as e:
            self.log.error("ProxyFetch - {func}: error".format(func=self.fetch_source))
            self.log.error(str(e))
class Fetcher(object):
    """Runs every configured ``ProxyFetcher`` source in its own thread."""

    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()

    def run(self):
        """
        Fetch proxies from all sources listed in ``self.conf.fetchers``.

        One ``_ThreadFetcher`` is started per valid source; all of them write
        into one shared dict. After every thread has been joined, the
        collected entries are filtered through ``DoValidator.preValidator``.

        This is a generator: no fetching happens until it is iterated.

        :return: yields each collected proxy object whose ``proxy`` attribute
            passes ``DoValidator.preValidator``
        """
        proxy_dict = dict()
        thread_list = list()
        self.log.info("ProxyFetch : start")

        for fetch_source in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_source))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_source))
                continue
            thread_list.append(_ThreadFetcher(fetch_source, proxy_dict))

        for thread in thread_list:
            # Thread.setDaemon() is deprecated since Python 3.10; the
            # attribute form is equivalent and works on older versions too.
            thread.daemon = True
            thread.start()

        # Wait for every source to finish before reading the shared dict.
        for thread in thread_list:
            thread.join()

        self.log.info("ProxyFetch - all complete!")
        for proxy in proxy_dict.values():
            if DoValidator.preValidator(proxy.proxy):
                yield proxy