Skip to content

Commit

Permalink
Add 2 free proxy methods
Browse files Browse the repository at this point in the history
  • Loading branch information
roronoa-dong committed Sep 9, 2019
1 parent 5554832 commit ed32527
Showing 1 changed file with 144 additions and 117 deletions.
261 changes: 144 additions & 117 deletions ProxyGetter/getFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import re
import sys
import requests
from time import sleep

sys.path.append('..')

Expand All @@ -30,65 +31,85 @@ class GetFreeProxy(object):
"""

@staticmethod
def freeProxy01():
    """
    data5u (无忧代理) http://www.data5u.com/
    Almost none of the listed proxies are usable.
    :return: yields 'ip:port' strings
    """
    url_list = [
        'http://www.data5u.com/',
        'http://www.data5u.com/free/gngn/index.shtml',
        'http://www.data5u.com/free/gnpt/index.shtml'
    ]
    # The displayed port is obfuscated: the second <span>'s extra CSS class
    # encodes the port as letters. Each letter maps to a digit via its index
    # in `key`, and the decoded number shifted right by 3 (i.e. // 8) is the
    # real port.
    key = 'ABCDEFGHIZ'
    for url in url_list:
        html_tree = getHtmlTree(url)
        ul_list = html_tree.xpath('//ul[@class="l2"]')
        for ul in ul_list:
            try:
                ip = ul.xpath('./span[1]/li/text()')[0]
                classnames = ul.xpath('./span[2]/li/attribute::class')[0]
                # Second class name carries the encoded port letters.
                classname = classnames.split(' ')[1]
                port_sum = 0
                for c in classname:
                    port_sum *= 10
                    port_sum += key.index(c)
                port = port_sum >> 3
                yield '{}:{}'.format(ip, port)
            except Exception as e:
                # Best-effort scraping: a malformed row must not abort the
                # whole generator.
                print(e)

@staticmethod
def freeProxy02(count=20):
    """
    66ip (代理66) http://www.66ip.cn/
    The site guards its API behind a JavaScript challenge that sets the
    ``__jsl_clearance`` cookie; the challenge script is rewritten into a
    callable function and evaluated with execjs to obtain the cookie
    before requesting the proxy lists.
    :param count: number of proxies to request per endpoint
    :return: yields 'ip:port' strings
    """
    urls = [
        "http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=",
        "http://www.66ip.cn/nmtq.php?getnum={}&isp=0&anonymoustype=0&s"
        "tart=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip"
    ]

    try:
        # Imported lazily so the module stays importable when the optional
        # execjs dependency (a JS runtime binding) is absent.
        import execjs
        import requests

        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
                   'Accept': '*/*',
                   'Connection': 'keep-alive',
                   'Accept-Language': 'zh-CN,zh;q=0.8'}
        session = requests.session()
        src = session.get("http://www.66ip.cn/", headers=headers).text
        # Turn the obfuscated challenge <script> into a function that
        # returns the statement assigning document.cookie instead of
        # eval()ing it; num bounds the deobfuscation loop.
        src = src.split("</script>")[0] + '}'
        src = src.replace("<script>", "function test() {")
        src = src.replace("while(z++)try{eval(", ';var num=10;while(z++)try{var tmp=')
        src = src.replace(");break}", ";num--;if(tmp.search('cookie') != -1 | num<0){return tmp}}")
        ctx = execjs.compile(src)
        src = ctx.call("test")
        # Cut out the cookie assignment and evaluate just its right-hand
        # side (with a stub `window`) to compute the cookie value.
        src = src[src.find("document.cookie="): src.find("};if((")]
        src = src.replace("document.cookie=", "")
        src = "function test() {var window={}; return %s }" % src
        cookie = execjs.compile(src).call('test')
        js_cookie = cookie.split(";")[0].split("=")[-1]
    except Exception as e:
        # Without the clearance cookie the endpoints are useless; bail out.
        print(e)
        return

    for url in urls:
        try:
            html = session.get(url.format(count), cookies={"__jsl_clearance": js_cookie}, headers=headers).text
            ips = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}", html)
            for ip in ips:
                yield ip.strip()
        except Exception as e:
            # One failing endpoint should not stop the other.
            print(e)

@staticmethod
def freeProxyFourth(page_count=1):
def freeProxy03(page_count=2):
"""
西刺代理 http://www.xicidaili.com
:return:
Expand All @@ -109,7 +130,7 @@ def freeProxyFourth(page_count=1):
pass

@staticmethod
def freeProxyFifth():
def freeProxy04():
"""
guobanjia http://www.goubanjia.com/
:return:
Expand All @@ -128,28 +149,25 @@ def freeProxyFifth():
try:
# :符号裸放在td下,其他放在div span p中,先分割找出ip,再找port
ip_addr = ''.join(each_proxy.xpath(xpath_str))
port = each_proxy.xpath(".//span[contains(@class, 'port')]/text()")[0]
yield '{}:{}'.format(ip_addr, port)

# HTML中的port是随机数,真正的端口编码在class后面的字母中。
# 比如这个:
# <span class="port CFACE">9054</span>
# CFACE解码后对应的是3128。
port = 0
for _ in each_proxy.xpath(".//span[contains(@class, 'port')]"
"/attribute::class")[0]. \
replace("port ", ""):
port *= 10
port += (ord(_) - ord('A'))
port /= 8

yield '{}:{}'.format(ip_addr, int(port))
except Exception as e:
pass

@staticmethod
def freeProxySixth():
"""
讯代理 http://www.xdaili.cn/ 已停用
:return:
"""
url = 'http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10'
request = WebRequest()
try:
res = request.get(url, timeout=10).json()
for row in res['RESULT']['rows']:
yield '{}:{}'.format(row['ip'], row['port'])
except Exception as e:
pass

@staticmethod
def freeProxySeventh():
def freeProxy05():
"""
快代理 https://www.kuaidaili.com
"""
Expand All @@ -160,47 +178,31 @@ def freeProxySeventh():
for url in url_list:
tree = getHtmlTree(url)
proxy_list = tree.xpath('.//table//tr')
sleep(1) # 必须sleep 不然第二条请求不到数据
for tr in proxy_list[1:]:
yield ':'.join(tr.xpath('./td/text()')[0:2])

@staticmethod
def freeProxy06():
    """
    CoderBusy (码农代理) https://proxy.coderbusy.com/
    :return: yields 'ip:port' strings
    """
    urls = ['https://proxy.coderbusy.com/']
    for url in urls:
        tree = getHtmlTree(url)
        proxy_list = tree.xpath('.//table//tr')
        # Skip the first table row (header); IP and port are expected in
        # the first two <td> cells of each remaining row.
        for tr in proxy_list[1:]:
            yield ':'.join(tr.xpath('./td/text()')[0:2])

@staticmethod
def freeProxyTen():
def freeProxy07():
"""
云代理 http://www.ip3366.net/free/
:return:
"""
urls = ['http://www.ip3366.net/free/']
urls = ['http://www.ip3366.net/free/?stype=1',
"http://www.ip3366.net/free/?stype=2"]
request = WebRequest()
for url in urls:
r = request.get(url, timeout=10)
Expand All @@ -209,7 +211,7 @@ def freeProxyTen():
yield ":".join(proxy)

@staticmethod
def freeProxyEleven():
def freeProxy08():
"""
IP海 http://www.iphai.com/free/ng
:return:
Expand All @@ -229,11 +231,10 @@ def freeProxyEleven():
yield ":".join(proxy)

@staticmethod
def freeProxyTwelve(page_count=2):
def freeProxy09(page_count=5):
"""
http://ip.jiangxianli.com/?page=
免费代理库
超多量
:return:
"""
for i in range(1, page_count + 1):
Expand All @@ -245,60 +246,86 @@ def freeProxyTwelve(page_count=2):
for tr in tr_list:
yield tr.xpath("./td[2]/text()")[0] + ":" + tr.xpath("./td[3]/text()")[0]

@staticmethod
def freeProxyWallFirst():
"""
墙外网站 cn-proxy
:return:
"""
urls = ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218']
request = WebRequest()
for url in urls:
r = request.get(url, timeout=10)
proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]<td>(\d+)</td>', r.text)
for proxy in proxies:
yield ':'.join(proxy)
# @staticmethod
# def freeProxy10():
# """
# 墙外网站 cn-proxy
# :return:
# """
# urls = ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218']
# request = WebRequest()
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]<td>(\d+)</td>', r.text)
# for proxy in proxies:
# yield ':'.join(proxy)

# @staticmethod
# def freeProxy11():
# """
# https://proxy-list.org/english/index.php
# :return:
# """
# urls = ['https://proxy-list.org/english/index.php?p=%s' % n for n in range(1, 10)]
# request = WebRequest()
# import base64
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r"Proxy\('(.*?)'\)", r.text)
# for proxy in proxies:
# yield base64.b64decode(proxy).decode()

# @staticmethod
# def freeProxy12():
# urls = ['https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1']
# request = WebRequest()
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>', r.text)
# for proxy in proxies:
# yield ':'.join(proxy)

@staticmethod
def freeProxy13(max_page=5):
    """
    齐云代理 http://www.qydaili.com/free/
    Scrapes the China listing pages.
    :param max_page: number of listing pages to fetch (1..max_page)
    :return: yields 'ip:port' strings
    """
    base_url = 'http://www.qydaili.com/free/?action=china&page='
    request = WebRequest()
    for page in range(1, max_page + 1):
        url = base_url + str(page)
        r = request.get(url, timeout=10)
        # IP in one <td>, port in a following <td>; [\s\S]*? lazily spans
        # the markup in between (including newlines).
        proxies = re.findall(
            r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td.*?>(\d+)</td>',
            r.text)
        for proxy in proxies:
            yield ':'.join(proxy)

@staticmethod
def freeProxy14(max_page=5):
    """
    89ip http://www.89ip.cn/
    :param max_page: number of listing pages to fetch (1..max_page)
    :return: yields 'ip:port' strings
    """
    base_url = 'http://www.89ip.cn/index_{}.html'
    request = WebRequest()
    for page in range(1, max_page + 1):
        url = base_url.format(page)
        r = request.get(url, timeout=10)
        # The cells may wrap their content in whitespace/markup, so the
        # pattern lazily skips arbitrary characters around IP and port.
        proxies = re.findall(
            r'<td.*?>[\s\S]*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?</td>[\s\S]*?<td.*?>[\s\S]*?(\d+)[\s\S]*?</td>',
            r.text)
        for proxy in proxies:
            yield ':'.join(proxy)


if __name__ == '__main__':
    # Ad-hoc smoke test: validate one fetcher at a time against the live
    # sites (network-dependent, so most calls stay commented out).
    from CheckProxy import CheckProxy

    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy01)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy02)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy03)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy04)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy05)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy06)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy07)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy08)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy09)

    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy13)
    CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy14)

    # CheckProxy.checkAllGetProxyFunc()

0 comments on commit ed32527

Please sign in to comment.