forked from rieuse/learnPython
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
116 additions
and
17 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from lxml import etree | ||
import aiohttp, asyncio | ||
import time | ||
|
||
# Page URLs of the Douban doulist: five pages whose ``start`` offsets are
# 0, 25, 50, 75 and 100.
list_url = [
    "https://www.douban.com/doulist/41691053/?start={}&sort=seq&sub_type=4".format(offset)
    for offset in range(0, 125, 25)
]
|
||
|
||
async def fetch(url):
    """Download ``url`` with HTTP GET and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for this single request and is
    released when the ``async with`` block exits.

    Args:
        url: Address to request.

    Returns:
        The response body decoded as UTF-8.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as html:
            return await html.text(encoding="utf-8")
|
||
|
||
async def parser(url):
    """Fetch one doulist page and print the title of every entry on it.

    Args:
        url: Page address; passed unchanged to ``fetch``.
    """
    response = await fetch(url)
    dom = etree.HTML(response)
    # Each list entry is a <div> whose id attribute starts with "item".
    selector = dom.xpath('//div[starts-with(@id,"item")]')
    for item in selector:
        # Relative, fixed-position path (div/div[2]/div[@class="title"]/a);
        # this is not a descendant ("//") search.
        titles = item.xpath('div/div[2]/div[@class="title"]/a/text()')
        if not titles:
            # Entry without a title link: indexing [0] would raise IndexError.
            continue
        # strip() with no argument removes newlines along with other
        # surrounding whitespace.
        print(titles[0].strip())
|
||
|
||
# Adding the async keyword to a function turns it into a coroutine function:
# calling it returns a coroutine object instead of running the body.
# The await keyword suspends the coroutine until the awaited asynchronous I/O
# has produced its result.
# Coroutines are handed to an event loop's run_until_complete(), which
# schedules and runs them.


async def _crawl_all(coroutines):
    """Run the given coroutines concurrently and wait for all of them."""
    await asyncio.gather(*coroutines)


# start = time.time()
# Create and own the loop explicitly: calling asyncio.get_event_loop() when no
# loop has been set is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
tasks = [parser(url) for url in list_url]
try:
    loop.run_until_complete(_crawl_all(tasks))
finally:
    # Release the loop's resources even if a page failed to download or parse.
    loop.close()
# print(time.time() - start)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import csv | ||
|
||
# Reading the CSV file without the csv module and printing its contents:
# for line in open("file/sample.csv"):
#     title, year, director = line.split(",")
#     print(title, year, title)

# Working with the CSV file through the csv module.
# The file is opened in append mode, so each run adds one more header row to
# the end of file/sample.csv. newline='' lets the csv writer control line
# endings itself; without it an extra "\r" is written on Windows.
with open('file/sample.csv', 'a', newline='') as file:
    # reader = csv.reader(file)
    # for title, year, director in reader:
    #     print(title, year, director)

    writer = csv.writer(file)
    writer.writerow(['title', 'summary', 'year'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from selenium import webdriver | ||
from bs4 import BeautifulSoup | ||
import time | ||
|
||
# Headless browser, used so that JavaScript-loaded content gets rendered.
driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\bin\phantomjs')
# driver = webdriver.Firefox()
try:
    driver.get('https://www.shanbay.com/read/news/')

    time.sleep(5)  # fixed 5-second pause to give the page time to finish loading
    soup = BeautifulSoup(driver.page_source, 'lxml')
    # print(driver.page_source)
    tags = soup.find_all('a', attrs={'class': 'linkContainer'})
    # for i in tags:
    #     print(i['href'])
    # driver.find_element_by_id('kw').send_keys(keyword)
    # driver.find_element_by_id('su').click()
    # for i in range(1,81):
    #     driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)

    # If loading the second page fails, save a screenshot of the browser at
    # that moment; the relative path puts it in the current working directory.
    try:
        driver.get('http://whatsmyuseragent.com/')
    except Exception:
        driver.save_screenshot('screenshot.png')
finally:
    # Always shut the browser down so the PhantomJS process is not left running.
    driver.quit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
# Proxy test, example 1
# proxies = {
#     'http': '115.127.77.10:80'
# }
# r = requests.get("http://icanhazip.com/", proxies=proxies)  # http://httpbin.org/ip works too
# print(r.text)
# r2 = requests.get('http://httpbin.org/get?show_env=1', proxies=proxies)
# print(r2.text)

# Requesting http://httpbin.org/get?show_env=1 returns the request headers in
# detail, which shows how anonymous the proxy is.
# Proxy pool: http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt
# Proxy pool: http://api.xicidaili.com/free2016.txt

# request = requests.get('http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt')
# print(request.text)

# Proxy test, example 2
# The session is used as a context manager so its connections are released,
# and each request carries a timeout so an unresponsive proxy raises an error
# instead of blocking the script indefinitely.
with requests.session() as ss:
    ss.proxies = {'http': 'http://123.206.6.17:3128', 'https': 'http://123.206.6.17:3128'}
    print(ss.get('http://www.qq.com', timeout=10))
    print(ss.get('https://www.github.com', timeout=10))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import requests | ||
|
||
# Download one image and save it as chun.jpg in the current working directory.
url = 'http://upload-images.jianshu.io/upload_images/5831032-3e4d3f9ad5a61b78.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1080/q/50'
# The timeout keeps a stalled connection from blocking the script forever.
r = requests.get(url, timeout=30)
# Stop on an HTTP error status instead of saving the error body as a .jpg file.
r.raise_for_status()
with open('chun.jpg', 'wb') as fo:
    fo.write(r.content)