rieuse committed Jul 1, 2017
1 parent 828fd42 commit 14b7ea3
Showing 9 changed files with 116 additions and 17 deletions.
28 changes: 11 additions & 17 deletions .idea/workspace.xml


Binary file added Python各种模块使用/img/1.jpg
Binary file added Python各种模块使用/img/2.jpg
Binary file added Python各种模块使用/img/3.jpg
35 changes: 35 additions & 0 deletions Python各种模块使用/异步IO模块学习.py
@@ -0,0 +1,35 @@
from lxml import etree
import aiohttp
import asyncio
import time

list_url = ["https://www.douban.com/doulist/41691053/?start={}&sort=seq&sub_type=4".format(number)
            for number in range(0, 125, 25)]


async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as html:
            response = await html.text(encoding="utf-8")
            return response


async def parser(url):
    response = await fetch(url)
    dom = etree.HTML(response)
    selector = dom.xpath('//div[starts-with(@id,"item")]')
    for item in selector:
        # div//div would match the class="title" div anywhere under this div, regardless of depth
        print(item.xpath('div/div[2]/div[@class="title"]/a/text()')[0].strip("\n").strip())


# Adding the async keyword turns a function into an asynchronous (coroutine) function.
# Each thread has one event loop; calling asyncio.get_event_loop in the main thread creates it.
# Hand the asynchronous tasks to the loop's run_until_complete method and the event loop schedules the coroutines.
# The async keyword declares a coroutine function; calling it returns a coroutine object.
# The await keyword suspends the coroutine until the asynchronous IO returns its result.
# (A Python 3.7+ asyncio.run variant is sketched after this file.)

# start = time.time()
loop = asyncio.get_event_loop()
tasks = [parser(url) for url in list_url]
loop.run_until_complete(asyncio.gather(*tasks))
# print(time.time() - start)
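
A side note on the event-loop boilerplate above: since Python 3.7, asyncio.run creates and closes the loop for you, so the last three lines collapse into one call. A minimal sketch of that variant, reusing the parser coroutine and list_url from this file:

async def main():
    await asyncio.gather(*(parser(url) for url in list_url))  # schedule all pages concurrently

asyncio.run(main())  # Python 3.7+: creates, runs and closes the event loop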
16 changes: 16 additions & 0 deletions Python各种模块使用/技巧-csv的使用.py
@@ -0,0 +1,16 @@
import csv

# Read the csv file without the csv module and print its contents
# for line in open("file/sample.csv"):
#     title, year, director = line.split(",")
#     print(title, year, director)


# Use the csv module to operate on the csv file
with open('file/sample.csv', 'a') as file:
    # reader = csv.reader(file)
    # for title, year, director in reader:
    #     print(title, year, director)

    writer = csv.writer(file)
    writer.writerow(['title', 'summary', 'year'])
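
A side note on the commented-out reader above: the csv docs recommend opening the file with newline='' when reading. A minimal sketch, assuming file/sample.csv holds title, year, director rows as the comments suggest:

with open('file/sample.csv', newline='') as f:  # newline='' is the csv-module idiom
    for title, year, director in csv.reader(f):
        print(title, year, director)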
24 changes: 24 additions & 0 deletions Python各种模块使用/技巧-selenium的简单使用.py
@@ -0,0 +1,24 @@
from selenium import webdriver
from bs4 import BeautifulSoup
import time

driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\bin\phantomjs')  # build a headless browser to render JS-loaded content
# driver = webdriver.Firefox()
driver.get('https://www.shanbay.com/read/news/')

time.sleep(5)  # fixed 5-second delay so the page can finish loading (an explicit-wait alternative is sketched after this file)
soup = BeautifulSoup(driver.page_source, 'lxml')
# print(driver.page_source)
tags = soup.find_all('a', attrs={'class': 'linkContainer'})
# for i in tags:
# print(i['href'])
# driver.find_element_by_id('kw').send_keys(keyword)
# driver.find_element_by_id('su').click()
# for i in range(1,81):
# driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)
'''Take a screenshot of the page at this moment; the image will be saved in the script's working directory.'''
try:
    driver.get('http://whatsmyuseragent.com/')
    driver.save_screenshot('screenshot.png')
except Exception as e:
    print(e)
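
A side note on the time.sleep(5) above: Selenium's explicit waits poll until a condition holds instead of always sleeping the full interval. A minimal sketch waiting for the same linkContainer links (names taken from the code above):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

wait = WebDriverWait(driver, 10)  # give up after 10 seconds
wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'linkContainer')))
soup = BeautifulSoup(driver.page_source, 'lxml')  # parse once the links exist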
24 changes: 24 additions & 0 deletions Python各种模块使用/技巧-使用代理请求网页.py
@@ -0,0 +1,24 @@
import requests
from bs4 import BeautifulSoup

# Proxy test, example 1: pass a proxies dict straight to requests.get
# proxies = {
# 'http': '115.127.77.10:80'
# }
# r = requests.get("http://icanhazip.com/", proxies=proxies)  # http://httpbin.org/ip works too
# print(r.text)
# r2 = requests.get('http://httpbin.org/get?show_env=1', proxies=proxies)
# print(r2.text)

# Request http://httpbin.org/get?show_env=1 to see the request headers in full and judge how anonymous the proxy is (a check is sketched after this file).
# Proxy pool: http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt
# Proxy pool: http://api.xicidaili.com/free2016.txt

# request = requests.get('http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt')
# print(request.text)

# Proxy test, example 2: set proxies on a session so every request goes through them
ss = requests.session()
ss.proxies = {'http': 'http://123.206.6.17:3128', 'https': 'http://123.206.6.17:3128'}
print(ss.get('http://www.qq.com'))
print(ss.get('https://www.github.com'))
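
A sketch of the anonymity check described in the comments above: request httpbin through the session's proxy and look at what the server saw. Which headers leak depends on the individual proxy, so treat the names below as the usual suspects rather than a guarantee:

info = ss.get('http://httpbin.org/get?show_env=1').json()
print('origin seen by the server:', info['origin'])
for name in ('X-Forwarded-For', 'Via', 'X-Real-Ip'):  # typical transparent-proxy giveaways
    print(name, '->', info['headers'].get(name))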
@@ -0,0 +1,6 @@
import requests

url = 'http://upload-images.jianshu.io/upload_images/5831032-3e4d3f9ad5a61b78.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1080/q/50'
r = requests.get(url)
with open('chun.jpg', 'wb') as fo:
    fo.write(r.content)  # r.content holds the raw bytes of the downloaded image
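
For larger files, requests can stream the download instead of holding the whole body in r.content. A minimal sketch of the documented stream=True / iter_content pattern for the same URL:

with requests.get(url, stream=True) as r:
    r.raise_for_status()  # fail fast on a bad status code
    with open('chun.jpg', 'wb') as fo:
        for chunk in r.iter_content(chunk_size=8192):  # write 8 KB at a time
            fo.write(chunk)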
