commit

prayjourney · Jul 1, 2017 · 14b7ea3 · 14b7ea3
1 parent 828fd42
commit 14b7ea3
Show file tree

Hide file tree

Showing 9 changed files with 116 additions and 17 deletions.
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/Python各种模块使用/img/1.jpg b/Python各种模块使用/img/1.jpg
diff --git a/Python各种模块使用/img/2.jpg b/Python各种模块使用/img/2.jpg
diff --git a/Python各种模块使用/img/3.jpg b/Python各种模块使用/img/3.jpg
diff --git a/Python各种模块使用/异步IO模块学习.py b/Python各种模块使用/异步IO模块学习.py
@@ -0,0 +1,35 @@
+from lxml import etree
+import aiohttp, asyncio
+import time
+
+# Page URLs of the Douban list, one per start offset 0, 25, 50, 75 and 100.
+list_url = [
+    "https://www.douban.com/doulist/41691053/?start={}&sort=seq&sub_type=4".format(offset)
+    for offset in range(0, 125, 25)
+]
+
+
+async def fetch(url):
+    """Download ``url`` and return the response body decoded as UTF-8 text.
+
+    A new ``aiohttp.ClientSession`` is opened for this single request and is
+    closed again before the coroutine returns.
+    """
+    async with aiohttp.ClientSession() as client:
+        async with client.get(url) as reply:
+            return await reply.text(encoding="utf-8")
+
+
+async def parser(url):
+    """Fetch one list page and print the title of every entry found on it.
+
+    Entries are the ``div`` elements whose ``id`` starts with ``"item"``.
+    An entry without a title link is skipped, so one malformed entry does
+    not raise ``IndexError`` and abort the other pages being gathered.
+    """
+    response = await fetch(url)
+    dom = etree.HTML(response)
+    for item in dom.xpath('//div[starts-with(@id,"item")]'):
+        # Explicit relative path: first-level div -> its second child div ->
+        # div.title -> link text. (Writing div//div[@class="title"] instead
+        # would match the title div at any depth below the item.)
+        titles = item.xpath('div/div[2]/div[@class="title"]/a/text()')
+        if not titles:
+            continue
+        # strip() with no argument already removes newlines and other
+        # surrounding whitespace.
+        print(titles[0].strip())
+
+
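+# Adding the async keyword to a function turns it into an asynchronous function.
+# Each thread has its own event loop; when the main thread calls asyncio.get_event_loop(), the event loop is created.
+# Hand the asynchronous tasks to the loop's run_until_complete() method and the event loop schedules the coroutines' execution.
+# The async keyword declares a function as a coroutine function; calling it returns a coroutine object.
+# The await keyword suspends the coroutine function and waits for the asynchronous I/O to return its result.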
+
+# start = time.time()
+loop = asyncio.get_event_loop()
+try:
+    # One parser() coroutine per page URL; gather() runs them concurrently
+    # and run_until_complete() blocks until all of them have finished.
+    tasks = [parser(url) for url in list_url]
+    loop.run_until_complete(asyncio.gather(*tasks))
+finally:
+    # Release the loop's resources even when one of the pages fails.
+    loop.close()
+# print(time.time() - start)
diff --git a/Python各种模块使用/技巧-csv的使用.py b/Python各种模块使用/技巧-csv的使用.py
@@ -0,0 +1,16 @@
+import csv
+
+# Reading the CSV file without the csv module and printing its contents
+# for line in open("file/sample.csv"):
+#     title, year, director = line.split(",")
+#     print(title, year, director)
+
+
+# Use the csv module to work with the CSV file.
+# newline='' leaves line endings to csv.writer; without it an extra '\r' is
+# written on platforms that use '\r\n' line endings (blank rows on Windows).
+with open('file/sample.csv', 'a', newline='') as file:
+    # Reading example (requires the file to be opened for reading instead):
+    # reader = csv.reader(file)
+    # for title, year, director in reader:
+    #     print(title, year, director)
+
+    # Append a single row to the end of the file.
+    writer = csv.writer(file)
+    writer.writerow(['title', 'summary', 'year'])
diff --git a/Python各种模块使用/技巧-selenium的简单使用.py b/Python各种模块使用/技巧-selenium的简单使用.py
@@ -0,0 +1,24 @@
+from selenium import webdriver
+from bs4 import BeautifulSoup
+import time
+
+# Build a headless browser so that JavaScript-rendered content can be parsed.
+driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\bin\phantomjs')
+# driver = webdriver.Firefox()
+try:
+    driver.get('https://www.shanbay.com/read/news/')
+
+    time.sleep(5)  # fixed 5-second delay to give the page time to finish loading
+    soup = BeautifulSoup(driver.page_source, 'lxml')
+    # print(driver.page_source)
+    tags = soup.find_all('a', attrs={'class': 'linkContainer'})
+    # for i in tags:
+    #     print(i['href'])
+    # driver.find_element_by_id('kw').send_keys(keyword)
+    # driver.find_element_by_id('su').click()
+    # for i in range(1,81):
+    #     driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)
+
+    # If loading the next page fails, capture what the browser shows at that
+    # moment; the image is saved in the script's working directory.
+    try:
+        driver.get('http://whatsmyuseragent.com/')
+    except Exception:
+        driver.save_screenshot('screenshot.png')
+finally:
+    # Always shut the browser down so no PhantomJS process is left running.
+    driver.quit()
diff --git a/Python各种模块使用/技巧-使用代理请求网页.py b/Python各种模块使用/技巧-使用代理请求网页.py
@@ -0,0 +1,24 @@
+import requests
+from bs4 import BeautifulSoup
+
+# Proxy test, example 1
+# proxies = {
+#     'http': '115.127.77.10:80'
+# }
+# r = requests.get("http://icanhazip.com/", proxies=proxies)  # http://httpbin.org/ip works as well
+# print(r.text)
+# r2 = requests.get('http://httpbin.org/get?show_env=1', proxies=proxies)
+# print(r2.text)
+
+# Request http://httpbin.org/get?show_env=1 to see the full request headers and judge how anonymous the proxy is.
+# Proxy pool: http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt
+# Proxy pool: http://api.xicidaili.com/free2016.txt
+
+# request = requests.get('http://7xrnwq.com1.z0.glb.clouddn.com/proxy_list.txt')
+# print(request.text)
+
+# Proxy test, example 2: send every request of one session through a proxy.
+# The with-block closes the session's connections when done, and the timeout
+# keeps an unresponsive proxy from blocking the script forever.
+with requests.Session() as ss:
+    ss.proxies = {'http': 'http://123.206.6.17:3128', 'https': 'http://123.206.6.17:3128'}
+    print(ss.get('http://www.qq.com', timeout=10))
+    print(ss.get('https://www.github.com', timeout=10))
diff --git a/Python各种模块使用/技巧-利用requests模块下载图片保存文件.py b/Python各种模块使用/技巧-利用requests模块下载图片保存文件.py
@@ -0,0 +1,6 @@
+import requests
+
+# Download one image and save its raw bytes as chun.jpg in the working directory.
+url = 'http://upload-images.jianshu.io/upload_images/5831032-3e4d3f9ad5a61b78.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1080/q/50'
+r = requests.get(url, timeout=10)
+# Raise on an HTTP error status instead of saving the error page as the image.
+r.raise_for_status()
+with open('chun.jpg', 'wb') as fo:
+    fo.write(r.content)