diff --git "a/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\346\226\227\351\261\274API\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\344\277\235\345\255\230\345\210\260Mongodb.py" "b/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\346\226\227\351\261\274API\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\344\277\235\345\255\230\345\210\260Mongodb.py" new file mode 100644 index 0000000..79aae0a --- /dev/null +++ "b/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\346\226\227\351\261\274API\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\344\277\235\345\255\230\345\210\260Mongodb.py" @@ -0,0 +1,41 @@ +import json + +import requests +from bs4 import BeautifulSoup +from pymongo import MongoClient + +client = MongoClient('localhost') +db = client["DouyuTV"] +col = db["Roominfo"] +host = 'http://api.douyutv.com/api/v1/live/' +all_game = 'http://open.douyucdn.cn/api/RoomApi/game' +sort = [] + + +def parser(url): + html = requests.get(url).text + soup = BeautifulSoup(html, 'lxml') + jn = json.loads(soup.text) + return jn + + +def get_room_sort(url): + jn = parser(url) + data = jn['data'] + for item in data: + sort.append(host + item['short_name']) + + +def get_room_info(): + for item in sort: + jn = parser(item) + data = jn['data'] + try: + col.insert(data) + except Exception as e: + pass + + +if __name__ == '__main__': + get_room_sort(all_game) + get_room_info() diff --git "a/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\347\275\221\345\235\200\346\236\204\351\200\240\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\345\210\260Mongodb.py" "b/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\347\275\221\345\235\200\346\236\204\351\200\240\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\345\210\260Mongodb.py" new file mode 100644 index 0000000..360c08a --- /dev/null +++ "b/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\345\210\251\347\224\250\347\275\221\345\235\200\346\236\204\351\200\240\347\210\254\345\217\226\346\226\227\351\261\274\345\205\250\351\203\250\346\210\277\351\227\264\344\277\241\346\201\257\345\210\260Mongodb.py" @@ -0,0 +1,73 @@ +import re +from datetime import datetime +import requests +from bs4 import BeautifulSoup +from pymongo import MongoClient + +HOST = "http://www.douyu.com" +Directory_url = "http://www.douyu.com/directory?isAjax=1" +Qurystr = "/?page=1&isAjax=1" + +client = MongoClient('localhost') +db = 
client["Douyu2"] +col = db["Roominfo"] + + +def get_roominfo(data): + if data: + firstpage = BeautifulSoup(data, 'lxml') + roomlist = firstpage.select('li') + print(len(roomlist)) + if roomlist: + for room in roomlist: + try: + roomid = room["data-rid"] + roomtitle = room.a["title"] + roomtitle = roomtitle.encode('utf-8') + roomowner = room.select("p > span") + roomtag = room.select("div > span") + roomimg = room.a + roomtag = roomtag[0].string + date = datetime.now() + if len(roomowner) == 2: + zbname = roomowner[0].string + audience = roomowner[1].get_text() + audience = audience.encode('utf-8').decode('utf-8') + image = roomimg.span.img["data-original"] + word = u"万" + if word in audience: + r = re.compile(r'(\d+)(\.?)(\d*)') + data = r.match(audience).group(0) + audience = int(float(data) * 10000) + else: + audience = int(audience) + roominfo = { + "roomid": int(roomid), + "roomtitle": roomtitle, + "anchor": zbname, + "audience": audience, + "tag": roomtag, + "date": date, + "img": image + } + col.insert_one(roominfo) + except Exception as e: + print(e) + + +def insert_info(): + session = requests.session() + pagecontent = session.get(Directory_url).text + pagesoup = BeautifulSoup(pagecontent, 'lxml') + games = pagesoup.select('a') + # col.drop() + for game in games: + links = game["href"] + gameurl = HOST + links + Qurystr + print(gameurl) + gamedata = session.get(gameurl).text + get_roominfo(gamedata) + + +if __name__ == '__main__': + insert_info() diff --git "a/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\346\226\227\351\261\274\347\233\264\346\222\255\346\210\277\351\227\264\346\225\260\346\215\256\345\210\206\346\236\220.py" "b/Python\347\210\254\345\217\226\346\226\227\351\261\274\346\210\277\351\227\264\344\277\241\346\201\257\345\222\214\346\225\260\346\215\256\345\210\206\346\236\220/\346\226\227\351\261\274\347\233\264\346\222\255\346\210\277\351\227\264\346\225\260\346\215\256\345\210\206\346\236\220.py" new file mode 100644 index 0000000..e69de29 diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2531.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2531.py" new file mode 100644 index 0000000..707784f --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2531.py" @@ -0,0 +1,23 @@ +import urllib.request +from bs4 import BeautifulSoup +import os + +# 下载网页 +url = 'http://www.yidianzixun.com/home?page=article&id=0G5zThN8&up=0' +res = urllib.request.urlopen(url) +html = res.read().decode('utf-8') +# 解析网页 +soup = BeautifulSoup(html, 'html.parser') +result = soup.find_all('img', limit=10) +links = [] +for content in result: + links.append(content.get('src')) +# 下载并存储图片 +if not os.path.exists('photo'): + os.makedirs('photo') +i = 0 +for link in links: + i += 1 + filename = 'photo\\' + 'photo' + str(i) + '.gif' + with open(filename, 'w') as file: + urllib.request.urlretrieve(link, filename) diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2532.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2532.py" new file mode 100644 index 0000000..a196dae --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2532.py" @@ -0,0 +1,22 @@ +import urllib.request +from bs4 import BeautifulSoup 
+import os + +url = 'http://www.8she.com/31988.html' +res = urllib.request.urlopen(url) +html = res.read().decode('utf-8') +soup = BeautifulSoup(html, 'html.parser') +result = soup.find_all(class_='aligncenter', limit=15) +# print(result) +links = [] +for content in result: + links.append(content.get('src')) +# 下载并存储图片 +if not os.path.exists('E:\\rieuse\爬虫图片\photo2'): + os.makedirs('E:\\rieuse\爬虫图片\photo2') +i = 0 +for link in links: + i += 1 + filename = 'E:\\rieuse\爬虫图片\photo2\\' + 'photo' + str(i) + '.jpg' + with open(filename, 'w') as file: + urllib.request.urlretrieve(link, filename) diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2533\347\210\254\347\256\200\344\271\2467\346\227\245\347\203\255\351\227\250.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2533\347\210\254\347\256\200\344\271\2467\346\227\245\347\203\255\351\227\250.py" new file mode 100644 index 0000000..4d0f58e --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2533\347\210\254\347\256\200\344\271\2467\346\227\245\347\203\255\351\227\250.py" @@ -0,0 +1,37 @@ +# -*-coding:utf-8-*- +import csv +import requests +from bs4 import BeautifulSoup + +base_url = 'http://www.jianshu.com/trending/weekly' + +articles = [] +data_list = [] +for i in range(1, 7): + url = base_url + '?page={}'.format(i) + r = requests.get(url) + html = r.text + soup = BeautifulSoup(html, 'html.parser') + for article in soup.find_all(class_='content'): + title = article.find(class_='title').get_text() + link = 'http://www.jianshu.com' + article.find(class_='title').get('href') + author = article.find(class_='blue-link').get_text() + time = article.span['data-shared-at'] + meta = article.find(class_='meta').find_all(['a', 'span']) + metas = [] + for item in meta: + metas.append(item.get_text().strip()) + read = metas[0] + comment = metas[1] + like = metas[2] + try: + money = metas[3] + except: + money = '0' + articles.append([title, author, time, read, comment, like, money, link]) + +with open('jianshu.csv', 'w') as f: + writer = csv.writer(f) + writer.writerow(['文章标题', '作者', '时间', '阅读量', '评论', '喜欢', '赞赏数', '文章地址']) + for row in articles: + writer.writerow(row) diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2534\344\275\277\347\224\250bs\347\232\204select\351\200\211\345\217\226\345\233\276\347\211\207\344\272\214\350\277\233\345\210\266\344\277\235\345\255\230\345\233\276\347\211\207\346\226\207\344\273\266.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2534\344\275\277\347\224\250bs\347\232\204select\351\200\211\345\217\226\345\233\276\347\211\207\344\272\214\350\277\233\345\210\266\344\277\235\345\255\230\345\233\276\347\211\207\346\226\207\344\273\266.py" new file mode 100644 index 0000000..52a4dae --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2534\344\275\277\347\224\250bs\347\232\204select\351\200\211\345\217\226\345\233\276\347\211\207\344\272\214\350\277\233\345\210\266\344\277\235\345\255\230\345\233\276\347\211\207\346\226\207\344\273\266.py" @@ -0,0 +1,32 @@ +import requests +from bs4 import BeautifulSoup +import os + +''' +下载图片或者文件也可以使用urlretrieve模块 +from urllib import request +request.urlretrieve('','1.jpg') +''' +# proxies = { +# "http": "http://175.155.240.127:808", +# "https": "http://114.239.149.110:808", +# } +headers = { + 
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36', + 'Connection': 'keep-alive'} +url = 'http://www.wmpic.me/86253' +r = requests.get(url, headers=headers) +soup = BeautifulSoup(r.text, 'html.parser') +result = soup.select('#content > div.content-c > center > img') +links = [] +for content in result: + links.append(content.get('src')) +if not os.path.exists('花瓶'): + os.makedirs('花瓶') +i = 0 +for link in links: + i += 1 + filename = '花瓶\\' + '花瓶' + str(i) + '.jpg' + ir = requests.get(link) + with open(filename, 'wb') as fo: + fo.write(ir.content) diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2535\345\244\232\347\272\277\347\250\213\347\210\254\345\217\226\347\263\227\344\272\213\347\231\276\347\247\221\350\257\204\350\256\272.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2535\345\244\232\347\272\277\347\250\213\347\210\254\345\217\226\347\263\227\344\272\213\347\231\276\347\247\221\350\257\204\350\256\272.py" new file mode 100644 index 0000000..d18c0cc --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/Python\347\210\254\350\231\2535\345\244\232\347\272\277\347\250\213\347\210\254\345\217\226\347\263\227\344\272\213\347\231\276\347\247\221\350\257\204\350\256\272.py" @@ -0,0 +1,46 @@ +import urllib.request +import threading +import re +import urllib.error + +headers = ("User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0") +opener = urllib.request.build_opener() +opener.addheaders = [headers] +urllib.request.install_opener(opener) + + +class One(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + + def run(self): + for i in range(1, 36, 2): + url = "http://www.qiushibaike.com/8hr/page/" + str(i) + pagedata = urllib.request.urlopen(url).read().decode("utf-8", "ignore") + pat = '
<div class="content">.*?<span>(.*?)</span>.*?</div>
' + datalist = re.compile(pat, re.S).findall(pagedata) + for j in range(0, len(datalist)): + print("第" + str(i) + "页第" + str(j) + "个段子的内容是:") + print(datalist[j]) + + +class Two(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + + def run(self): + for i in range(0, 36, 2): + url = "http://www.qiushibaike.com/8hr/page/" + str(i) + pagedata = urllib.request.urlopen(url).read().decode("utf-8", "ignore") + pat = '
<div class="content">.*?<span>(.*?)</span>.*?</div>
' + datalist = re.compile(pat, re.S).findall(pagedata) + for j in range(0, len(datalist)): + print("第" + str(i) + "页第" + str(j) + "个段子的内容是:") + print(datalist[j]) + + +one = One() +one.start() +two = Two() +two.start() diff --git "a/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/\347\210\254\345\217\226\350\212\261\347\223\243\345\246\271\345\255\220\347\274\251\347\225\245\345\233\276.py" "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/\347\210\254\345\217\226\350\212\261\347\223\243\345\246\271\345\255\220\347\274\251\347\225\245\345\233\276.py" new file mode 100644 index 0000000..7b381c7 --- /dev/null +++ "b/Python\347\210\254\350\231\253\345\260\217\347\232\204demo/\347\210\254\345\217\226\350\212\261\347\223\243\345\246\271\345\255\220\347\274\251\347\225\245\345\233\276.py" @@ -0,0 +1,58 @@ +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from selenium import webdriver +import requests +import lxml.html +import os + +SERVICE_ARGS = ['--load-images=false', '--disk-cache=true'] +browser = webdriver.PhantomJS(service_args=SERVICE_ARGS) +# browser = webdriver.Firefox() +wait = WebDriverWait(browser, 15) +browser.set_window_size(1400, 900) + + +def get_url(): + print('打开主页搜寻链接中...') + try: + browser.get('http://huaban.com/boards/favorite/beauty/') + wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#waterfall'))) + html = browser.page_source + doc = lxml.html.fromstring(html) + name = doc.xpath('//*[@id="waterfall"]/div/a[1]/div[2]/h3/text()') + u = doc.xpath('//*[@id="waterfall"]/div/a[1]/@href') + for item, fileName in zip(u, name): + url = 'http://huaban.com' + item + print('主链接已找到:' + url) + if '*' in fileName: + fileName = fileName.replace('*', '') + dowload(url, fileName) + except Exception as e: + print(e) + + +def dowload(url, fileName): + try: + browser.get(url) + wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#waterfall'))) + html = browser.page_source + doc = lxml.html.fromstring(html) + if not os.path.exists('image2\\' + fileName): + os.makedirs('image2\\' + fileName) + link = doc.xpath('//*[@id="waterfall"]/div/a/img/@src') + i = 0 + for item in link: + i += 1 + ur = 'http:' + item + print('正在下载第' + str(i) + '张图片,地址:' + ur) + r = requests.get(ur) + filename = 'image2\\{}\\'.format(fileName) + str(i) + '.jpg' + with open(filename, 'wb') as fo: + fo.write(r.content) + except Exception: + print('本次出错了') + + +if __name__ == '__main__': + get_url() diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\200\357\274\232\347\210\254\345\217\226\350\261\206\347\223\243\347\224\265\345\275\261\344\270\255\351\200\237\345\272\246\344\270\216\346\277\200\346\203\2058\346\274\224\345\221\230\345\233\276\347\211\207.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\200\357\274\232\347\210\254\345\217\226\350\261\206\347\223\243\347\224\265\345\275\261\344\270\255\351\200\237\345\272\246\344\270\216\346\277\200\346\203\2058\346\274\224\345\221\230\345\233\276\347\211\207.py" new file mode 100644 index 0000000..12241ea --- /dev/null +++ 
"b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\200\357\274\232\347\210\254\345\217\226\350\261\206\347\223\243\347\224\265\345\275\261\344\270\255\351\200\237\345\272\246\344\270\216\346\277\200\346\203\2058\346\274\224\345\221\230\345\233\276\347\211\207.py" @@ -0,0 +1,26 @@ +import urllib.request +import os +import re + + +def douban(url): + r = urllib.request.urlopen(url) + html = r.read().decode('utf-8') + result = re.findall(r'https://img\d.doubanio.com/img/celebrity/medium/.*.jpg', html) + result2 = re.findall(r'(?<=title=").\S+', html) + result2.pop() + result3 = sorted(set(result2), key=result2.index) + result3.pop(-3) + if not os.path.exists('douban'): + os.makedirs('douban') + i = 0 + for link in result: + filename = 'douban\\' + str(result3[i]) + '.jpg' + i += 1 + with open(filename, 'w') as file: + urllib.request.urlretrieve(link, filename) + + +url = 'https://movie.douban.com/subject/26260853/celebrities' +if __name__ == '__main__': + douban(url) diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\203\357\274\232\346\211\271\351\207\217\346\212\223\345\217\226\350\212\261\347\223\243\347\275\221\351\253\230\346\270\205\347\276\216\345\233\276\345\271\266\344\277\235\345\255\230.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\203\357\274\232\346\211\271\351\207\217\346\212\223\345\217\226\350\212\261\347\223\243\347\275\221\351\253\230\346\270\205\347\276\216\345\233\276\345\271\266\344\277\235\345\255\230.py" new file mode 100644 index 0000000..d505575 --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\203\357\274\232\346\211\271\351\207\217\346\212\223\345\217\226\350\212\261\347\223\243\347\275\221\351\253\230\346\270\205\347\276\216\345\233\276\345\271\266\344\277\235\345\255\230.py" @@ -0,0 +1,74 @@ +__author__ = '布咯咯_rieuse' + +import os +import lxml.html +import requests +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +SERVICE_ARGS = ['--load-images=false', '--disk-cache=true'] +browser = webdriver.PhantomJS(service_args=SERVICE_ARGS) +# browser = webdriver.Firefox() +wait = WebDriverWait(browser, 5) +browser.set_window_size(1400, 900) + + +def parser(url, param): + # 解析模块 + browser.get(url) + wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, param))) + html = browser.page_source + doc = lxml.html.fromstring(html) + return doc + + +def get_main_url(): + print('打开主页搜寻链接中...') + try: + doc = parser('http://huaban.com/boards/favorite/beauty/', '#waterfall') + name = doc.xpath('//*[@id="waterfall"]/div/a[1]/div[2]/h3/text()') + u = doc.xpath('//*[@id="waterfall"]/div/a[1]/@href') + for item, fileName in zip(u, name): + main_url = 'http://huaban.com' + item + print('主链接已找到' + main_url) + if '*' in fileName: + fileName = fileName.replace('*', '') + download(main_url, fileName) + except Exception as e: + print(e) + + +def download(main_url, fileName): + print('-------准备下载中-------') + try: + doc = parser(main_url, '#waterfall') + if not os.path.exists('image\\' + fileName): + print('创建文件夹...') + 
os.makedirs('image\\' + fileName) + link = doc.xpath('//*[@id="waterfall"]/div/a/@href') + # print(link) + i = 0 + for item in link: + i += 1 + minor_url = 'http://huaban.com' + item + doc = parser(minor_url, '#pin_view_page') + img_url = doc.xpath('//*[@id="baidu_image_holder"]/a/img/@src') + img_url2 = doc.xpath('//*[@id="baidu_image_holder"]/img/@src') + img_url += img_url2 + try: + url = 'http:' + str(img_url[0]) + print('正在下载第' + str(i) + '张图片,地址:' + url) + r = requests.get(url) + filename = 'image\\{}\\'.format(fileName) + str(i) + '.jpg' + with open(filename, 'wb') as fo: + fo.write(r.content) + except Exception: + print('出错了!') + except Exception: + print('出错啦!') + + +if __name__ == '__main__': + get_main_url() diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\211\357\274\232\347\210\254\345\217\226v2ex\346\225\260\346\215\256\347\224\250csv\344\277\235\345\255\230.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\211\357\274\232\347\210\254\345\217\226v2ex\346\225\260\346\215\256\347\224\250csv\344\277\235\345\255\230.py" new file mode 100644 index 0000000..675afcf --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\270\211\357\274\232\347\210\254\345\217\226v2ex\346\225\260\346\215\256\347\224\250csv\344\277\235\345\255\230.py" @@ -0,0 +1,20 @@ +import csv, requests, re +from bs4 import BeautifulSoup + +url = 'https://www.v2ex.com/?tab=all' +html = requests.get(url).text +soup = BeautifulSoup(html, 'html.parser') +articles = [] +for article in soup.find_all(class_='cell item'): + title = article.find(class_='item_title').get_text() + category = article.find(class_='node').get_text() + author = re.findall(r'(?<= a') + link = 'https://www.v2ex.com' + re.findall(r'(?<=href=").+(?=")', str(u))[0] + articles.append([title, category, author, link]) + +with open(r'document\v2ex.csv', 'w') as f: + writer = csv.writer(f) + writer.writerow(['文章标题', '分类', '作者', '文章地址']) + for row in articles: + writer.writerow(row) diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\214\357\274\232\344\275\277\347\224\250lxml\350\247\243\346\236\220HTML\357\274\214\350\276\223\345\207\272\345\257\271\345\272\224\345\200\274.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\214\357\274\232\344\275\277\347\224\250lxml\350\247\243\346\236\220HTML\357\274\214\350\276\223\345\207\272\345\257\271\345\272\224\345\200\274.py" new file mode 100644 index 0000000..2ed2425 --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\214\357\274\232\344\275\277\347\224\250lxml\350\247\243\346\236\220HTML\357\274\214\350\276\223\345\207\272\345\257\271\345\272\224\345\200\274.py" @@ -0,0 +1,16 @@ +import requests +import lxml.html + +url = 'http://news.ifeng.com/listpage/11502/0/1/rtlist.shtml' +html = requests.get(url).text +doc = lxml.html.fromstring(html) +titles = doc.xpath('//div[@class="newsList"]/ul/li/a/text()') +href = doc.xpath('//div[@class="newsList"]/ul/li/a/@href') +i = 0 +for content in titles: + 
results = { + '标题': titles[i], + '链接': href[i] + } + i += 1 + print(results) diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\224\357\274\232\344\275\277\347\224\250Selenium\347\210\254\345\217\226\344\270\200\347\202\271\350\265\204\350\256\257\345\212\250\346\200\201\346\225\260\346\215\256.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\224\357\274\232\344\275\277\347\224\250Selenium\347\210\254\345\217\226\344\270\200\347\202\271\350\265\204\350\256\257\345\212\250\346\200\201\346\225\260\346\215\256.py" new file mode 100644 index 0000000..7c1855a --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\344\272\224\357\274\232\344\275\277\347\224\250Selenium\347\210\254\345\217\226\344\270\200\347\202\271\350\265\204\350\256\257\345\212\250\346\200\201\346\225\260\346\215\256.py" @@ -0,0 +1,26 @@ +from selenium.webdriver.common.keys import Keys +from selenium import webdriver +from bs4 import BeautifulSoup +import csv + +driver = webdriver.Firefox() +driver.implicitly_wait(3) +first_url = 'http://www.yidianzixun.com/channel/c6' +driver.get(first_url) +driver.find_element_by_class_name('icon-refresh').click() +for i in range(1, 90): + driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN) +soup = BeautifulSoup(driver.page_source, 'lxml') +articles = [] +for article in soup.find_all(class_='item doc style-small-image style-content-middle'): + title = article.find(class_='doc-title').get_text() + source = article.find(class_='source').get_text() + comment = article.find(class_='comment-count').get_text() + link = 'http://www.yidianzixun.com' + article.get('href') + articles.append([title, source, comment, link]) +driver.quit() +with open(r'document\yidian.csv', 'w') as f: + writer = csv.writer(f) + writer.writerow(['文章标题', '作者', '评论数', '文章地址']) + for row in articles: + writer.writerow(row) diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\205\255\357\274\232Selenium+xpath+bs4\347\210\254\345\217\226\344\272\232\351\251\254\351\200\212\346\225\260\346\215\256\344\277\235\345\255\230\345\210\260mongodb.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\205\255\357\274\232Selenium+xpath+bs4\347\210\254\345\217\226\344\272\232\351\251\254\351\200\212\346\225\260\346\215\256\344\277\235\345\255\230\345\210\260mongodb.py" new file mode 100644 index 0000000..c946b6f --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\205\255\357\274\232Selenium+xpath+bs4\347\210\254\345\217\226\344\272\232\351\251\254\351\200\212\346\225\260\346\215\256\344\277\235\345\255\230\345\210\260mongodb.py" @@ -0,0 +1,101 @@ +from selenium.common.exceptions import TimeoutException +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from selenium import webdriver +from bs4 import BeautifulSoup +import lxml.html +import pymongo +import re + +MONGO_URL = 'localhost' +MONGO_DB = 'amazon' 
+MONGO_TABLE = 'amazon-python' +SERVICE_ARGS = ['--load-images=false', '--disk-cache=true'] +KEYWORD = 'python' +client = pymongo.MongoClient(MONGO_URL) +db = client[MONGO_DB] + +browser = webdriver.PhantomJS(service_args=SERVICE_ARGS) +# browser = webdriver.Firefox() +wait = WebDriverWait(browser, 10) +browser.set_window_size(1400, 900) + + +def search(): + print('正在搜索') + try: + browser.get('https://www.amazon.cn/') + input = wait.until( + EC.presence_of_element_located((By.CSS_SELECTOR, '#twotabsearchtextbox')) + ) + submit = wait.until( + EC.element_to_be_clickable((By.CSS_SELECTOR, '#nav-search > form > div.nav-right > div > input'))) + input.send_keys(KEYWORD) + submit.click() + total = wait.until( + EC.presence_of_element_located((By.CSS_SELECTOR, '#pagn > span.pagnDisabled'))) + get_products() + print('一共' + total.text + '页') + return total.text + except TimeoutException: + return search() + + +def next_page(number): + print('正在翻页', number) + try: + wait.until(EC.text_to_be_present_in_element( + (By.CSS_SELECTOR, '#pagnNextString'), '下一页')) + submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#pagnNextString'))) + submit.click() + wait.until(EC.text_to_be_present_in_element( + (By.CSS_SELECTOR, '.pagnCur'), str(number))) + get_products() + except TimeoutException: + next_page(number) + + +def get_products(): + try: + wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#s-results-list-atf'))) + html = browser.page_source + soup = BeautifulSoup(html, 'lxml') + doc = lxml.html.fromstring(html) + date = doc.xpath('//*[@class="s-result-item celwidget "]/div/div[2]/div[1]/span[2]/text()') + content = soup.find_all(attrs={"id": re.compile(r'result_\d+')}) + for item, time in zip(content, date): + product = { + 'title': item.find(class_='s-access-title').get_text(), + 'image': item.find(class_='s-access-image cfMarker').get('src'), + 'price': item.find(class_='a-size-base a-color-price s-price a-text-bold').get_text(), + 'date': time + } + save_to_mongo(product) + print(product) + except Exception as e: + print(e) + + +def save_to_mongo(result): + try: + if db[MONGO_TABLE].insert(result): + print('存储到mongodb成功', result) + except Exception: + print('存储到mongodb失败', result) + + +def main(): + try: + total = search() + total = int(re.compile('(\d+)').search(total).group(1)) + for i in range(2, total + 1): + next_page(i) + except Exception as e: + print('出错啦', e) + finally: + browser.close() + + +if __name__ == '__main__': + main() diff --git "a/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\233\233\357\274\232\350\216\267\345\217\226\351\273\221\345\244\247\351\252\214\350\257\201\347\240\201\345\271\266\347\231\273\345\275\225.py" "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\233\233\357\274\232\350\216\267\345\217\226\351\273\221\345\244\247\351\252\214\350\257\201\347\240\201\345\271\266\347\231\273\345\275\225.py" new file mode 100644 index 0000000..9ad7735 --- /dev/null +++ "b/Python\347\210\254\350\231\253\346\227\245\350\256\260\347\263\273\345\210\227/Python\347\210\254\350\231\253\346\227\245\350\256\260\345\233\233\357\274\232\350\216\267\345\217\226\351\273\221\345\244\247\351\252\214\350\257\201\347\240\201\345\271\266\347\231\273\345\275\225.py" @@ -0,0 +1,27 @@ +import requests +from PIL import Image +from bs4 import BeautifulSoup + +url1 = 
'http://my.hlju.edu.cn/captchaGenerate.portal?'
+url2 = 'http://my.hlju.edu.cn/userPasswordValidate.portal'
+url3 = 'http://my.hlju.edu.cn/index.portal'
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
+}
+s = requests.session()
+response = s.get(url1, headers=headers)
+html = response.text
+soup = BeautifulSoup(html, 'html.parser')
+with open('img\\code.jpg', 'wb') as f:
+    f.write(response.content)
+img = Image.open('img\\code.jpg')
+img.show()
+data = {}
+data['Login.Token1'] = '20154433'
+data['Login.Token2'] = '134868'
+data['captcha'] = input('输入验证码:')
+data['goto'] = 'http://my.hlju.edu.cn/loginSuccess.portal'
+data['gotoOnFail'] = 'http://my.hlju.edu.cn/loginFailure.portal'
+response2 = s.post(url=url2, data=data, headers=headers)
+response3 = s.get(url3, headers=headers)
+print(response3.text)
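
Several of the scripts above pair with open(filename, 'w') with urllib.request.urlretrieve(...), leaving the opened text-mode handle unused, and some call pymongo's Collection.insert(), which is deprecated in pymongo 3 and removed in pymongo 4. Below is a minimal sketch of the equivalent idiomatic calls, assuming requests and pymongo 3.x are available; the helper names download_image and save_rooms are illustrative and not part of the repository.

import os
import requests
from pymongo import MongoClient


def download_image(url, filename):
    # Fetch the image with requests and write the raw bytes; there is no need
    # to wrap urlretrieve in an extra text-mode open() call.
    os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
    r = requests.get(url, timeout=10)
    r.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(r.content)


def save_rooms(col, data):
    # pymongo 3.x: insert_many() stores a list of documents, insert_one() a
    # single document; both replace the deprecated Collection.insert().
    if isinstance(data, list):
        col.insert_many(data)
    else:
        col.insert_one(data)


# Usage sketch (database and collection names taken from the first script above;
# the URL and document fields are placeholders):
col = MongoClient('localhost')['DouyuTV']['Roominfo']
download_image('http://example.com/a.jpg', 'photo/photo1.jpg')
save_rooms(col, {'room_id': 1})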