#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# Script 1 of 2 carried by this patch: 12306抢票/new_qiangpiao.py
# (Script 2, 省市区乡村五级地址库/get_city.py, follows further down; the two
# scripts are independent of each other.)
# ---------------------------------------------------------------------------

"""
Grab 12306 train tickets through a splinter-driven browser.

The user logs in by hand (QR code or account login); afterwards the script
keeps clicking "query" and tries to book as soon as the wanted seat class has
tickets. The browser window must stay open while the script is running.

author: cuizy
time: 2018-11-21
"""

import os
import re
import smtplib
import sys
from datetime import datetime
from email.mime.text import MIMEText
from time import sleep
from urllib import parse

import httplib2
from splinter.browser import Browser


class BrushTicket(object):
    """Ticket-booking workflow: log in, poll the query page, book, notify."""

    # Seat class name -> (index of its <td> inside a result row,
    #                     <option value> used on the passenger form).
    SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Unknown seat class names fall back to the 硬卧 entry.
    DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, passengers, from_time, from_station, to_station, number, seat_type, receiver_mobile,
                 receiver_email):
        """Store the booking parameters and open the browser.

        :param passengers: list of passenger names as shown on the order page
        :param from_time: travel date string written into the query cookie
        :param from_station: departure station cookie value
        :param to_station: arrival station cookie value
        :param number: train number; stored via ``str.capitalize()``
        :param seat_type: seat class name, looked up in ``SEAT_TYPES``
        :param receiver_mobile: phone number notified on success
        :param receiver_email: e-mail address notified on success
        """
        self.passengers = passengers
        self.from_station = from_station
        self.to_station = to_station
        self.from_time = from_time
        self.number = number.capitalize()
        self.seat_type_index, self.seat_type_value = self.SEAT_TYPES.get(seat_type, self.DEFAULT_SEAT_TYPE)
        # Notification targets
        self.receiver_mobile = receiver_mobile
        self.receiver_email = receiver_email
        # Main pages of the 12306 site
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # Browser driver; chromedriver downloads:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    def do_login(self):
        """Open the login page and block until the user has logged in.

        Login itself is manual; this method only polls the browser URL once
        per second until it equals ``self.init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def start_brush(self):
        """Run the whole flow: log in, then query repeatedly and try to book.

        The query loop runs for as long as the browser stays on the ticket
        query page. Errors inside one round are printed and the loop goes on;
        ``sys.exit(1)`` is raised when the seat class is not sold on the train.
        """
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            self._load_query_cookies()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    self._try_book()
                except Exception as error_info:
                    # Best effort: a failed round must not stop the polling.
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def _load_query_cookies(self):
        """Write the query cookies (stations, date) and reload the page."""
        self.driver.cookies.add({"_jc_save_fromStation": self.from_station})
        self.driver.cookies.add({"_jc_save_toStation": self.to_station})
        self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
        self.driver.reload()

    def _try_book(self):
        """Inspect the wanted train's row once and book it if seats are left."""
        car_no_location = self.driver.find_by_id("queryLeftTable")[0].find_by_text(self.number)[1]
        current_tr = car_no_location.find_by_xpath("./../../../../..")
        # Read the seat cell once instead of re-querying the DOM per branch.
        seat_text = current_tr.find_by_tag('td')[self.seat_type_index].text
        if seat_text == '--':
            print('无此座位类型出售,已结束当前刷票,请重新开启!')
            # SystemExit is not an Exception subclass, so it passes through
            # the callers' ``except Exception`` handlers and ends the script.
            sys.exit(1)
        if seat_text == '无':
            print('无票,继续尝试……')
            sleep(1)
            return
        # Tickets available: try to book.
        print('刷到票了(余票数:' + str(seat_text) + '),开始尝试预订……')
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        self._select_passengers()
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result text means the submission was rejected.
        submit_false_info = self.driver.find_by_id('orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            # Go back one step so that the query loop can continue.
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        self._notify_success()

    def _select_passengers(self):
        """Tick every passenger on the order page and choose the seat class."""
        for key_value, p in enumerate(self.passengers, start=1):
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            print('开始选择席别……')
            # Value 0 means: leave the <select> at whatever it already shows.
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value) + "']/option[@value='" + str(
                        self.seat_type_value) + "']").first.click()
            sleep(0.2)
            # NOTE(review): names ending with ")" presumably carry a passenger
            # type suffix that triggers a confirmation dialog -- confirm.
            if p.endswith(')'):
                self.driver.find_by_id('dialog_xsertcj_ok').click()

    def _notify_success(self):
        """Send the e-mail and the SMS; one failing must not skip the other."""
        try:
            self.send_mail(self.receiver_email, '恭喜您,抢到票了,请及时前往12306支付订单!')
        except Exception as error_info:
            print(error_info)
        try:
            # The SMS provider's test service is used (see send_sms), hence
            # the verification-code style text.
            self.send_sms(self.receiver_mobile, '您的验证码是:8888。请不要把验证码泄露给其他人。')
        except Exception as error_info:
            print(error_info)

    def send_sms(self, mobile, sms_info):
        """Send a notification SMS through the ihuyi (互亿无线) test service.

        :param mobile: receiver phone number
        :param sms_info: message text
        :return: raw response body returned by the SMS gateway
        """
        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        # NOTE(security): credentials do not belong in source control. They
        # can be overridden via environment variables; the literals are only
        # kept as defaults for backward compatibility.
        account = os.environ.get('TICKET_SMS_ACCOUNT', "C59782899")
        pass_word = os.environ.get('TICKET_SMS_PASSWORD', "19d4d9c0796532c7328e8b82e2812655")
        params = parse.urlencode(
            {'account': account, 'password': pass_word, 'content': sms_info, 'mobile': mobile, 'format': 'json'}
        )
        headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            response = conn.getresponse()
            return response.read()
        finally:
            # Close the socket even when the request or the read fails.
            conn.close()

    def send_mail(self, receiver_address, content):
        """Send the notification e-mail through the 163.com SMTP server.

        :param receiver_address: receiver e-mail address
        :param content: message text placed into the HTML body
        """
        host = 'smtp.163.com'
        port = 25
        # NOTE(security): see send_sms -- override via environment variables.
        sender = os.environ.get('TICKET_MAIL_SENDER', 'gxcuizy@163.com')
        # Client authorization code of the mailbox, not the login password.
        pwd = os.environ.get('TICKET_MAIL_PASSWORD', 'CUIzy9118')
        receiver = receiver_address
        # NOTE(review): the wrapper text around ``content`` was not fully
        # recoverable from the patch (only line breaks are visible) -- confirm.
        body = '\n' + content + '\n'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        # The context manager issues QUIT and closes the connection.
        with smtplib.SMTP(host, port) as s:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())


def _is_valid_date(text):
    """Return True if *text* is a real calendar date written as YYYY-MM-DD."""
    if re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', text) is None:
        return False
    try:
        datetime.strptime(text, '%Y-%m-%d')
    except ValueError:
        # Right shape, impossible date (e.g. month 13).
        return False
    return True


def _split_passengers(text):
    """Split a comma separated name list, dropping blanks and empty names."""
    return [name.strip() for name in text.split(",") if name.strip()]


if __name__ == '__main__':
    # Passenger names (1 to 4)
    passengers = _split_passengers(
        input('请输入乘车人姓名,多人用英文逗号“,”连接,(例如单人“张三”或者多人“张三,李四”):'))
    while not 1 <= len(passengers) <= 4:
        print('乘车人最少1位,最多4位!')
        passengers = _split_passengers(
            input('请重新输入乘车人姓名,多人用英文逗号“,”连接,(例如单人“张三”或者多人“张三,李四”):'))
    # Travel date
    from_time = input('请输入乘车日期(例如“2018-08-08”):')
    while not _is_valid_date(from_time):
        from_time = input('乘车日期不能为空或者时间格式不正确,请重新输入:')
    # Station cookie values keyed by pinyin initials
    city_list = {
        'bj': '%u5317%u4EAC%2CBJP',  # 北京
        # 'hd' used to carry the 天津 value although it was labelled 邯郸.
        # NOTE(review): Handan's station code HDP was supplied from memory -- confirm.
        'hd': '%u90AF%u90F8%2CHDP',  # 邯郸
        'tj': '%u5929%u6D25%2CTJP',  # 天津
        'nn': '%u5357%u5B81%2CNNZ',  # 南宁
        'wh': '%u6B66%u6C49%2CWHN',  # 武汉
        'cs': '%u957F%u6C99%2CCSQ',  # 长沙
        'ty': '%u592A%u539F%2CTYV',  # 太原
        'yc': '%u8FD0%u57CE%2CYNV',  # 运城
        'gzn': '%u5E7F%u5DDE%u5357%2CIZQ',  # 广州南
        'wzn': '%u68A7%u5DDE%u5357%2CWBZ',  # 梧州南
    }
    # Departure station
    from_input = input('请输入出发站,只需要输入首字母就行(例如北京“bj”):')
    while from_input not in city_list:
        from_input = input('出发站不能为空或不支持当前出发站(如有需要,请联系管理员!),请重新输入:')
    from_station = city_list[from_input]
    # Arrival station
    to_input = input('请输入终点站,只需要输入首字母就行(例如北京“bj”):')
    while to_input not in city_list:
        to_input = input('终点站不能为空或不支持当前终点站(如有需要,请联系管理员!),请重新输入:')
    to_station = city_list[to_input]
    # Train number
    number = input('请输入车次号(例如“G110”):')
    while number == '':
        number = input('车次号不能为空,请重新输入:')
    # Seat class
    seat_type = input('请输入座位类型(例如“软卧”):')
    while seat_type == '':
        seat_type = input('座位类型不能为空,请重新输入:')
    # Phone number notified after a successful booking
    receiver_mobile = input('请预留一个手机号码,方便抢到票后进行通知(例如:18888888888):')
    while re.fullmatch(r'1[0-9]{10}', receiver_mobile) is None:
        receiver_mobile = input('预留手机号码不能为空或者格式不正确,请重新输入:')
    # E-mail address notified after a successful booking
    receiver_email = input('请预留一个邮箱,方便抢到票后进行通知(例如:test@163.com):')
    while receiver_email == '':
        receiver_email = input('预留邮箱不能为空,请重新输入:')
    # Start grabbing
    ticket = BrushTicket(passengers, from_time, from_station, to_station, number, seat_type, receiver_mobile,
                         receiver_email)
    ticket.start_brush()


# ---------------------------------------------------------------------------
# Script 2 of 2 carried by this patch: 省市区乡村五级地址库/get_city.py
#
# Crawl the five address levels (province, city, county, town, village) with
# BeautifulSoup and store each level in its own JSON file.
#
# author: gxcuizy
# date: 2018-11-01
# ---------------------------------------------------------------------------

import json
import os
import threading
from datetime import datetime
from urllib import parse

import requests
from bs4 import BeautifulSoup


class GetCity(object):
    """Crawl the five-level address list published under ``GetCity.url``."""

    # Index page listing the provinces
    url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/'

    def __init__(self):
        """Set the output folder, the per-level file names and the file lock."""
        self.json_folder = 'json'
        self.json_file = {'province': 'province.json', 'city': 'city.json', 'county': 'county.json',
                          'town': 'town.json', 'village': 'village.json'}
        # Guards the JSON files, which are written from several threads.
        self.lock = threading.Lock()

    def get_html(self, url):
        """Download *url* and return its text decoded as GBK.

        :return: page text, or ``''`` when the request fails
        """
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
        }
        try:
            # Without a timeout a stalled connection blocks its thread forever.
            response = requests.get(url=url, headers=header, timeout=30)
        except requests.RequestException as error_info:
            print(error_info)
            return ''
        response.encoding = 'gbk'
        return response.text

    def _save_record(self, name, code, parent_code, level, city_type):
        """Build one address record and append it to the file of *city_type*."""
        dict_info = {'name': name, 'code': code, 'parent_code': parent_code, 'level': level}
        self.read_write_by_json(dict_info, city_type)

    def get_city(self, origin_url, now_url, origin_code):
        """Store all cities of one province, then descend into each city.

        :param origin_url: URL of the page that contained the link
        :param now_url: (relative) link to the province page
        :param origin_code: code of the parent province
        """
        province_url = parse.urljoin(origin_url, now_url)
        print('开始解析市级信息……')
        html = self.get_html(province_url)
        soup = BeautifulSoup(html, 'lxml')
        for city_info in soup.select('.citytr'):
            a_info = city_info.find_all(name='a')
            if len(a_info) < 2:
                # Row without links: nothing to store or to follow.
                continue
            city_name = a_info[1].get_text()
            city_code = a_info[0].get_text()
            city_url = a_info[0].attrs['href']
            print(city_name, city_code, city_url)
            self._save_record(city_name, city_code, origin_code, 2, 'city')
            self.get_county(province_url, city_url, city_code)
        print('市级解析结束!')

    def get_county(self, origin_url, now_url, origin_code):
        """Store all counties/districts of one city, then descend into each.

        Rows without a link are only printed, not stored.

        :param origin_url: URL of the page that contained the link
        :param now_url: (relative) link to the city page
        :param origin_code: code of the parent city
        """
        city_url = parse.urljoin(origin_url, now_url)
        print('开始解析县/区级信息……')
        html = self.get_html(city_url)
        soup = BeautifulSoup(html, 'lxml')
        for county_info in soup.select('.countytr'):
            a_info = county_info.find_all(name='a')
            if a_info:
                county_name = a_info[1].get_text()
                county_code = a_info[0].get_text()
                county_url = a_info[0].attrs['href']
                print(county_name, county_code, county_url)
                self._save_record(county_name, county_code, origin_code, 3, 'county')
                self.get_town(city_url, county_url, county_code)
            else:
                td_info = county_info.find_all(name='td')
                county_name = td_info[1].get_text()
                county_code = td_info[0].get_text()
                county_url = ''
                print(county_name, county_code, county_url)
        print('县/区级解析结束!')

    def get_town(self, origin_url, now_url, origin_code):
        """Store all towns of one county, then descend into each town.

        :param origin_url: URL of the page that contained the link
        :param now_url: (relative) link to the county page
        :param origin_code: code of the parent county
        """
        county_url = parse.urljoin(origin_url, now_url)
        print('开始解析乡镇级信息……')
        html = self.get_html(county_url)
        soup = BeautifulSoup(html, 'lxml')
        for town_info in soup.select('.towntr'):
            a_info = town_info.find_all(name='a')
            if len(a_info) < 2:
                # Row without links: nothing to store or to follow.
                continue
            town_name = a_info[1].get_text()
            town_code = a_info[0].get_text()
            town_url = a_info[0].attrs['href']
            print(town_name, town_code, town_url)
            self._save_record(town_name, town_code, origin_code, 4, 'town')
            self.get_village(county_url, town_url, town_code)
        print('乡镇级解析结束!')

    def get_village(self, origin_url, now_url, origin_code):
        """Store all villages of one town (the last level, no links to follow).

        :param origin_url: URL of the page that contained the link
        :param now_url: (relative) link to the town page
        :param origin_code: code of the parent town
        """
        town_url = parse.urljoin(origin_url, now_url)
        print('开始解析村级信息……')
        html = self.get_html(town_url)
        soup = BeautifulSoup(html, 'lxml')
        for village_info in soup.select('.villagetr'):
            td_info = village_info.find_all(name='td')
            if len(td_info) < 3:
                continue
            # Cell 0 holds the code, cell 2 the name.
            village_name = td_info[2].get_text()
            village_code = td_info[0].get_text()
            village_url = ''
            print(village_name, village_code, village_url)
            self._save_record(village_name, village_code, origin_code, 5, 'village')
        print('村级解析结束!')

    def init_file(self):
        """Create the output folder and an empty JSON list per missing file."""
        os.makedirs(self.json_folder, exist_ok=True)
        for file_name in self.json_file.values():
            file_path = os.path.join(self.json_folder, file_name)
            if not os.path.exists(file_path):
                with open(file_path, 'w', encoding='utf-8') as file:
                    json.dump([], file)

    def read_write_by_json(self, data, city_type):
        """Append *data* to the JSON list stored in the file of *city_type*.

        :param data: record (dict) to append
        :param city_type: key of ``self.json_file``
        """
        file_name = self.json_file[city_type]
        file_path = os.path.join(self.json_folder, file_name)
        # ``with`` releases the lock even when reading or writing raises;
        # otherwise one bad file would block every other thread for good.
        # NOTE(review): the whole file is re-read and re-written per record,
        # which gets slow for the large levels.
        with self.lock:
            with open(file_path, 'r', encoding='utf-8') as read_file:
                data_list = json.load(read_file)
            data_list.append(data)
            with open(file_path, 'w', encoding='utf-8') as write_file:
                json.dump(data_list, write_file, ensure_ascii=False)

    def run(self):
        """Entry point: store the provinces and crawl each one in a thread.

        Returns only after all worker threads have finished.
        """
        self.init_file()
        print('开始解析省份信息……')
        html = self.get_html(self.url)
        soup = BeautifulSoup(html, 'lxml')
        workers = []
        for province_info in soup.select('.provincetr a'):
            province_name = province_info.get_text()
            province_url = province_info.attrs['href']
            # The code is the part of the link before the first dot.
            province_code = province_url.split('.')[0]
            print(province_name, province_code, province_url)
            # NOTE(review): level is the string '1' here while the other
            # levels use ints; kept as-is so existing output stays unchanged.
            self._save_record(province_name, province_code, '0', '1', 'province')
            # One crawler thread per province
            t = threading.Thread(target=self.get_city, name='LoopThread', args=(self.url, province_url, province_code))
            t.start()
            workers.append(t)
        print('省份解析结束!')
        # Wait for the crawlers so that callers can rely on run() being done
        # (the script below measures its end time right after run()).
        for t in workers:
            t.join()


# Script entry
if __name__ == '__main__':
    print('开始执行……')
    start_time = datetime.now()
    city = GetCity()
    city.run()
    end_time = datetime.now()
    print('程序执行结束!')
    print('开始时间:%s,结束时间:%s' % (start_time.strftime('%Y-%m-%d %H:%M:%S'), end_time.strftime('%Y-%m-%d %H:%M:%S')))