-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
76 lines (62 loc) · 2.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import subprocess
def fetch_oier_info(name):
    """Return the rendered HTML of the oier.baoshuo.dev search page for ``name``.

    A new Chrome session is started for every call, using the driver
    executable at ``chromedriver.exe``. The search URL is opened and the page
    is polled until no element containing the text "正在加载" ("loading") is
    present any more; the page source at that moment is returned.

    Args:
        name: Search term placed in the ``query`` URL parameter. It is
            percent-encoded, so characters such as ``&``, ``#``, ``+`` or
            spaces cannot corrupt the query string.

    Returns:
        The page source as a string, or ``None`` if loading the page or
        reading its source raised an exception (the error is printed).

    Raises:
        Whatever ``webdriver.Chrome`` raises if the browser session cannot be
        started; that call happens before the guarded section.
    """
    # Function-scope imports keep this block self-contained with respect to
    # the file's import section.
    from urllib.parse import quote

    from selenium.common.exceptions import TimeoutException

    # Path to the chromedriver executable; make sure it points at the real file.
    driver_path = r'chromedriver.exe'
    encoded_name = quote(name, safe='')
    url = f'https://oier.baoshuo.dev/?query={encoded_name}'

    # Service manages the driver executable for the WebDriver session.
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service)
    try:
        driver.get(url)
        # Poll until the "loading" marker is gone from the DOM.
        while True:
            try:
                WebDriverWait(driver, 2).until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//*[contains(text(), "正在加载")]')
                    )
                )
            except TimeoutException:
                # The marker did not show up within 2 seconds: loading is done.
                # Only the timeout ends the loop; any other error reaches the
                # outer handler instead of being silently swallowed.
                break
            # Marker still present: pause 2 seconds before checking again.
            time.sleep(2)
        return driver.page_source
    except Exception as e:
        print(f"Error fetching data for {name}: {e}")
        return None
    finally:
        # Always close the browser, on success and on failure.
        driver.quit()
def read_names_from_file(filename):
    """Read the non-blank lines of a UTF-8 text file as a list of names.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the stripped, non-empty lines in file order.
    """
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
    # mark. With plain "utf-8" the BOM (U+FEFF) would stay glued to the first
    # name, because str.strip() does not treat it as whitespace.
    with open(filename, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        names = [entry for entry in stripped_lines if entry]
    return names
# File that lists the names to look up, one per line.
names_file = "input.txt"
# File that collects the page source fetched for every name.
output_file = "medium.txt"

names = read_names_from_file(names_file)

# Write every page source into a single text file.
with open(output_file, "w", encoding="utf-8") as file:
    for name in names:
        page_source = fetch_oier_info(name)
        if page_source:
            file.write(f"Page source for {name}:{page_source}\n")
            # Disabled alternative: keep the page only when it contains a
            # specific marker text, otherwise record "no info".
            # if '高中毕业 1 年' in page_source:
            #     file.write(f"Page source for {name}:{page_source}\n")
            # else:
            #     file.write(f"Page source for {name}:no info\n")
        else:
            # Fetch failed (or returned an empty page): record the failure.
            file.write(f"Error fetching data for {name}\n")

# Report the file that was actually written.
print(f"Page sources saved to {output_file}")

# Run the external program transer.exe; check=True raises
# subprocess.CalledProcessError if it exits with a non-zero status.
subprocess.run(["transer.exe"], check=True)