Skip to content

Commit 3ef305b

Browse files
committed
add get_history_weather.py:leaves:
1 parent db1bb52 commit 3ef305b

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

spiderFile/get_history_weather.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import re
2+
import pandas as pd
3+
import requests as rq
4+
from bs4 import BeautifulSoup
5+
6+
7+
def get_data(url, *, timeout=10):
    """Download one history-weather page and return its table as a DataFrame.

    The page is fetched over HTTP, decoded as GBK, and every ``<tr>`` element
    after the first is split on whitespace into tokens. From each row:

    * token 0 becomes the date,
    * tokens 1-2 (joined) become the weather condition,
    * tokens 3-5 (joined) become the temperature.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``日期`` (date),
        ``天气状况`` (weather condition) and ``气温`` (temperature), one row
        per non-empty table row.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    response = rq.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()

    html = response.content.decode("gbk")
    soup = BeautifulSoup(html, "html.parser")

    dates, conditions, temperatures = [], [], []
    # The first <tr> is skipped; presumably it is the table header row.
    for row in soup.find_all("tr")[1:]:
        fields = row.text.split()
        if not fields:
            # A row without any text has no date token; indexing it would
            # raise IndexError, so skip it.
            continue
        dates.append(fields[0])
        conditions.append("".join(fields[1:3]))
        temperatures.append("".join(fields[3:6]))

    return pd.DataFrame(
        {
            "日期": dates,
            "天气状况": conditions,
            "气温": temperatures,
        }
    )
22+
23+
# Fetch Guangzhou's weather history for the first quarter of 2019,
# one page per month (January, February, March).
data_1_month, data_2_month, data_3_month = map(
    get_data,
    (
        "http://www.tianqihoubao.com/lishi/guangzhou/month/201901.html",
        "http://www.tianqihoubao.com/lishi/guangzhou/month/201902.html",
        "http://www.tianqihoubao.com/lishi/guangzhou/month/201903.html",
    ),
)

# Stack the three monthly tables into one frame with a fresh 0..n-1 index.
data = pd.concat(
    [data_1_month, data_2_month, data_3_month],
    ignore_index=True,
)

# Write the combined quarter to a UTF-8 CSV without the index column.
data.to_csv("guangzhou_history_weather_data.csv", encoding="utf-8", index=False)

0 commit comments

Comments
 (0)