-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGEV_district_ks.py
75 lines (60 loc) · 2.43 KB
/
GEV_district_ks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import xarray as xr
import numpy as np
from scipy.stats import genextreme
from scipy import stats
import geopandas as gpd
import pandas as pd
import salem
import concurrent.futures
from tqdm import tqdm
def fit_distribution(index, ssp, district_name):
district_path = f'E:/GEO/geodata/{district_name}.shp'
district = gpd.read_file(district_path)
ori = []
for year in range(1961, 2015):
data_path = f'E:/GEO/etccdi/{index}{year}.nc'
data = xr.open_dataset(data_path)
data = data.transpose('time', 'latitude', 'longitude')
data_distr = data.salem.roi(shape=district)
ori.extend(data_distr['pre'].values.flatten().tolist())
# 将 totest 转换为 NumPy 数组
ori = np.array(ori)
# 删除 NaN 值
ori = ori[~np.isnan(ori)]
totest = []
for year in range(2015, 2068):
data_path = f'E:/GEO/etccdi/qpm/mme/cut/{index}_{ssp}_{year}.nc'
data = xr.open_dataset(data_path)
data = data.transpose('time', 'lat', 'lon')
data_distr = data.salem.roi(shape=district)
totest.extend(data_distr['__xarray_dataarray_variable__'].values.flatten().tolist())
# 将 totest 转换为 NumPy 数组
totest = np.array(totest)
# 删除 NaN 值
totest = totest[~np.isnan(totest)]
statistic, p_value = stats.ks_2samp(ori, totest)
return {'index': index, 'ssp': ssp, 'district': district_name,
'statistic': statistic, 'p_value': p_value}
def main():
indices = ['rx1day', 'r99p', 'r95p']
ssps = ['126', '245', '370', '585']
districts = ['CC', 'SWC', 'NWC', 'SC', 'EC', 'NC', 'NEC']
df = pd.DataFrame(columns=['index', 'ssp', 'district', 'statistic', 'p_value'])
with concurrent.futures.ProcessPoolExecutor(max_workers=12) as executor:
futures = []
for index in indices:
for ssp in ssps:
for district_name in districts:
futures.append(executor.submit(fit_distribution, index, ssp, district_name))
results = []
for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
result = future.result()
results.append(result)
# 将结果整理到 DataFrame 中
for result in results:
df = df.append(result, ignore_index=True)
# 将 DataFrame 导出到 Excel 文件
excel_file = 'E:/GEO/result/qpm/gev_ks.xlsx'
df.to_excel(excel_file, index=False)
if __name__ == '__main__':
main()