# download.py
# Source: repository forked from ermongroup/CSDI.
import tarfile
import zipfile
import sys
import os
import wget
import requests
import pandas as pd
import pickle
def create_normalizer_pm25(
    data_path="./data/pm25/Code/STMVL/SampleData/pm25_ground.txt",
    out_path="./data/pm25/pm25_meanstd.pk",
    test_months=(3, 6, 9, 12),
):
    """Pickle the per-column mean and std of the PM2.5 ground-truth table.

    The CSV at ``data_path`` is read with its ``datetime`` column as a parsed
    index. Rows whose month appears in ``test_months`` are dropped, and the
    ``mean`` and ``std`` rows of ``DataFrame.describe()`` for the remaining
    rows are written to ``out_path`` as the pickled list ``[mean, std]``
    (two NumPy arrays, one entry per column).

    Args:
        data_path: CSV file with a ``datetime`` column.
        out_path: Destination of the pickle file.
        test_months: Month numbers (1-12) excluded from the statistics.
            NOTE(review): presumably these are the held-out test months --
            confirm against the code that consumes the pickle.
    """
    df = pd.read_csv(data_path, index_col="datetime", parse_dates=True)

    # One boolean mask instead of re-filtering the frame once per month.
    kept = df[~df.index.month.isin(list(test_months))]

    # describe() is computed once; it supplies both statistics.
    stats = kept.describe()
    mean = stats.loc["mean"].values
    std = stats.loc["std"].values

    with open(out_path, "wb") as f:
        pickle.dump([mean, std], f)


def _download_physio():
    """Download the PhysioNet Challenge 2012 set-a archive into data/physio."""
    url = "https://physionet.org/files/challenge-2012/1.0.0/set-a.tar.gz?download"
    # wget picks the final file name itself (and appends " (1)" etc. when the
    # file already exists), so open the path it returns rather than a guess.
    archive = wget.download(url, out="data")
    with tarfile.open(archive, "r:gz") as tar:
        # The archive comes from the network: use the "data" extraction
        # filter where this Python provides it, to reject unsafe members.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path="data/physio", filter="data")
        else:
            tar.extractall(path="data/physio")


def _download_pm25():
    """Download the STMVL release zip, unpack it, and build the normalizer."""
    url = "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/STMVL-Release.zip"
    response = requests.get(url, timeout=60)
    # Fail here on an HTTP error instead of saving an error page as a ".zip".
    response.raise_for_status()

    filename = "data/STMVL-Release.zip"
    with open(filename, mode="wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(filename) as z:
        z.extractall("data/pm25")

    create_normalizer_pm25()


def main(argv=None):
    """Fetch the dataset named by ``argv[1]`` (``physio`` or ``pm25``).

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Raises:
        SystemExit: With a usage message when the dataset name is missing
            or is not one of the supported names.
    """
    argv = sys.argv if argv is None else argv
    handlers = {"physio": _download_physio, "pm25": _download_pm25}

    # Validate before touching the filesystem or the network.
    if len(argv) < 2 or argv[1] not in handlers:
        prog = argv[0] if argv else "download.py"
        sys.exit("usage: {} {{physio|pm25}}".format(prog))

    os.makedirs("data/", exist_ok=True)
    handlers[argv[1]]()


if __name__ == "__main__":
    main()