import ConfigParser
from datetime import datetime
import sys
import time

import numpy as np
import pandas as pd

import utils.counts as counts
from utils.station_filter import StationFilter

__author__ = 'Andrew A Campbell'

# This script filters PeMS station data.
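# Usage (a sketch inferred from the argv handling at the bottom of this file):
#
#   python test_filters.py path/to/config.ini
#
# where config.ini is a ConfigParser-style file with [Paths] and [Params]
# sections (see the example sketched inside main() below).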
def main(args):
    ##
    # Load values from config file
    ##
    config_path = args[1]
    conf = ConfigParser.ConfigParser()
    conf.read(config_path)

    # Paths
    meta_path = conf.get('Paths', 'meta_path')
    stat_link_map_path = conf.get('Paths', 'stat_link_map_path')
    ts_dir = conf.get('Paths', 'ts_dir')  # Path to station time series
    out_cleaned_path = conf.get('Paths', 'out_cleaned_path')  # Where to write the stations that pass all filters
    out_removed_path = conf.get('Paths', 'out_removed_path')  # Where to write the stations that were filtered out
    out_log_path = conf.get('Paths', 'out_log_path')  # Where to write the log of removal reasons
    poly_path = conf.get('Paths', 'poly_path')  # Boundary polygon used by the boundary_buffer filter

    # Parameters
    start_date = datetime.strptime(conf.get('Params', 'start_date'), '%Y/%m/%d')
    end_date = datetime.strptime(conf.get('Params', 'end_date'), '%Y/%m/%d')
    weekdays = [int(s.strip()) for s in conf.get('Params', 'weekdays').split(',')]
    counts_year = conf.get('Params', 'counts_year')  # Note: read but not used below
    date_list = counts.date_string_list(start_date, end_date, weekdays)
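    # A minimal sketch of the expected config file. The section and key names
    # come from the conf.get() calls above; every value below is a placeholder
    # assumption, and the weekday numbering convention is assumed to follow
    # datetime.weekday() (Monday = 0):
    #
    #   [Paths]
    #   meta_path = /path/to/station_meta.txt
    #   stat_link_map_path = /path/to/station_link_map.csv
    #   ts_dir = /path/to/station_time_series/
    #   out_cleaned_path = /path/to/cleaned_stations.csv
    #   out_removed_path = /path/to/removed_stations.csv
    #   out_log_path = /path/to/removal_reasons.log
    #   poly_path = /path/to/boundary_polygon
    #
    #   [Params]
    #   start_date = 2010/01/01
    #   end_date = 2010/12/31
    #   weekdays = 0, 1, 2, 3, 4
    #   counts_year = 2010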
    ##
    # Initialize the StationFilter and add filters
    ##
    sf = StationFilter(ts_dir, meta_path)
    sf.set_stations(['401620'])  # Debug: restrict to a station that broke __outlier_detection_SVM with NaN bugs

    # 1 - date_range
    sf.date_range(start_date, end_date)
    # 2 - link_mapping
    sf.link_mapping(stat_link_map_path)
    # 3 - missing_data
    sf.missing_data(date_list)
    # 4 - boundary_buffer
    sf.boundary_buffer(poly_path)
    # 5 - outlier_detection_SVM
    sf.outlier_detection_SVM(date_list, decision_dist=4, threshold=0.05)
    # 6 - observed
    sf.observed(date_list)
    ##
    # Run the filters
    ##
    t_start = time.time()
    sf.run_filters(check_removed=True)
    t_end = time.time()
    print "Time to run %d filters: %d [sec]" % (len(sf.filters), t_end - t_start)
    print
    ##
    # Write the filtering results
    ##
    sf.cleaned_station_ids = list(sf.cleaned_station_ids)
    sf.removed_station_ids = list(sf.removed_station_ids)
    # if len(sf.cleaned_station_ids) < len(sf.removed_station_ids):
    #     [sf.cleaned_station_ids.append(999999999) for i in
    #      np.arange(len(sf.removed_station_ids) - len(sf.cleaned_station_ids))]
    # elif len(sf.cleaned_station_ids) > len(sf.removed_station_ids):
    #     [sf.removed_station_ids.append(999999999) for i in
    #      np.arange(len(sf.cleaned_station_ids) - len(sf.removed_station_ids))]
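    # (The commented-out padding above was presumably needed because the
    # commented-out pd.DataFrame call below requires equal-length columns;
    # 999999999 acted as a sentinel to pad the shorter list. The active
    # write_* calls at the bottom make it unnecessary.)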
print "cleaned_station_ids"
print sf.cleaned_station_ids
print
print "removed_station_ids"
print sf.removed_station_ids
print
# print "removed station reasons"
# with open(out_log_path, 'w') as fo:
# for stat in sf.removed_stats_reasons.items():
# print stat
# fo.write(str(stat) + '\n')
# df_out = pd.DataFrame({'cleaned': sf.cleaned_station_ids, 'removed': sf.removed_station_ids})
# df_out.to_csv(out_df_path, index=False) # Write the results to a csv
sf.write_cleaned_stations(out_cleaned_path)
sf.write_removed_stations(out_removed_path)
sf.write_removed_reasons_log(out_log_path)

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'ERROR: need to supply the path to the config file'
        sys.exit(1)
    main(sys.argv)