-
Notifications
You must be signed in to change notification settings - Fork 1
/
RunHeatMapTests.py
executable file
·260 lines (249 loc) · 15.1 KB
/
RunHeatMapTests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import argparse
import math
import os
import random
import warnings

import numpy as np
import pandas as pd
from sklearn.gaussian_process.kernels import ExpSineSquared

from evaluation import EvaluationHelper
from training import ModelsGaussianProcessRegression, EvarsGpr, TrainHelper
def str_to_bool(value):
    """Convert a command line string into a boolean.

    Passing ``type=bool`` to argparse calls ``bool()`` on the raw string, which is True for
    every non-empty value (including "False"). This converter parses the text instead.

    :param value: raw command line value (or an actual bool)
    :return: parsed boolean
    :raises argparse.ArgumentTypeError: if the value is not a recognised boolean literal
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    # the empty string stays False, as it was with the former bool() conversion
    if normalized in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got '" + str(value) + "'")


def build_arg_parser():
    """Create the command line parser holding all parameters of the heat map tests.

    :return: configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser()
    ### General Params ###
    # Data Setup
    parser.add_argument("-seaslen", "--seasonal_length", type=int, default=50, help="specify season length")
    parser.add_argument("-nper", "--n_periods", type=int, default=5, help="specify number of periods")
    parser.add_argument("-noise", "--noise", type=float, default=0.01, help="specify noise for sine x and y")
    parser.add_argument("-offset", "--offset", type=float, default=10, help="specify offset for sine y")
    # Pipeline
    parser.add_argument("-scalethr", "--scale_threshold", type=float, default=0.1,
                        help="specify output scale threshold")
    parser.add_argument("-scaleseas", "--scale_seasons", type=int, default=2,
                        help="specify output scale seasons taken into account")
    parser.add_argument("-scalew-factor", "--scale_window_factor", type=float, default=0.1,
                        help="specify scale window factor based on seasonal periods")
    parser.add_argument("-scalew-min", "--scale_window_minimum", type=int, default=2,
                        help="specify scale window minimum")
    ### DA Params ###
    # General
    parser.add_argument("-da", "--data_augmentation", type=str, default='scaled',
                        help="specify data augmentation")
    parser.add_argument("-app", "--append", type=str, default='no', help="specify append mechanism")
    # GN Params
    parser.add_argument("-operc", "--o_percentage", type=float, default=1.1, help="specify oversampling perentage gn")
    parser.add_argument("-uperc", "--u_percentage", type=float, default=0.8, help="specify undersampling perentage gn")
    parser.add_argument("-thr", "--threshold", type=float, default=0.1, help="specify threshold gn")
    # SMOGN Params
    parser.add_argument("-relthr", "--relevance_threshold", type=float, default=1.5,
                        help="specify relevance threshold smogn")
    parser.add_argument("-relcoef", "--relevance_coefficient", type=float, default=0.5,
                        help="specify relevance coefficient smogn")
    # str_to_bool instead of bool: bool("False") would evaluate to True
    parser.add_argument("-undersamp", "--undersampling", type=str_to_bool, default=False,
                        help="specify undersampling smogn")
    ### CPD Params ###
    parser.add_argument("-cpd", "--change_point_detection", type=str, default='cf',
                        help="specify cpd algo: bocd or cf")
    # BOCPD
    parser.add_argument("-chaz", "--constant_hazard", type=int, default=9999, help="specify constant hazard")
    parser.add_argument("-chaz-factor", "--constant_hazard_factor", type=int, default=2,
                        help="specify constant hazard factor based on seasonal periods")
    # CF
    parser.add_argument("-cfr", "--changefinder_r", type=float, default=0.4,
                        help="specify changefinders r param (decay factor older values)")
    parser.add_argument("-cforder", "--changefinder_order", type=int, default=1,
                        help="specify changefinders SDAR model order param")
    parser.add_argument("-cfsmooth", "--changefinder_smooth", type=int, default=4,
                        help="specify changefinders smoothing param")
    parser.add_argument("-cfthrperc", "--changefinder_threshold_percentile", type=int, default=70,
                        help="specify percentile of train set anomaly factors as threshold for cpd with changefinder")
    return parser


def write_result_row(results, row_index, values):
    """Write the given column values into a single row of the results DataFrame.

    :param results: DataFrame collecting the results of one heat map
    :param row_index: index label of the row to write (created if it does not exist yet)
    :param values: dict mapping column names to the values to store
    """
    for column, value in values.items():
        results.at[row_index, column] = value


if __name__ == '__main__':
    # Script to run heatmap tests based on different scenarios of manipulated simulated data.
    # All parameters of EVARS-GPR and its components can be specified via the command line.
    warnings.simplefilter("ignore")
    np.random.seed(0)
    random.seed(0)
    doc_results = None
    parser = build_arg_parser()
    args = parser.parse_args()
    # General Params
    season_length = args.seasonal_length
    n_periods = args.n_periods
    noise = args.noise
    offset = args.offset
    scale_thr = args.scale_threshold
    scale_window_factor = args.scale_window_factor
    scale_window_minimum = args.scale_window_minimum
    scale_seasons = args.scale_seasons
    scale_window = max(scale_window_minimum, int(scale_window_factor * season_length))
    # DA Params
    da = args.data_augmentation
    append = args.append
    o_percentage = args.o_percentage
    u_percentage = args.u_percentage
    threshold = args.threshold
    rel_thr = args.relevance_threshold
    rel_coef = args.relevance_coefficient
    under_samp = args.undersampling
    # CPD Params
    cpd = args.change_point_detection
    const_hazard_factor = args.constant_hazard_factor
    const_hazard = args.constant_hazard
    # 9999 is the parser default: in that case derive the hazard from the season length
    const_hazard = const_hazard_factor * season_length if const_hazard == 9999 else const_hazard
    cf_r = args.changefinder_r
    cf_order = args.changefinder_order
    cf_smooth = args.changefinder_smooth
    cf_thr_perc = args.changefinder_threshold_percentile

    # get base data
    X = TrainHelper.get_periodic_noisy_x(x_base=np.linspace(-0.5 * math.pi, 1.5 * math.pi, season_length),
                                         n_periods=n_periods, noise=noise)
    Y = TrainHelper.noisy_sin(X, noise=noise, offset=offset)
    data = pd.DataFrame(columns=['X', 'Y'])
    data['X'] = X
    data['Y'] = Y
    # first 60 % of the samples form the train set
    train_ind = int(0.6 * data.shape[0])
    train = data[0:train_ind]

    # Train offline base model
    target_column = 'Y'
    kernel = ExpSineSquared()
    alpha = 0.1
    n_restarts_optimizer = 10
    standardize = False
    normalize_y = True
    model_sine = ModelsGaussianProcessRegression.GaussianProcessRegression(
        target_column=target_column, seasonal_periods=season_length, kernel=kernel,
        alpha=alpha, n_restarts_optimizer=n_restarts_optimizer,
        standardize=standardize, normalize_y=normalize_y,
        one_step_ahead=False)
    model_sine.train(train=train, cross_val_call=False)

    columns = ['sampler', 'cpd', 'seas_len', 'start_ind', 'end_ind', 'slope', 'max_factor', 'noise',
               'const_hazard', 'scale_window', 'scale_seasons', 'scale_thr', 'append', 'o_perc', 'u_perc', 'threshold',
               'under_samp', 'rel_thr', 'rel_coef', 'cf_r', 'cf_order', 'cf_smooth', 'cf_thr_perc',
               'rmse', 'rmse_base', 'rmse_ratio']
    # values that are identical for every scenario of this run, stored with each successful result row
    run_settings = {
        'sampler': da,
        'cpd': cpd,
        'cf_r': float(cf_r),
        'cf_order': float(cf_order),
        'cf_smooth': float(cf_smooth),
        'cf_thr_perc': float(cf_thr_perc),
        'seas_len': season_length,
        'noise': noise,
        'o_perc': float(o_percentage),
        'u_perc': float(u_percentage),
        'threshold': float(threshold),
        'under_samp': under_samp,
        'rel_thr': float(rel_thr),
        'rel_coef': float(rel_coef),
        'append': append,
        'const_hazard': float(const_hazard),
        'scale_window': scale_window,
        'scale_seasons': scale_seasons,
        'scale_thr': scale_thr,
    }

    # heat map Configs
    step_size = 10 if season_length <= 50 else 20  # adjust step size for longer scenarios to limit amount of scenarios
    heat_map_configs = [
        {'type': 'start vs. end index', 'slope': 0.2, 'max_factor': 2, 'step_size': step_size},
        {'type': 'start vs. end index', 'slope': 0.4, 'max_factor': 3, 'step_size': step_size},
        {'type': 'slope vs. max factor', 'start_ind': train_ind + season_length,
         'end_ind': train_ind + 2 * season_length,
         'start_slope': 0.1, 'end_slope': 1, 'step_size_slope': 0.1,
         'start_max_factor': 0.5, 'end_max_factor': 5, 'step_size_max_factor': 0.5},
        {'type': 'slope vs. max factor', 'start_ind': train_ind + season_length,
         'end_ind': train_ind + 2 * season_length,
         'start_slope': 0.1, 'end_slope': 1, 'step_size_slope': 0.1,
         'start_max_factor': 0.4, 'end_max_factor': 3, 'step_size_max_factor': 0.2}
    ]

    # make sure the output directory exists before any (long running) scenario is computed
    os.makedirs('OptimResults', exist_ok=True)

    # iterate over all heat map configs
    for config in heat_map_configs:
        doc_results = pd.DataFrame(columns=columns)
        results_index = 0
        heat_map_type = config['type']
        print('####### ' + heat_map_type + ' #######')
        print(args)
        # heat map start vs. end index params (None if not part of this config)
        slope = config.get('slope')
        max_factor = config.get('max_factor')
        step_size = config.get('step_size')
        # heat map slope vs. max factor params (None if not part of this config)
        start_ind = config.get('start_ind')
        end_ind = config.get('end_ind')
        start_slope = config.get('start_slope')
        end_slope = config.get('end_slope')
        step_size_slope = config.get('step_size_slope')
        start_max_factor = config.get('start_max_factor')
        end_max_factor = config.get('end_max_factor')
        step_size_max_factor = config.get('step_size_max_factor')

        # setup iterators for heat map
        if heat_map_type == 'start vs. end index':
            # minimum distance between start and end index, rounded up to the next multiple of 10
            min_diff = int((max_factor-1) / slope) * 2
            min_diff = int(math.ceil(min_diff / 10.0)) * 10 if min_diff % 10 != 0 else min_diff
            if season_length > 50:
                min_diff = max(20, min_diff)
            iterator_outer = np.arange(train_ind, len(data), step_size)
            iterator_inner = np.arange(train_ind + min_diff, len(data) + step_size, step_size)
            # drop start / end indices that would exceed the dataset
            iterator_outer = iterator_outer[iterator_outer + min_diff <= len(data)]
            iterator_inner = iterator_inner[iterator_inner <= len(data)]
            save_str = \
                heat_map_type + '_slope' + str(slope) + '_maxfact' + str(max_factor) + '_stepsize' + str(step_size)
        elif heat_map_type == 'slope vs. max factor':
            # end values are extended by one step so that they are part of the ranges
            iterator_outer = np.arange(start_slope, end_slope + step_size_slope, step_size_slope)
            iterator_inner = np.arange(start_max_factor, end_max_factor + step_size_max_factor, step_size_max_factor)
            save_str = heat_map_type \
                + '_slopeconf-' + str(start_slope) + '-' + str(end_slope) + '-' + str(step_size_slope) \
                + '_maxfactconf-' + str(start_max_factor) + '-' + str(end_max_factor) \
                + '-' + str(step_size_max_factor)
        else:
            # fail early instead of running into undefined or stale iterators below
            raise ValueError('Unknown heat map type: ' + str(heat_map_type))

        # iterate over heat map params
        for it_out in iterator_outer:
            for it_in in iterator_inner:
                if heat_map_type == 'start vs. end index':
                    start_ind = it_out
                    end_ind = it_in
                    # skip combinations whose manipulated section would be shorter than min_diff
                    if start_ind + min_diff > end_ind:
                        continue
                    print('--- Running Variant: start_ind=' + str(start_ind) + ', end_ind=' + str(end_ind) + ' ---')
                    print('slope=' + str(slope) + ', max_factor=' + str(max_factor))
                elif heat_map_type == 'slope vs. max factor':
                    slope = it_out
                    max_factor = it_in
                    print('--- Running Variant: slope=' + str(slope) + ', max_factor=' + str(max_factor) + ' ---')
                    print('start_ind=' + str(start_ind) + ', end_ind=' + str(end_ind))
                # coordinates identifying the current heat map cell
                scenario = {'start_ind': start_ind, 'end_ind': end_ind, 'slope': slope, 'max_factor': max_factor}
                try:
                    # manipulate data
                    data_manip = TrainHelper.get_sloped_dataset(dataset=data, target_column=target_column,
                                                                start_ind=start_ind, end_ind=end_ind,
                                                                max_factor=max_factor, slope=slope)
                    # get base predictions
                    base_predict = model_sine.predict(train=data_manip[:train_ind], test=data_manip[train_ind:])
                    rmse_base = EvaluationHelper.rmse(actual=data_manip[train_ind:][target_column],
                                                      prediction=base_predict['Prediction'])
                    print('Base Prediction: RMSE=' + str(rmse_base))
                    # get evars-gpr results
                    cp_detected, predictions, _, _ = \
                        EvarsGpr.run_evars_gpr(base_model=model_sine, data=data_manip, season_length=season_length,
                                               target_column=target_column, train_ind=train_ind, scale_thr=scale_thr,
                                               da=da, o_perc=o_percentage, u_perc=u_percentage, thr=threshold,
                                               rel_thr=rel_thr, rel_coef=rel_coef, under_samp=under_samp,
                                               append=append, const_hazard=const_hazard, scale_window=scale_window,
                                               verbose=False, scale_seasons=scale_seasons, cpd=cpd, cf_r=cf_r,
                                               cf_order=cf_order, cf_smooth=cf_smooth, cf_thr_perc=cf_thr_perc)
                    rmse = EvaluationHelper.rmse(actual=data_manip[train_ind:][target_column],
                                                 prediction=predictions['Prediction'])
                    # store results
                    print('EVARS-GPR: RMSE=' + str(rmse))
                    rmse_ratio = rmse / rmse_base
                    print('RMSE_Ratio=' + str(rmse_ratio))
                    result_row = dict(run_settings)
                    result_row.update(scenario)
                    result_row.update({'rmse': float(rmse), 'rmse_base': float(rmse_base),
                                       'rmse_ratio': float(rmse_ratio)})
                    write_result_row(doc_results, results_index, result_row)
                except Exception as exc:
                    # best effort: a failing scenario must not abort the whole heat map
                    print(exc)
                    # mark only the failed row (not the whole 'rmse' column) and keep its coordinates
                    failure_row = dict(scenario)
                    failure_row['rmse'] = 'Failure'
                    write_result_row(doc_results, results_index, failure_row)
                results_index += 1

        # save results of this heat map config to disk
        doc_results.to_csv(
            'OptimResults/HeatMap_'
            + save_str + '_' + cpd + '_' + da + '_seaslen' + str(season_length) + '_offset' + str(offset) + '.csv',
            sep=';', decimal=',', float_format='%.10f')