refactor evaluation pipeline
ankke committed Jun 18, 2023
1 parent f61d8a5 commit 1e93e5a
Showing 31 changed files with 2,040 additions and 1,896 deletions.
363 changes: 62 additions & 301 deletions experiments/EXP_SYN_DATA.py
@@ -1,293 +1,19 @@
import argparse
import multiprocessing
import pathlib
import time

import pandas as pd
from darts.models.forecasting.lgbm import LightGBMModel
from darts.models.forecasting.rnn_model import RNNModel
from darts.models.forecasting.transformer_model import TransformerModel

from experiments.pipeline_experiment import run
from experiments.utils import (
gen_cancel_shape_ar,
gen_cancel_shape_ar_outlier_0p1,
gen_cancel_shape_ar_outlier_1p,
gen_model_and_params,
gen_model_and_params_none,
gen_model_and_params_norm,
gen_one_shape_ar,
gen_one_shape_ar_outlier_0p1,
gen_one_shape_ar_outlier_1p,
gen_one_shape_ar_trend,
gen_one_shape_ar_trend_cp,
gen_one_shape_heteroscedacity,
gen_one_shape_heteroscedacity_op,
gen_struc_break_mean,
gen_struc_break_var,
generate_intermittent,
)
from tot.models import NaiveModel, NeuralProphetModel, SeasonalNaiveModel, TorchProphetModel
from tot.models.models_darts import DartsForecastingModel

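# Registry mapping --data_func CLI names to synthetic-data generator functions.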
FUNCTIONS = {
"gen_one_shape_ar": gen_one_shape_ar,
"gen_one_shape_ar_outlier_0p1": gen_one_shape_ar_outlier_0p1,
"gen_one_shape_ar_outlier_1p": gen_one_shape_ar_outlier_1p,
"gen_cancel_shape_ar": gen_cancel_shape_ar,
"gen_cancel_shape_ar_outlier_0p1": gen_cancel_shape_ar_outlier_0p1,
"gen_cancel_shape_ar_outlier_1p": gen_cancel_shape_ar_outlier_1p,
"gen_one_shape_ar_trend": gen_one_shape_ar_trend,
"gen_one_shape_ar_trend_cp": gen_one_shape_ar_trend_cp,
"generate_intermittent": generate_intermittent,
"gen_one_shape_heteroscedacity": gen_one_shape_heteroscedacity,
"gen_one_shape_heteroscedacity_op": gen_one_shape_heteroscedacity_op,
"gen_struc_break_mean": gen_struc_break_mean,
"gen_struc_break_var": gen_struc_break_var,
}

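# Named hyper-parameter presets, selected on the command line via --params.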
PARAMS = {
"NP": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"n_lags": 4,
},
"NP_localST": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"trend_global_local": "local",
"season_global_local": "local",
"n_lags": 4,
},
"NP_FNN": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"yearly_seasonality": False,
"weekly_seasonality": False,
"daily_seasonality": False,
"n_changepoints": 0,
"growth": "off",
"n_lags": 4,
},
"NP_FNN_wb": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"yearly_seasonality": False,
"weekly_seasonality": False,
"daily_seasonality": False,
"n_changepoints": 0,
"growth": "off",
"n_lags": 24,
},
"NP_FNN_sw_wb": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"yearly_seasonality": False,
"weekly_seasonality": False,
"daily_seasonality": False,
"n_changepoints": 0,
"growth": "off",
"n_lags": 24,
"ar_layers": [128],
},
"TP": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
},
"TP_localST": {
"n_forecasts": 1,
"epochs": 30,
"global_normalization": True,
"normalize": "off",
"trend_global_local": "local",
"season_global_local": "local",
},
"LGBM": {
"model": LightGBMModel,
"n_forecasts": 1,
"output_chunk_length": 1,
"lags": 4,
"n_lags": 4,
"_data_params": {},
},
"RNN": {
"model": RNNModel,
"input_chunk_length": 4,
"hidden_dim": 16,
"n_rnn_layers": 1,
"batch_size": 128,
"n_epochs": 80,
"random_state": 0,
"training_length": 4,
"force_reset": True,
"n_lags": 4,
"n_forecasts": 1,
"pl_trainer_kwargs": {"accelerator": "gpu", "devices": 1},
"_data_params": {},
},
"RNN_wb": {
"model": RNNModel,
"input_chunk_length": 24,
"hidden_dim": 16,
"n_rnn_layers": 1,
"batch_size": 128,
"n_epochs": 30,
"random_state": 0,
"training_length": 24,
"force_reset": True,
"n_lags": 24,
"n_forecasts": 1,
"_data_params": {},
},
"RNN_wb_nl": {
"model": RNNModel,
"input_chunk_length": 4,
"hidden_dim": 16,
"n_rnn_layers": 1,
"batch_size": 128,
"n_epochs": 30,
"random_state": 0,
"training_length": 24,
"force_reset": True,
"n_lags": 24,
"n_forecasts": 1,
"pl_trainer_kwargs": {"accelerator": "gpu", "devices": 1},
"_data_params": {},
},
"TF": {
"model": TransformerModel,
"model_name": "air_transformer",
"n_forecasts": 1,
"n_lags": 4,
"output_chunk_length": 1,
"input_chunk_length": 4,
"batch_size": 128,
"n_epochs": 100,
"nr_epochs_val_period": 10,
"d_model": 16,
# 'n_heads':8,
"num_encoder_layers": 2,
"num_decoder_layers": 2,
"dim_feedforward": 128,
"dropout": 0.1,
"activation": "relu",
"random_state": 42,
"save_checkpoints": True,
"force_reset": True,
"pl_trainer_kwargs": {"accelerator": "gpu", "devices": 1},
"_data_params": {},
},
"Naive": {"n_forecasts": 1},
"SNaive": {"n_forecasts": 1, "season_length": 24},
}

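# Registry mapping --model CLI names to wrapper classes; the Darts models share the DartsForecastingModel adapter.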
MODELS = {
"NeuralProphetModel": NeuralProphetModel,
"TorchProphetModel": TorchProphetModel,
"LightGBMModel": DartsForecastingModel,
"RNNModel": DartsForecastingModel,
"TransformerModel": DartsForecastingModel,
"NaiveModel": NaiveModel,
"SeasonalNaiveModel": SeasonalNaiveModel,
}
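# Registry mapping --gen_func CLI names to model-and-params generator functions.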
GEN_FUNC = {
"gen_model_and_params": gen_model_and_params,
"gen_model_and_params_norm": gen_model_and_params_norm,
"gen_model_and_params_none": gen_model_and_params_none,
}


def run_benchmark(
model,
params,
data_func,
n_ts_groups,
amplitude_per_group,
gen_func,
offset_per_group=[0, 0],
data_trend_gradient_per_group=None,
proportion_break=None,
):
start_time = time.time()
PLOT = False
FREQ = "H"
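# 15 weeks of hourly data: 24 * 7 * 15 = 2,520 observations.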
SERIES_LENGTH = 24 * 7 * 15
DATE_RNG = pd.date_range(start=pd.to_datetime("2011-01-01 01:00:00"), periods=SERIES_LENGTH, freq="H")

# data_func, model, and params are supplied as function arguments and resolved via the registries above
MODEL = MODELS[model]
MODEL_PARAMS = PARAMS[params]
DIR_NAME = "{}_{}_n_ts_{}_am_{}_of_{}_gr_{}_{}".format(
data_func, params, n_ts_groups, amplitude_per_group, offset_per_group, data_trend_gradient_per_group, proportion_break
)
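# Resource-heavy model configurations get a single worker process (presumably to avoid GPU/memory contention); the rest fan out over 10.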
if params in ("TF", "RNN", "RNN_wb", "LGBM", "NP_FNN_sw_wb"):
NUM_PROCESSES = 1
else:
NUM_PROCESSES = 10

if data_trend_gradient_per_group is not None:
df = FUNCTIONS[data_func](
series_length=SERIES_LENGTH,
date_rng=DATE_RNG,
n_ts_groups=n_ts_groups,
offset_per_group=offset_per_group,
amplitude_per_group=amplitude_per_group,
trend_gradient_per_group=data_trend_gradient_per_group,
)
elif proportion_break is not None:
df = FUNCTIONS[data_func](
series_length=SERIES_LENGTH,
date_rng=DATE_RNG,
n_ts_groups=n_ts_groups,
offset_per_group=offset_per_group,
amplitude_per_group=amplitude_per_group,
proportion_break=proportion_break,
)
else:
df = FUNCTIONS[data_func](
series_length=SERIES_LENGTH,
date_rng=DATE_RNG,
n_ts_groups=n_ts_groups,
offset_per_group=offset_per_group,
amplitude_per_group=amplitude_per_group,
)

run(
dir_name=DIR_NAME,
save=True,
df=df,
df_name="",
freq=FREQ,
model_class=MODEL,
model_params=MODEL_PARAMS,
scalers="default",
scaling_levels="default",
reweight_loss=True,
metrics=["MAE", "RMSE", "MASE"],
test_percentage=0.25,
plot=PLOT,
num_processes=NUM_PROCESSES,
model_and_params_generator=GEN_FUNC[gen_func],
)
end_time = time.time()
print("time taken", end_time - start_time)

from experiments.pipeline import synthetic_data
from experiments.pipeline.models import params_generators
from experiments.pipeline.models.models import SUPPORTED_MODELS
from experiments.pipeline.models.params import SUPPORTED_PARAMS
from experiments.pipeline.pipeline import Pipeline

if __name__ == "__main__":
# Argument parsing
parser = argparse.ArgumentParser(description="Run a benchmark")
parser.add_argument("--data_func", type=str, required=True, help="Data function")
parser.add_argument("--data_func", type=str, required=True, help="Data function", choices=synthetic_data.__all__)
parser.add_argument(
"--data_n_ts_groups",
type=str,
@@ -319,10 +45,22 @@ def run_benchmark(
default=None,
help="Optional argument - Proportion of breaks in data function",
)
parser.add_argument("--model", type=str, required=True, help="Model class")
parser.add_argument("--params", type=str, required=True, help="Model parameters")
parser.add_argument("--model", type=str, required=True, help="Model class", choices=SUPPORTED_MODELS)
parser.add_argument("--params", type=str, required=True, help="Model parameters", choices=SUPPORTED_PARAMS)
parser.add_argument(
"--gen_func", type=str, required=False, default="gen_model_and_params", help="Param generation function"
"--gen_func",
type=str,
required=False,
default="gen_model_and_params_custom",
help="Param generation function",
choices=params_generators.__all__,
)
parser.add_argument(
"--with_scalers",
# argparse's type=bool treats any non-empty string (including "False") as True;
# store_true is the reliable flag form
action="store_true",
required=False,
default=False,
help="Run the pipeline with scalers enabled",
)

args = parser.parse_args()
@@ -331,27 +69,50 @@ def run_benchmark(
multiprocessing.freeze_support()

# post-processing args
args.data_n_ts_groups = [int(i) for i in args.data_n_ts_groups.split(",")]
args.data_offset_per_group = [int(i) for i in args.data_offset_per_group.split(",")]
args.data_amplitude_per_group = [int(i) for i in args.data_amplitude_per_group.split(",")]
args.data_trend_gradient_per_group = (
data_n_ts_groups = [int(i) for i in args.data_n_ts_groups.split(",")]
data_offset_per_group = [int(i) for i in args.data_offset_per_group.split(",")]
data_amplitude_per_group = [int(i) for i in args.data_amplitude_per_group.split(",")]
data_trend_gradient_per_group = (
[float(i) for i in args.data_trend_gradient_per_group.split(",")]
if args.data_trend_gradient_per_group is not None
else None
)
args.proportion_break = (
[int(i) for i in args.proportion_break.split(",")] if args.proportion_break is not None else None
)
proportion_break = [int(i) for i in args.proportion_break.split(",")] if args.proportion_break is not None else None

freq = "H"
series_length = 24 * 7 * 15
series_start = pd.to_datetime("2011-01-01 01:00:00")

# Running benchmark
run_benchmark(
model=args.model,
params=args.params,
pipeline = Pipeline(
model_name=args.model,
params_name=args.params,
data_func=args.data_func,
n_ts_groups=args.data_n_ts_groups,
offset_per_group=args.data_offset_per_group,
amplitude_per_group=args.data_amplitude_per_group,
data_trend_gradient_per_group=args.data_trend_gradient_per_group,
gen_func=args.gen_func,
proportion_break=args.proportion_break,
n_ts_groups=data_n_ts_groups,
offset_per_group=data_offset_per_group,
amplitude_per_group=data_amplitude_per_group,
data_trend_gradient_per_group=data_trend_gradient_per_group,
proportion_break=proportion_break,
freq=freq,
series_length=series_length,
series_start=series_start,
base_dir_name=pathlib.Path(__file__).parent.absolute(),
)

start_time = time.time()

# kwargs could contain:
# scalers,
# scaling_levels,
# weighted_loss,
# norm_types,
# norm_modes,
# norm_affines,
# e.g. kwargs = {"scalers": [StandardScaler()], "scaling_levels": ["per_time_series"]}
kwargs = {}

pipeline.run(
save=True, test_percentage=0.25, params_generator_name=args.gen_func, with_scalers=args.with_scalers, **kwargs
)

end_time = time.time()
print("Pipeline execution taken", end_time - start_time)