-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
102 lines (81 loc) · 3.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from typing import Iterable
import pandas as pd
from evaluation import utils
from models import regression
from output import render
import os
# --------------------------------------------------------------------------
# Dataset metadata.
# NOTE: the target column name ends with a trailing space — it must match
# the column header in the Excel file exactly, so do not "clean it up".
dataset_info = dict(
    name='Concrete Compressive Strength 2',
    type='Regression',
    target='Concrete compressive strength(MPa, megapascals) ',
    split=0.2,
    path="Concrete_Data 2.xls",
    source="""Prof. I-Cheng Yeh
Department of Information Management
Chung-Hua University,
Hsin Chu, Taiwan 30067, R.O.C.
e-mail:icyeh@chu.edu.tw
TEL:886-3-5186511""",
)

# Candidate regressors to benchmark against the dataset.
models = [
    regression.GaussianProcessRegressor(),
    regression.KNeighborsRegressor(),
    regression.Ridge(),
    regression.LinearRegression(),
    regression.MLPRegressor(),
    regression.PolynomialRegression(),
    regression.SVR(),
    regression.DecisionTreeRegressor(),
    regression.Lasso(),
    regression.RandomForestRegressor(),
]

# Directory that holds the raw dataset files.
dataset_dir = "data/"
def main():
    """Benchmark every model on the dataset with a single target column,
    render an HTML results report, and run two parametric studies with the
    best-performing model (defined as the one with the highest R^2 score).

    Side effects: reads the Excel dataset from ``dataset_dir``, mutates
    ``dataset_info['size']``, and writes reports/plots/CSV under ``results/``.
    """
    dataset = pd.read_excel(dataset_dir + dataset_info['path'])
    dataset_info['size'] = len(dataset)
    data = utils.load_Xy(dataset, dataset_info['target'], dataset_info['split'])

    # Track the best-performing model by the highest R^2.
    top_model = None
    best_r2 = None
    scores = []
    for model in models:
        score = utils.regression_train_and_test(model, *data)
        # FIX: compare against None explicitly. The previous `not best_r2`
        # was also true when the best R^2 so far was exactly 0.0, which let
        # a strictly worse model overwrite the current best.
        if best_r2 is None or score['r2'] > best_r2:
            top_model = model
            best_r2 = score['r2']
        scores.append(score)

    # Report models ordered by ascending mean squared error.
    scores.sort(key=lambda x: x['mse'])
    render.render_results_html(dataset_info, scores)

    # Parametric study over two fixed component grids using the top model.
    stats, results = utils.regression_parametric_study(
        top_model, dataset, dataset_info['target'],
        c1=[0.1, 0.2, 0.3, 0.4, 0.5],
        c2=[6, 9, 12, 15, 18, 21, 24, 27, 30, 33])
    render.plot_parametric_graphs(stats, results, dataset_info['target'], 'results', True)

    # Custom parametric sweep over the 2nd and 3rd feature columns;
    # dump the predictions alongside the swept inputs as CSV.
    os.makedirs(os.path.join('results', "custom_parametric"), exist_ok=True)
    results, values = utils.custom_parametric(
        top_model, dataset,
        {dataset.columns[1]: range(0, 10), dataset.columns[2]: range(9, 12)},
        dataset_info['target'])
    df = pd.DataFrame(values, columns=dataset.drop(dataset_info['target'], axis=1).columns)
    df[dataset_info['target']] = results
    df.to_csv(os.path.join('results', "custom_parametric", 'custom_parametric_data.csv'))
def two_target():
    """Repeat the benchmark with TWO target columns (strength and age),
    writing the report and parametric plots under ``results/twoTargets``.

    Side effects: rebinds ``dataset_info['target']`` to a two-element list,
    mutates ``dataset_info['size']``, and writes under ``results/twoTargets``.
    """
    dataset_info['target'] = ['Concrete compressive strength(MPa, megapascals) ', 'Age (day)']
    dataset = pd.read_excel(dataset_dir + dataset_info['path'])
    dataset_info['size'] = len(dataset)
    data = utils.load_Xy(dataset, dataset_info['target'], dataset_info['split'])

    # Track the best-performing model by the highest R^2.
    top_model = None
    best_r2 = None
    scores = []
    for model in models:
        score = utils.regression_train_and_test(model, *data)
        # FIX: compare against None explicitly. The previous `not best_r2`
        # was also true when the best R^2 so far was exactly 0.0, which let
        # a strictly worse model overwrite the current best.
        if best_r2 is None or score['r2'] > best_r2:
            top_model = model
            best_r2 = score['r2']
        scores.append(score)

    # Report models ordered by ascending mean squared error.
    scores.sort(key=lambda x: x['mse'])
    # (was an f-string with no placeholders — plain literal, same value)
    render.render_results_html(dataset_info, scores, "results/twoTargets")
    stats, results = utils.regression_parametric_study(top_model, dataset, dataset_info['target'])
    render.plot_parametric_graphs(stats, results, dataset_info['target'], "results/twoTargets", True)
# Script entry point: run the single-target benchmark by default.
# Uncomment `two_target()` to also run the two-target variant (note that it
# mutates dataset_info['target'], so run it after main()).
if __name__ == "__main__":
    main()
    # two_target()