-
Notifications
You must be signed in to change notification settings - Fork 2
/
toy_example_TODELETE.R
116 lines (100 loc) · 2.35 KB
/
toy_example_TODELETE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
library(tidyverse)
library(tidymodels)
library(stacks)
# Read the wind-turbine CSV straight from the tidytuesday GitHub repository.
wind_raw <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-10-27/wind-turbine.csv"
)

# Build the modelling table:
#   * keep six columns, renaming the rated-capacity and commissioning-date ones
#   * per province: coerce the year to numeric and relabel provinces that have
#     fewer than 50 rows as "Other"
#   * drop rows whose year is NA after the numeric coercion
wind <-
  wind_raw %>%
  select(
    province_territory,
    total_project_capacity_mw,
    turbine_rated_capacity_kw = turbine_rated_capacity_k_w,
    rotor_diameter_m,
    hub_height_m,
    year = commissioning_date
  ) %>%
  group_by(province_territory) %>%
  mutate(
    year = as.numeric(year),
    # n() is the row count of the current province group, so the condition is
    # a single value per group and a plain if/else is sufficient.
    province_territory = if (n() < 50) "Other" else province_territory
  ) %>%
  ungroup() %>%
  filter(!is.na(year))
# Partition the cleaned data into a training and a testing set, using the
# default settings of initial_split(). The seed fixes the random partition.
set.seed(1)
wind_split <- wind %>% initial_split()
wind_test <- wind_split %>% testing()
wind_train <- wind_split %>% training()
# Resample the training set into five cross-validation folds (seeded so the
# fold assignment is repeatable).
set.seed(1)
folds <- wind_train %>% rsample::vfold_cv(v = 5)
# Base preprocessing recipe, built one step at a time:
#   1. model turbine_rated_capacity_kw from every other column of wind_train
#   2. convert the nominal columns to dummy variables
#   3. remove zero-variance predictors
wind_rec <- recipe(turbine_rated_capacity_kw ~ ., data = wind_train)
wind_rec <- step_dummy(wind_rec, all_nominal())
wind_rec <- step_zv(wind_rec, all_predictors())
# Workflow skeleton shared by the candidate models: the base recipe with no
# model attached yet.
wind_wflow <- add_recipe(workflow(), wind_rec)

# Metric set containing RMSE only.
metric <- metric_set(rmse)

# Control objects handed to the resampling / tuning calls further down:
# ctrl_res goes to fit_resamples(), ctrl_grid goes to tune_grid().
ctrl_res <- control_stack_resamples()
ctrl_grid <- control_stack_grid()
# Linear regression specification using the "lm" engine.
lin_reg_spec <- set_engine(linear_reg(), "lm")

# Attach the specification to the shared workflow skeleton.
lin_reg_wflow <- add_model(wind_wflow, lin_reg_spec)

# Fit the workflow across the cross-validation folds, scoring with `metric`.
set.seed(1)
lin_reg_res <-
  lin_reg_wflow %>%
  fit_resamples(
    resamples = folds,
    metrics = metric,
    control = ctrl_res
  )
# Extend the base recipe with a spline expansion (step_ns) of
# rotor_diameter_m; its degrees of freedom are left as a tuning parameter
# registered under the id "length".
spline_rec <- step_ns(
  wind_rec,
  rotor_diameter_m,
  deg_free = tune::tune("length")
)

# Fresh workflow: the spline recipe plus the existing linear regression spec.
spline_wflow <- add_model(
  add_recipe(workflow(), spline_rec),
  lin_reg_spec
)

# Tune deg_free over the cross-validation folds, scoring with `metric`.
# No grid is supplied, so tune_grid() uses its default.
set.seed(1)
spline_res <-
  spline_wflow %>%
  tune_grid(
    resamples = folds,
    metrics = metric,
    control = ctrl_grid
  )
# Support vector machine specification (svm_rbf) for regression, fitted with
# the "kernlab" engine. Both cost and rbf_sigma are marked for tuning.
svm_spec <-
  svm_rbf(
    cost = tune(),
    rbf_sigma = tune()
  ) %>%
  set_engine("kernlab") %>%
  set_mode("regression")

# Attach the specification to the shared workflow skeleton.
svm_wflow <-
  wind_wflow %>%
  add_model(svm_spec)

# Tune cost and rbf_sigma over the cross-validation folds with a grid of
# 5 candidate parameter sets.
set.seed(1)
svm_res <-
  tune_grid(
    svm_wflow,
    resamples = folds,
    grid = 5,
    # Pass the same metric set as the linear and spline candidates. Without
    # it, tune_grid() falls back to its default metrics, so this candidate
    # would be scored differently from the other two.
    metrics = metric,
    control = ctrl_grid
  )