library(here)
library(tidyverse)
library(tidymodels)
tidymodels_prefer()
Comparing Models
Data
# Load the `ames` data set and replace Sale_Price with its log10 value, so
# everything downstream that uses `ames` sees the outcome on the log10 scale.
data(ames)
ames <- ames %>%
  mutate(Sale_Price = log10(Sale_Price))
# Fix the RNG seed so the random split and fold assignment are reproducible.
set.seed(502)

# 80/20 train/test split, stratified on the outcome (Sale_Price).
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

# 10-fold cross-validation folds built from the training set only.
ames_folds <- vfold_cv(ames_train, v = 10)
Recipes
# Baseline recipe: predict Sale_Price from neighborhood, living area, year
# built, building type and location (latitude/longitude).
basic_rec <-
  recipe(
    Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type +
      Latitude + Longitude,
    data = ames_train
  ) %>%
  # Put living area on the log10 scale.
  step_log(Gr_Liv_Area, base = 10) %>%
  # Pool infrequent neighborhoods (threshold = 0.01) into a single level.
  step_other(Neighborhood, threshold = 0.01) %>%
  # Convert all nominal predictors to dummy (indicator) columns.
  step_dummy(all_nominal_predictors())
# Extend the basic recipe with interaction terms between living area and
# every column whose name starts with "Bldg_Type_".
interaction_rec <-
  basic_rec %>%
  step_interact(~ Gr_Liv_Area:starts_with("Bldg_Type_"))
# Extend the interaction recipe with natural-spline terms for Latitude and
# Longitude (deg_free = 50).
spline_rec <-
  interaction_rec %>%
  step_ns(Latitude, Longitude, deg_free = 50)
# List of Recipes
# Named list of preprocessors; the names (basic, interact, splines) become
# the prefix of each workflow id in the workflow set.
preproc <- list(
  basic = basic_rec,
  interact = interaction_rec,
  splines = spline_rec
)
Models
# Named list of model specifications; only a linear regression is used here.
mspecs <- list(
  lm = linear_reg()
)

# Inspect the class of the model specification.
class(mspecs$lm)
[1] "linear_reg" "model_spec"
Workflows
# Combine the recipes with the model specification into a workflow set.
# With cross = FALSE the single `lm` spec is paired with each of the three
# recipes, giving three workflows.
lm_models <- workflow_set(preproc, mspecs, cross = FALSE)

# Inspect the class of the resulting object.
class(lm_models)
[1] "workflow_set" "tbl_df" "tbl" "data.frame"
# Print the workflow set; the `result` column is still empty at this point.
lm_models
# A workflow set/tibble: 3 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 basic_lm <tibble [1 × 4]> <opts[0]> <list [0]>
2 interact_lm <tibble [1 × 4]> <opts[0]> <list [0]>
3 splines_lm <tibble [1 × 4]> <opts[0]> <list [0]>
Fit Workflows
# Resample every workflow in the set by applying fit_resamples() to each one.
lm_wfs <-
  lm_models %>%
  workflow_map(
    "fit_resamples",
    # Options to `workflow_map()`:
    seed = 1101,
    verbose = TRUE,
    # Options to `fit_resamples()`:
    resamples = ames_folds,
    # Keep the out-of-sample predictions alongside the metrics.
    control = control_resamples(save_pred = TRUE)
  )
i 1 of 3 resampling: basic_lm
✔ 1 of 3 resampling: basic_lm (1.8s)
i 2 of 3 resampling: interact_lm
✔ 2 of 3 resampling: interact_lm (1.8s)
i 3 of 3 resampling: splines_lm
✔ 3 of 3 resampling: splines_lm (3.2s)
# Inspect the class of the object returned by workflow_map().
class(lm_wfs)
[1] "workflow_set" "tbl_df" "tbl" "data.frame"
# Print the workflow set after resampling; `result` now holds the resampling
# results for each workflow.
lm_wfs
# A workflow set/tibble: 3 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 basic_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
2 interact_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
3 splines_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
Metric
Metrics for each workflow
# Collect the resampled performance metrics for every workflow in the set
# into one tibble, then print it.
lm_wfs_metrics <- collect_metrics(lm_wfs)
lm_wfs_metrics
# A tibble: 6 × 9
wflow_id .config preproc model .metric .estimator mean n std_err
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <int> <dbl>
1 basic_lm Preprocesso… recipe line… rmse standard 0.0804 10 0.00313
2 basic_lm Preprocesso… recipe line… rsq standard 0.793 10 0.0122
3 interact_lm Preprocesso… recipe line… rmse standard 0.0800 10 0.00301
4 interact_lm Preprocesso… recipe line… rsq standard 0.795 10 0.0114
5 splines_lm Preprocesso… recipe line… rmse standard 0.0786 10 0.00288
6 splines_lm Preprocesso… recipe line… rsq standard 0.802 10 0.0120