library(here)
library(tidyverse)
library(tidymodels)
tidymodels_prefer()

# Comparing Models ----
# Data ----

# Load the Ames housing data and put the outcome on the log10 scale.
data(ames)
ames <- ames %>%
  mutate(Sale_Price = log10(Sale_Price))

# Seed once, ahead of the split and the fold creation that follow.
set.seed(502)

# 80/20 train/test split, stratified on the (logged) sale price.
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

# 10-fold cross-validation folds built from the training set only.
ames_folds <- vfold_cv(ames_train, v = 10)

# Recipes ----
# Baseline recipe: six predictors of (logged) sale price, with living area
# logged, infrequent neighborhoods pooled, and nominal predictors dummy-coded.
basic_rec <-
  recipe(
    Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type +
      Latitude + Longitude,
    data = ames_train
  ) %>%
  step_log(Gr_Liv_Area, base = 10) %>%
  step_other(Neighborhood, threshold = 0.01) %>%
  step_dummy(all_nominal_predictors())
# Baseline recipe plus interactions between living area and every column
# whose name starts with "Bldg_Type_".
interaction_rec <- step_interact(
  basic_rec,
  terms = ~ Gr_Liv_Area:starts_with("Bldg_Type_")
)
# Interaction recipe plus `step_ns()` spline terms for the two coordinates,
# each with 50 degrees of freedom.
spline_rec <- step_ns(
  interaction_rec,
  Latitude, Longitude,
  deg_free = 50
)
# Named list of recipes; the element names show up as the prefix of each
# workflow id (e.g. "basic" -> "basic_lm").
preproc <- list(
  basic = basic_rec,
  interact = interaction_rec,
  splines = spline_rec
)

# Models ----
# Named list of model specifications; the element name ("lm") shows up as the
# suffix of each workflow id.
mspecs <- list(
  lm = linear_reg()
)

class(mspecs$lm)
#> [1] "linear_reg" "model_spec"
# Workflows ----

# Build the workflow set without crossing (`cross = FALSE`); as the printout
# below shows, the single model spec is paired with each of the three recipes.
lm_models <- workflow_set(preproc, mspecs, cross = FALSE)

class(lm_models)
#> [1] "workflow_set" "tbl_df" "tbl" "data.frame"

lm_models
#> # A workflow set/tibble: 3 × 4
#> wflow_id info option result
#> <chr> <list> <list> <list>
#> 1 basic_lm <tibble [1 × 4]> <opts[0]> <list [0]>
#> 2 interact_lm <tibble [1 × 4]> <opts[0]> <list [0]>
#> 3 splines_lm <tibble [1 × 4]> <opts[0]> <list [0]>
# Fit Workflows ----

# Run `fit_resamples()` on every workflow in the set, over the same folds.
lm_wfs <- lm_models %>%
  workflow_map(
    "fit_resamples",
    # Options to `workflow_map()`:
    seed = 1101,
    verbose = TRUE,
    # Options to `fit_resamples()`:
    resamples = ames_folds,
    # `TRUE`, not `T`: `T` is an ordinary variable that can be reassigned.
    control = control_resamples(save_pred = TRUE)
  )
#> i 1 of 3 resampling: basic_lm
#> ✔ 1 of 3 resampling: basic_lm (1.8s)
#> i 2 of 3 resampling: interact_lm
#> ✔ 2 of 3 resampling: interact_lm (1.8s)
#> i 3 of 3 resampling: splines_lm
#> ✔ 3 of 3 resampling: splines_lm (3.2s)
# The mapped result is still a workflow set; the `option` and `result`
# columns are now populated.
class(lm_wfs)
#> [1] "workflow_set" "tbl_df" "tbl" "data.frame"

lm_wfs
#> # A workflow set/tibble: 3 × 4
#> wflow_id info option result
#> <chr> <list> <list> <list>
#> 1 basic_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
#> 2 interact_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
#> 3 splines_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
# Metric ----

# Metrics for each workflow: one row per workflow and metric (rmse, rsq),
# summarised over the 10 resamples (`n = 10`).
lm_wfs_metrics <- collect_metrics(lm_wfs)

lm_wfs_metrics
#> # A tibble: 6 × 9
#> wflow_id .config preproc model .metric .estimator mean n std_err
#> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <int> <dbl>
#> 1 basic_lm Preprocesso… recipe line… rmse standard 0.0804 10 0.00313
#> 2 basic_lm Preprocesso… recipe line… rsq standard 0.793 10 0.0122
#> 3 interact_lm Preprocesso… recipe line… rmse standard 0.0800 10 0.00301
#> 4 interact_lm Preprocesso… recipe line… rsq standard 0.795 10 0.0114
#> 5 splines_lm Preprocesso… recipe line… rmse standard 0.0786 10 0.00288
#> 6 splines_lm Preprocesso… recipe line… rsq standard 0.802 10 0.0120