Comparing Models

library(here)
library(tidyverse)
library(tidymodels)

tidymodels_prefer()

Compare models with resampling

Data

# Load the Ames data and put the outcome, Sale_Price, on the log10 scale.
data(ames)
ames <-
  ames %>%
  mutate(Sale_Price = log10(Sale_Price))

# Seed the RNG once; the split and the folds below both draw from it, so
# the order of these statements matters for reproducibility.
set.seed(502)

# 80/20 train/test split, stratified on Sale_Price.
ames_split <- ames %>% initial_split(prop = 0.80, strata = Sale_Price)
ames_train <- ames_split %>% training()
ames_test <- ames_split %>% testing()

# 10-fold cross-validation folds built from the training set only.
ames_folds <- ames_train %>% vfold_cv(v = 10)

Recipes

# Baseline recipe: six predictors of Sale_Price, with Gr_Liv_Area logged
# (base 10), step_other() applied to Neighborhood at a 0.01 threshold, and
# every nominal predictor converted to dummy variables.
basic_rec <-
  recipe(
    Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type +
      Latitude + Longitude,
    data = ames_train
  ) %>%
  step_log(Gr_Liv_Area, base = 10) %>%
  step_other(Neighborhood, threshold = 0.01) %>%
  step_dummy(all_nominal_predictors())

# Baseline recipe plus interactions between Gr_Liv_Area and every column
# whose name starts with "Bldg_Type_".
interaction_rec <-
  step_interact(basic_rec, ~ Gr_Liv_Area:starts_with("Bldg_Type_"))

# Interaction recipe plus step_ns() terms for Latitude and Longitude,
# each with deg_free = 50.
spline_rec <-
  step_ns(interaction_rec, Latitude, Longitude, deg_free = 50)

# Named list of the three recipes; the names show up as the prefix of
# each workflow id (e.g. "basic_lm").
preproc <- list(
  basic = basic_rec,
  interact = interaction_rec,
  splines = spline_rec
)

Models

# Named list of model specifications; a single linear regression here.
mspecs <- list(lm = linear_reg())

# Inspect the class of the stored specification.
class(mspecs[["lm"]])

[1] "linear_reg" "model_spec"

Workflows

# Combine the recipes with the model specification into a workflow set.
# With `cross = FALSE` this gives one workflow per recipe (three in total).
lm_models <- workflow_set(
  preproc = preproc,
  models = mspecs,
  cross = FALSE
)
class(lm_models)

[1] "workflow_set" "tbl_df"       "tbl"          "data.frame"

# Print the workflow set; nothing has been fitted yet, so `result` is empty.
lm_models

# A workflow set/tibble: 3 × 4
  wflow_id    info             option    result    
  <chr>       <list>           <list>    <list>    
1 basic_lm    <tibble [1 × 4]> <opts[0]> <list [0]>
2 interact_lm <tibble [1 × 4]> <opts[0]> <list [0]>
3 splines_lm  <tibble [1 × 4]> <opts[0]> <list [0]>

Fit Workflows

# Run fit_resamples() on every workflow in the set using the CV folds.
# The out-of-sample predictions are kept (save_pred = TRUE).
lm_wfs <-
  lm_models %>%
  workflow_map(
    "fit_resamples",
    # Options to `workflow_map()`:
    seed = 1101,
    verbose = TRUE,
    # Options to `fit_resamples()`:
    resamples = ames_folds,
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    control = control_resamples(save_pred = TRUE)
  )

i 1 of 3 resampling: basic_lm

✔ 1 of 3 resampling: basic_lm (1.8s)

i 2 of 3 resampling: interact_lm

✔ 2 of 3 resampling: interact_lm (1.8s)

i 3 of 3 resampling: splines_lm

✔ 3 of 3 resampling: splines_lm (3.2s)

# The result of workflow_map() is still a workflow set.
class(lm_wfs)

[1] "workflow_set" "tbl_df"       "tbl"          "data.frame"

# Print the set; `result` now holds the resampling results per workflow.
lm_wfs

# A workflow set/tibble: 3 × 4
  wflow_id    info             option    result   
  <chr>       <list>           <list>    <list>   
1 basic_lm    <tibble [1 × 4]> <opts[2]> <rsmp[+]>
2 interact_lm <tibble [1 × 4]> <opts[2]> <rsmp[+]>
3 splines_lm  <tibble [1 × 4]> <opts[2]> <rsmp[+]>

Metric

Metrics for each workflow

# Gather the resampled performance metrics for all workflows in one tibble.
lm_wfs_metrics <- lm_wfs %>% collect_metrics()
lm_wfs_metrics

# A tibble: 6 × 9
  wflow_id    .config      preproc model .metric .estimator   mean     n std_err
  <chr>       <chr>        <chr>   <chr> <chr>   <chr>       <dbl> <int>   <dbl>
1 basic_lm    Preprocesso… recipe  line… rmse    standard   0.0804    10 0.00313
2 basic_lm    Preprocesso… recipe  line… rsq     standard   0.793     10 0.0122 
3 interact_lm Preprocesso… recipe  line… rmse    standard   0.0800    10 0.00301
4 interact_lm Preprocesso… recipe  line… rsq     standard   0.795     10 0.0114 
5 splines_lm  Preprocesso… recipe  line… rmse    standard   0.0786    10 0.00288
6 splines_lm  Preprocesso… recipe  line… rsq     standard   0.802     10 0.0120