Tune Models

library(here)
library(tidyverse)
library(tidymodels)
library(AmesHousing)

# Resolve function-name clashes between the attached packages in favour of
# the tidymodels versions (see ?tidymodels_prefer).
tidymodels_prefer()

Split

# Load the Ames housing data via make_ames().
ames <- make_ames()

# Initial split, stratified on the sale price. The seed is set immediately
# before the call so the partition is reproducible.
set.seed(4595)
data_split <- ames %>% initial_split(strata = Sale_Price)

# Training portion of the split; the resamples below are drawn from it.
ames_train <- data_split %>% training()

# Resamples of the training data from vfold_cv(), again stratified on the sale
# price and seeded separately from the initial split.
set.seed(2453)
rs_splits <- ames_train %>% vfold_cv(strata = Sale_Price)

Recipes

# Preprocessing recipe for the nearest-neighbour model.
#
# Sale_Price is modelled on the log10 scale, with every other column of
# ames_train available as a predictor. The spline degrees of freedom for
# Longitude and Latitude are left as tuning parameters with ids "lon" and
# "lat".
ames_rec <-
  recipe(Sale_Price ~ ., data = ames_train) %>%
  step_log(Sale_Price, base = 10) %>%
  step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>%
  # Pool Neighborhood levels that fall below the 0.1 threshold
  step_other(Neighborhood, threshold = 0.1) %>%
  # Select nominal *predictors* only, so the outcome can never be dummy-coded
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_ns(Longitude, deg_free = tune("lon")) %>%
  step_ns(Latitude, deg_free = tune("lat"))

Models

# Nearest-neighbour regression specification on the "kknn" engine.
# All three model arguments are marked for tuning: the neighbour count gets
# the explicit id "K", the other two keep the ids tune() assigns by default.
knn_model <-
  nearest_neighbor(
    neighbors = tune("K"),
    weight_func = tune(),
    dist_power = tune()
  ) %>%
  set_mode("regression") %>%
  set_engine("kknn")

Workflow & Parameters

# Bundle the recipe and the model specification into a single workflow.
ames_wflow <- workflow(preprocessor = ames_rec, spec = knn_model)

class(ames_wflow)
[1] "workflow"
# Collect the workflow's tuning parameters, then replace the range of the
# neighbour-count parameter (id "K") with 1 to 50.
ames_set <- update(
  extract_parameter_set_dials(ames_wflow),
  K = neighbors(range = c(1, 50))
)

class(ames_set)
[1] "parameters" "tbl_df"     "tbl"        "data.frame"
# Print the parameter set to inspect each parameter's identifier and type
ames_set
Collection of 5 parameters for tuning

  identifier        type    object
           K   neighbors nparam[+]
 weight_func weight_func dparam[+]
  dist_power  dist_power nparam[+]
         lon    deg_free nparam[+]
         lat    deg_free nparam[+]

Grid

Parameter Grids

### Space-filling parameter grids
# The seed is set right before the grid is generated so the candidate set is
# reproducible.
# NOTE(review): recent dials releases deprecate grid_max_entropy() in favour
# of grid_space_filling(type = "max_entropy") — confirm the installed version
# before switching.
set.seed(7014)
ames_grid <- grid_max_entropy(ames_set, size = 10)

ames_grid
# A tibble: 10 × 5
       K weight_func  dist_power   lon   lat
   <int> <chr>             <dbl> <int> <int>
 1    35 optimal           1.32      8     1
 2    35 rank              1.29      3    13
 3    21 cos               0.626     1     4
 4     4 biweight          0.311     8     4
 5    32 triangular        0.165     9    15
 6     3 rank              1.86     10    15
 7    40 triangular        0.167    11     7
 8    12 epanechnikov      1.53      4     7
 9     5 rank              0.411     2     7
10    33 triweight         0.511    10     3

Finalized

Select Best Tune Result

# Evaluate every candidate in ames_grid across the resamples. The results
# object is not created anywhere earlier in this script, so it is built here
# before it is used.
# NOTE(review): if the tuning results were meant to be loaded from a cached
# file instead, replace this call with that load.
ames_grid_search <-
  tune_grid(
    ames_wflow,
    resamples = rs_splits,
    grid = ames_grid
  )

# Keep the single parameter combination that performs best on RMSE
lowest_rmse <- select_best(ames_grid_search, metric = "rmse")
class(lowest_rmse)
[1] "tbl_df"     "tbl"        "data.frame"
# Print the selected parameter values together with their .config label
lowest_rmse
# A tibble: 1 × 6
      K weight_func dist_power   lon   lat .config              
  <int> <chr>            <dbl> <int> <int> <chr>                
1    33 triweight        0.511    10     3 Preprocessor10_Model1

Last Fit

# Plug the selected parameter values into the existing workflow, then run
# last_fit() on the initial split, computing RMSE only.
# Reuses ames_wflow rather than rebuilding an identical workflow, and uses the
# same %>% pipe as the rest of the script.
ames_res_last <-
  ames_wflow %>%
  finalize_workflow(lowest_rmse) %>%
  last_fit(split = data_split, metrics = metric_set(rmse))


class(ames_res_last)
[1] "last_fit"         "resample_results" "tune_results"     "tbl_df"          
[5] "tbl"              "data.frame"      
# Print the last-fit results (a single row for the train/test split)
ames_res_last
# Resampling results
# Manual resampling 
# A tibble: 1 × 6
  splits             id               .metrics .notes   .predictions .workflow 
  <list>             <chr>            <list>   <list>   <list>       <list>    
1 <split [2197/733]> train/test split <tibble> <tibble> <tibble>     <workflow>