library(here)
library(tidyverse)
library(tidymodels)
library(AmesHousing)
tidymodels_prefer()
Tune Models
Split
# Build the Ames housing data set via AmesHousing::make_ames().
ames <- make_ames()

# Fix the RNG seed so the train/test split is reproducible, then split the
# data, stratifying on the outcome column Sale_Price.
set.seed(4595)
data_split <- initial_split(ames, strata = "Sale_Price")

# Training portion of the split; everything below is fit on this.
ames_train <- training(data_split)

# Separate seed for the resampling scheme: V-fold cross-validation folds on
# the training set, again stratified on Sale_Price.
set.seed(2453)
rs_splits <- vfold_cv(ames_train, strata = "Sale_Price")
Recipes
# Preprocessing recipe for the Ames training data.
# Sale_Price is modelled on the log10 scale; the spline degrees of freedom for
# Longitude and Latitude are left as tuning parameters with ids "lon" and "lat".
ames_rec <-
  recipe(Sale_Price ~ ., data = ames_train) %>%
  # Log-transform the outcome (base 10).
  step_log(Sale_Price, base = 10) %>%
  # Yeo-Johnson transform of the two area predictors.
  step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>%
  # Collapse infrequent Neighborhood levels (threshold = 0.1).
  step_other(Neighborhood, threshold = .1) %>%
  # Dummy-encode all nominal columns.
  step_dummy(all_nominal()) %>%
  # Drop zero-variance predictors.
  step_zv(all_predictors()) %>%
  # Natural splines on the coordinates; deg_free is tuned separately for each.
  step_ns(Longitude, deg_free = tune("lon")) %>%
  step_ns(Latitude, deg_free = tune("lat"))
Models
# K-nearest-neighbour regression specification using the "kknn" engine.
# All three main arguments are marked for tuning; the number of neighbours is
# given the custom id "K", the others keep their default ids.
knn_model <-
  nearest_neighbor(
    mode = "regression",
    neighbors = tune("K"),
    weight_func = tune(),
    dist_power = tune()
  ) %>%
  set_engine("kknn")
Workflow & Parameters
# Bundle the preprocessing recipe and the KNN model spec into one workflow so
# they can be tuned and fitted together.
ames_wflow <-
  workflow() %>%
  add_recipe(ames_rec) %>%
  add_model(knn_model)
class(ames_wflow)
[1] "workflow"
# Extract the tunable parameters of the workflow as a dials parameter set and
# override the range of the neighbours parameter (id "K") to 1..50.
ames_set <-
  extract_parameter_set_dials(ames_wflow) %>%
  update(K = neighbors(c(1, 50)))
class(ames_set)
[1] "parameters" "tbl_df" "tbl" "data.frame"
ames_set
Collection of 5 parameters for tuning
identifier type object
K neighbors nparam[+]
weight_func weight_func dparam[+]
dist_power dist_power nparam[+]
lon deg_free nparam[+]
lat deg_free nparam[+]
Grid
Parameter Grids
# Seed the RNG so the generated grid is reproducible.
set.seed(7014)
### Space-filling parameter grids
# Build a 10-point maximum-entropy grid over the parameter set.
# NOTE(review): grid_max_entropy() appears to be deprecated in recent dials
# releases in favour of grid_space_filling() — confirm before upgrading.
ames_grid <-
  ames_set %>%
  grid_max_entropy(size = 10)
ames_grid
# A tibble: 10 × 5
K weight_func dist_power lon lat
<int> <chr> <dbl> <int> <int>
1 35 optimal 1.32 8 1
2 35 rank 1.29 3 13
3 21 cos 0.626 1 4
4 4 biweight 0.311 8 4
5 32 triangular 0.165 9 15
6 3 rank 1.86 10 15
7 40 triangular 0.167 11 7
8 12 epanechnikov 1.53 4 7
9 5 rank 0.411 2 7
10 33 triweight 0.511 10 3
Grid Search !
### Perform Grid Search (Not Run)
# Evaluate every candidate in ames_grid on each cross-validation resample.
ames_grid_search <-
  tune_grid(
    ames_wflow,
    resamples = rs_splits,
    grid = ames_grid
  )
# NOTE(review): presumably this loads a precomputed `ames_grid_search` so the
# (slow) search above does not have to be run — confirm what the data set
# provides and whether it overwrites other objects defined earlier.
data("example_ames_knn")
class(ames_grid_search)
[1] "tune_results" "tbl_df" "tbl" "data.frame"
ames_grid_search
# Tuning results
# 10-fold cross-validation using stratification
# A tibble: 10 × 4
splits id .metrics .notes
<list> <chr> <list> <list>
1 <split [1978/0]> Fold01 <tibble [20 × 9]> <tibble [0 × 1]>
2 <split [1979/0]> Fold02 <tibble [20 × 9]> <tibble [0 × 1]>
3 <split [1979/0]> Fold03 <tibble [20 × 9]> <tibble [0 × 1]>
4 <split [1979/0]> Fold04 <tibble [20 × 9]> <tibble [0 × 1]>
5 <split [1979/0]> Fold05 <tibble [20 × 9]> <tibble [0 × 1]>
6 <split [1979/0]> Fold06 <tibble [20 × 9]> <tibble [0 × 1]>
7 <split [1979/0]> Fold07 <tibble [20 × 9]> <tibble [0 × 1]>
8 <split [1979/0]> Fold08 <tibble [20 × 9]> <tibble [0 × 1]>
9 <split [1979/0]> Fold09 <tibble [20 × 9]> <tibble [0 × 1]>
10 <split [1981/0]> Fold10 <tibble [20 × 9]> <tibble [0 × 1]>
Finalized
Select Best Tune Result
# Pick the best candidate parameter combination according to the RMSE metric.
lowest_rmse <- select_best(ames_grid_search, metric = "rmse")
class(lowest_rmse)
[1] "tbl_df" "tbl" "data.frame"
lowest_rmse
# A tibble: 1 × 6
K weight_func dist_power lon lat .config
<int> <chr> <dbl> <int> <int> <chr>
1 33 triweight 0.511 10 3 Preprocessor10_Model1
Last Fit
# Rebuild the workflow, plug in the selected parameter values, then fit on the
# training set and evaluate once on the test set of `data_split`, reporting
# RMSE only.
ames_res_last <- workflow(ames_rec, knn_model) |>
  finalize_workflow(lowest_rmse) |>
  last_fit(split = data_split, metrics = metric_set(rmse))
class(ames_res_last)
[1] "last_fit" "resample_results" "tune_results" "tbl_df"
[5] "tbl" "data.frame"
ames_res_last
# Resampling results
# Manual resampling
# A tibble: 1 × 6
splits id .metrics .notes .predictions .workflow
<list> <chr> <list> <list> <list> <list>
1 <split [2197/733]> train/test split <tibble> <tibble> <tibble> <workflow>