library(here)
library(tidyverse)
library(tidymodels)
library(AmesHousing)
tidymodels_prefer()

# Tune Models ----

# Split ----
# Build the Ames housing data and hold out a test set, stratifying the
# split on the outcome column.
ames <- make_ames()
set.seed(4595)
data_split <- initial_split(ames, strata = "Sale_Price")
ames_train <- training(data_split)

# Resampling folds are drawn from the training portion only. A separate seed
# is set so the folds are reproducible on their own.
set.seed(2453)
rs_splits <- vfold_cv(ames_train, strata = "Sale_Price")

# Recipes ----
# Preprocessing recipe for the Ames training data.
# - The outcome is modelled on the log10 scale.
# - Lot_Area and Gr_Liv_Area get a Yeo-Johnson transformation.
# - Neighborhood levels below the 10% threshold are pooled by step_other().
# - Nominal predictors are dummy-encoded, then zero-variance columns dropped.
# - Longitude and Latitude get natural-spline expansions whose degrees of
#   freedom are left as tuning parameters with ids "lon" and "lat".
ames_rec <-
  recipe(Sale_Price ~ ., data = ames_train) %>%
  step_log(Sale_Price, base = 10) %>%
  step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>%
  step_other(Neighborhood, threshold = 0.1) %>%
  # Restrict dummy encoding to predictors explicitly; the outcome is numeric,
  # so this selects the same columns as all_nominal() did.
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_ns(Longitude, deg_free = tune("lon")) %>%
  step_ns(Latitude, deg_free = tune("lat"))

# Models ----
# K-nearest-neighbours regression specification using the "kknn" engine.
# All three main arguments are marked for tuning; the neighbour count is
# given the id "K" so it can be referred to by that name when the
# parameter set is updated.
knn_model <-
  nearest_neighbor(
    mode = "regression",
    neighbors = tune("K"),
    weight_func = tune(),
    dist_power = tune()
  ) %>%
  set_engine("kknn")

# Workflow & Parameters ----
# Bundle the recipe and the model specification into a single workflow.
ames_wflow <-
  workflow() %>%
  add_recipe(ames_rec) %>%
  add_model(knn_model)
class(ames_wflow)
#> [1] "workflow"

# Pull the tunable parameters out of the workflow and replace the range for
# the neighbour count (id "K") with 1 to 50.
ames_set <-
  extract_parameter_set_dials(ames_wflow) %>%
  update(K = neighbors(c(1, 50)))
class(ames_set)
#> [1] "parameters" "tbl_df"     "tbl"        "data.frame"
ames_set
#> Collection of 5 parameters for tuning
#>
#>   identifier        type    object
#>            K   neighbors nparam[+]
#>  weight_func weight_func dparam[+]
#>   dist_power  dist_power nparam[+]
#>          lon    deg_free nparam[+]
#>          lat    deg_free nparam[+]
# Grid ----

# Parameter Grids ----
set.seed(7014)
### Space-filling parameter grids
# Draw 10 candidate parameter combinations from the parameter set.
# NOTE(review): recent dials releases appear to deprecate grid_max_entropy()
# in favour of grid_space_filling(type = "max_entropy") -- confirm against
# the installed dials version before switching.
ames_grid <-
  ames_set %>%
  grid_max_entropy(size = 10)
ames_grid
#> # A tibble: 10 × 5
#>        K weight_func  dist_power   lon   lat
#>    <int> <chr>             <dbl> <int> <int>
#>  1    35 optimal           1.32      8     1
#>  2    35 rank              1.29      3    13
#>  3    21 cos               0.626     1     4
#>  4     4 biweight          0.311     8     4
#>  5    32 triangular        0.165     9    15
#>  6     3 rank              1.86     10    15
#>  7    40 triangular        0.167    11     7
#>  8    12 epanechnikov      1.53      4     7
#>  9     5 rank              0.411     2     7
#> 10    33 triweight         0.511    10     3
# Grid Search ! ----

### Perform Grid Search (Not Run)
# Evaluate every row of `ames_grid` on each resample in `rs_splits`.
# NOTE(review): the heading marks this chunk as not run in the source
# document, and the results printed below appear to come from the stored
# objects loaded by data("example_ames_knn") -- presumably shipped with the
# tune package; confirm. If the script is run top to bottom, that data() call
# may overwrite the `ames_grid_search` computed here -- verify this is
# intended.
ames_grid_search <-
  tune_grid(
    ames_wflow,
    resamples = rs_splits,
    grid = ames_grid
  )

data("example_ames_knn")
class(ames_grid_search)
#> [1] "tune_results" "tbl_df"       "tbl"          "data.frame"
ames_grid_search
#> # Tuning results
#> # 10-fold cross-validation using stratification
#> # A tibble: 10 × 4
#>    splits           id     .metrics          .notes
#>    <list>           <chr>  <list>            <list>
#>  1 <split [1978/0]> Fold01 <tibble [20 × 9]> <tibble [0 × 1]>
#>  2 <split [1979/0]> Fold02 <tibble [20 × 9]> <tibble [0 × 1]>
#>  3 <split [1979/0]> Fold03 <tibble [20 × 9]> <tibble [0 × 1]>
#>  4 <split [1979/0]> Fold04 <tibble [20 × 9]> <tibble [0 × 1]>
#>  5 <split [1979/0]> Fold05 <tibble [20 × 9]> <tibble [0 × 1]>
#>  6 <split [1979/0]> Fold06 <tibble [20 × 9]> <tibble [0 × 1]>
#>  7 <split [1979/0]> Fold07 <tibble [20 × 9]> <tibble [0 × 1]>
#>  8 <split [1979/0]> Fold08 <tibble [20 × 9]> <tibble [0 × 1]>
#>  9 <split [1979/0]> Fold09 <tibble [20 × 9]> <tibble [0 × 1]>
#> 10 <split [1981/0]> Fold10 <tibble [20 × 9]> <tibble [0 × 1]>
# Finalized ----

# Select Best Tune Result ----
# Keep the single parameter combination selected as best on RMSE.
lowest_rmse <- select_best(ames_grid_search, metric = "rmse")
class(lowest_rmse)
#> [1] "tbl_df"     "tbl"        "data.frame"
lowest_rmse
#> # A tibble: 1 × 6
#>       K weight_func dist_power   lon   lat .config
#>   <int> <chr>            <dbl> <int> <int> <chr>
#> 1    33 triweight        0.511    10     3 Preprocessor10_Model1
# Last Fit ----
# Rebuild the workflow from the recipe and model spec, plug in the selected
# parameter values, then run last_fit() on the original train/test split,
# requesting RMSE as the only metric.
ames_res_last <- workflow(ames_rec, knn_model) |>
  finalize_workflow(lowest_rmse) |>
  last_fit(split = data_split, metrics = metric_set(rmse))
class(ames_res_last)
#> [1] "last_fit"         "resample_results" "tune_results"     "tbl_df"
#> [5] "tbl"              "data.frame"
ames_res_last
#> # Resampling results
#> # Manual resampling
#> # A tibble: 1 × 6
#>   splits             id               .metrics .notes   .predictions .workflow
#>   <list>             <chr>            <list>   <list>   <list>       <list>
#> 1 <split [2197/733]> train/test split <tibble> <tibble> <tibble>     <workflow>