library(yardstick)
#>
#> Attaching package: 'yardstick'
#> The following object is masked from 'package:readr':
#>
#> spec
DTA (Yardstick)
data("pathology")
glimpse(pathology)
#> Rows: 344
#> Columns: 2
#> $ pathology <fct> abnorm, abnorm, abnorm, abnorm, abnorm, abnorm, abnorm, abno…
#> $ scan <fct> abnorm, abnorm, abnorm, abnorm, abnorm, abnorm, abnorm, abno…
Confusion Matric
<- conf_mat(pathology, truth = pathology, estimate = scan)
cm
cm#> Truth
#> Prediction abnorm norm
#> abnorm 231 32
#> norm 27 54
summary(cm)
#> # A tibble: 13 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.828
#> 2 kap binary 0.534
#> 3 sens binary 0.895
#> 4 spec binary 0.628
#> 5 ppv binary 0.878
#> 6 npv binary 0.667
#> 7 mcc binary 0.534
#> 8 j_index binary 0.523
#> 9 bal_accuracy binary 0.762
#> 10 detection_prevalence binary 0.765
#> 11 precision binary 0.878
#> 12 recall binary 0.895
#> 13 f_meas binary 0.887
Plot Bar Chart
autoplot(cm, type = "mosaic")
autoplot(cm, type = "heatmap")
<- pathology |>
pathology_cell count(pathology, scan) |>
mutate(prop = n/sum(n))
pathology_cell#> pathology scan n prop
#> 1 abnorm abnorm 231 0.67151163
#> 2 abnorm norm 27 0.07848837
#> 3 norm abnorm 32 0.09302326
#> 4 norm norm 54 0.15697674
|>
pathology_cell ggplot(aes(pathology, prop, fill = scan, color = scan)) +
geom_col(alpha = 0.5, position = "fill")
Metric: Default
|> metrics(truth = pathology, estimate = scan)
pathology #> # A tibble: 2 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.828
#> 2 kap binary 0.534
Metric Set
<- metric_set(accuracy, sens, spec, ppv, npv) class_metrics_1
|> class_metrics_1(truth = pathology, estimate = scan)
pathology #> # A tibble: 5 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.828
#> 2 sens binary 0.895
#> 3 spec binary 0.628
#> 4 ppv binary 0.878
#> 5 npv binary 0.667
Custom Class Metric
Custom LR+ & LR-
From: How to implemen custom metric set
Formular
The positive likelihood ratio is calculated as:
\[ {\displaystyle {\text{LR}}+={\frac {\text{sensitivity}}{1-{\text{specificity}}}}} \]
\[ {\displaystyle {\text{LR}}+={\frac {\text{TP / (TP + FN)}}{\text{FP / (FP + TN)}}}} \]
The negative likelihood ratio is calculated as:
\[ {\displaystyle {\text{LR}}-={\frac {1-{\text{sensitivity}}}{\text{specificity}}}} \]
\[ {\displaystyle {\text{LR}}-={\frac {\text{FN / (TP + FN)}}{\text{TN / (FP + TN)}}}} \]
Example
<- table(pathology$scan, pathology$pathology)
pathology_xtab
pathology_xtab#>
#> abnorm norm
#> abnorm 231 32
#> norm 27 54
Helpers
# Logic for `event_level`
<- function(xtab, event_level) {
event_col if (identical(event_level, "first")) {
colnames(xtab)[[1]]
else {
} colnames(xtab)[[2]]
} }
<- function(metric_dispatcher, x, estimator, call) {
finalize_estimator_internal.lr_pos
validate_estimator(estimator, estimator_override = "binary")
if (!is.null(estimator)) {
return(estimator)
}
<- levels(x)
lvls if (length(lvls) > 2) {
stop("A multiclass `truth` input was provided, but only `binary` is supported.")
} "binary"
}
Implement
LR Pos
<- function(truth, estimate, estimator, event_level) {
lr_pos_impl <- table(estimate, truth)
xtab # Rather than implement the actual method here, we rely on
# an *_estimator_impl() function that can handle binary
# and multiclass cases
lr_pos_estimator_impl(xtab, estimator, event_level)
}
# This function switches between binary and multiclass implementations
<- function(data, estimator, event_level) {
lr_pos_estimator_impl if(estimator == "binary") {
lr_pos_binary(data, event_level)
else {
} # Encapsulates the macro, macro weighted, and micro cases
# TODO
} }
LR Neg
<- function(truth, estimate, estimator, event_level) {
lr_neg_impl <- table(estimate, truth)
xtab # Rather than implement the actual method here, we rely on
# an *_estimator_impl() function that can handle binary
# and multiclass cases
lr_neg_estimator_impl(xtab, estimator, event_level)
}
# This function switches between binary and multiclass implementations
<- function(data, estimator, event_level) {
lr_neg_estimator_impl if(estimator == "binary") {
lr_neg_binary(data, event_level)
else {
} # Encapsulates the macro, macro weighted, and micro cases
# TODO
} }
Binary Implementation
LR Pos
<- function(data, event_level) {
lr_pos_binary <- event_col(data, event_level)
col <- setdiff(colnames(data), col)
col2
<- data[col, col]
tp <- data[col2, col2]
tn <- data[col, col2]
fp <- data[col2, col]
fn # list(tp = tp, tn = tn, fp = fp, fn = fn)
/ (tp + fn)) / (fp / (fp + tn))
(tp
}
lr_pos_binary(pathology_xtab, event_level = "first")
#> [1] 2.40625
LR Neg
<- function(data, event_level) {
lr_neg_binary <- event_col(data, event_level)
col <- setdiff(colnames(data), col)
col2
<- data[col, col]
tp <- data[col2, col2]
tn <- data[col, col2]
fp <- data[col2, col]
fn # list(tp = tp, tn = tn, fp = fp, fn = fn)
/ (tp + fn)) / (tn / (fp + tn))
(fn
}
lr_neg_binary(pathology_xtab, event_level = "first")
#> [1] 0.1666667
# Checking
pathology_xtab#>
#> abnorm norm
#> abnorm 231 32
#> norm 27 54
colnames(pathology_xtab)
#> [1] "abnorm" "norm"
# TP
"abnorm", "abnorm"]
pathology_xtab[#> [1] 231
# TN
"norm", "norm"]
pathology_xtab[#> [1] 54
# FP
"abnorm", "norm"]
pathology_xtab[#> [1] 32
# FN
"norm", "abnorm"]
pathology_xtab[#> [1] 27
Multiclass Implementation
[TODO]
Vec implement
LR Pos
<- function(truth,
lr_pos_vec
estimate,estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
...) {# calls finalize_estimator_internal() internally
<- finalize_estimator(truth, estimator, metric_class = "lr_pos")
estimator
check_class_metric(truth, estimate, case_weights, estimator)
if (na_rm) {
<- yardstick_remove_missing(truth, estimate, case_weights)
result
<- result$truth
truth <- result$estimate
estimate <- result$case_weights
case_weights else if (yardstick_any_missing(truth, estimate, case_weights)) {
} return(NA_real_)
}
lr_pos_impl(truth, estimate, estimator, event_level)
}
lr_pos_vec(pathology$pathology, pathology$scan)
#> [1] 2.40625
LR Neg
<- function(truth,
lr_neg_vec
estimate,estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
...) {# calls finalize_estimator_internal() internally
<- finalize_estimator(truth, estimator, metric_class = "lr_neg")
estimator
check_class_metric(truth, estimate, case_weights, estimator)
if (na_rm) {
<- yardstick_remove_missing(truth, estimate, case_weights)
result
<- result$truth
truth <- result$estimate
estimate <- result$case_weights
case_weights else if (yardstick_any_missing(truth, estimate, case_weights)) {
} return(NA_real_)
}
lr_neg_impl(truth, estimate, estimator, event_level)
}
lr_neg_vec(pathology$pathology, pathology$scan)
#> [1] 0.1666667
DF implement
# LR Pos
<- function(data, ...) {
lr_pos UseMethod("lr_pos")
}
<- new_class_metric(lr_pos, direction = "maximize")
lr_pos
# LR Neg
<- function(data, ...) {
lr_neg UseMethod("lr_neg")
}
<- new_class_metric(lr_neg, direction = "minimize") lr_neg
<- function(data,
lr_pos.data.frame
truth,
estimate,estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
...) {class_metric_summarizer(
name = "lr_pos",
fn = lr_pos_vec,
data = data,
truth = !!rlang::enquo(truth),
estimate = !!rlang::enquo(estimate),
estimator = estimator,
na_rm = na_rm,
case_weights = !!rlang::enquo(case_weights),
event_level = event_level
) }
<- function(data,
lr_neg.data.frame
truth,
estimate,estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
...) {class_metric_summarizer(
name = "lr_neg",
fn = lr_neg_vec,
data = data,
truth = !!rlang::enquo(truth),
estimate = !!rlang::enquo(estimate),
estimator = estimator,
na_rm = na_rm,
case_weights = !!rlang::enquo(case_weights),
event_level = event_level
) }
Using lr_pos()
lr_pos(pathology, truth = pathology, estimate = scan)
#> # A tibble: 1 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 lr_pos binary 2.41
lr_pos_vec(truth = pathology$pathology, estimate = pathology$scan)
#> [1] 2.40625
Using lr_neg()
lr_neg(pathology, truth = pathology, estimate = scan)
#> # A tibble: 1 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 lr_neg binary 0.167
lr_neg_vec(truth = pathology$pathology, estimate = pathology$scan)
#> [1] 0.1666667
Using with metric_set()
<- metric_set(accuracy, sens, spec, lr_pos, lr_neg)
class_metrics_2
class_metrics_2#> # A tibble: 5 × 3
#> metric class direction
#> <chr> <chr> <chr>
#> 1 accuracy class_metric maximize
#> 2 sens class_metric maximize
#> 3 spec class_metric maximize
#> 4 lr_pos class_metric maximize
#> 5 lr_neg class_metric minimize
conf_mat(pathology, truth = pathology, estimate = scan)
#> Truth
#> Prediction abnorm norm
#> abnorm 231 32
#> norm 27 54
class_metrics_2(pathology, truth = pathology, estimate = scan)
#> # A tibble: 5 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.828
#> 2 sens binary 0.895
#> 3 spec binary 0.628
#> 4 lr_pos binary 2.41
#> 5 lr_neg binary 0.167
Check LR+
0.8953488 / (1 - 0.6279070) # LR+ = Sens / (1-Spec)
#> [1] 2.40625
Check LR-
1 - 0.8953488) / 0.6279070 # LR- = (1-Sens) / Spec
(#> [1] 0.1666667