Using data & diagram from r-causal: chapter 2
library (gtsummary)
library (corrr)
net_data <- read_rds (here ("data/causal/net_data.Rds" ))
Explore
Exposure: net
Outcome: malaria_risk
[1] "id" "net" "net_num"
[4] "malaria_risk" "income" "health"
[7] "household" "eligible" "temperature"
[10] "insecticide_resistance"
Data summary
Name
net_data
Number of rows
1752
Number of columns
10
_______________________
Column type frequency:
logical
2
numeric
8
________________________
Group variables
None
Variable type: logical
net
0
1
0.26
FAL: 1298, TRU: 454
eligible
0
1
0.02
FAL: 1716, TRU: 36
Variable type: numeric
id
0
1
876.50
505.90
1.0
438.75
876.5
1314.25
1752.0
▇▇▇▇▇
net_num
0
1
0.26
0.44
0.0
0.00
0.0
1.00
1.0
▇▁▁▁▃
malaria_risk
0
1
39.67
15.37
10.0
28.00
36.0
50.00
90.0
▃▇▅▂▁
income
0
1
897.76
191.24
330.0
764.75
893.0
1031.00
1484.0
▁▅▇▅▁
health
0
1
50.21
19.35
5.0
37.00
50.0
64.00
100.0
▂▆▇▅▁
household
0
1
2.97
1.41
1.0
2.00
3.0
4.00
9.0
▇▇▂▁▁
temperature
0
1
23.93
4.10
15.6
20.90
23.8
27.10
32.2
▃▇▇▇▃
insecticide_resistance
0
1
50.08
14.44
5.0
40.00
50.0
60.00
95.0
▁▅▇▃▁
net_data_dep_vars <- net_data |>
select (! c (malaria_risk, id, net_num)) |> names ()
Correlation
net_data_cor <- net_data |>
corrr:: correlate (quiet = T)
corrr:: rplot (shave (net_data_cor), print_cor = TRUE )
network_plot (net_data_cor)
Outcome: malaria_risk
(LM)
Univar LM
Code
net_data_tbl.malaria.uv <- net_data |>
select (all_of (net_data_dep_vars), malaria_risk) |>
tbl_uvregression (
method = lm,
y = malaria_risk,
pvalue_fun = ~ style_pvalue (.x, digits = 3 )
) |>
bold_p () |>
bold_labels ()
net_data_tbl.malaria.uv
net
1,752
FALSE
—
—
TRUE
-16
-18, -15
<0.001
income
1,752
-0.06
-0.07, -0.06
<0.001
health
1,752
-0.45
-0.48, -0.41
<0.001
household
1,752
-0.09
-0.60, 0.43
0.740
eligible
1,752
FALSE
—
—
TRUE
20
15, 25
<0.001
temperature
1,752
0.62
0.45, 0.79
<0.001
insecticide_resistance
1,752
0.21
0.16, 0.25
<0.001
Multivar LM
Using best subset with leaps
library (leaps)
source (here ("R/leaps-extra.R" ))
net_data_mod <- net_data |> select (all_of (net_data_dep_vars), malaria_risk)
net_data_malaria_rss.fit <- leaps:: regsubsets (
malaria_risk ~ .,
data = net_data_mod,
force.in = 1 # Force to include `net` as predictors
)
# broom::tidy(net_data_malaria_rss.fit)
Code
library (patchwork)
library (latex2exp)
p_cp <- autoplot (net_data_malaria_rss.fit, res = "mallows_cp" ) +
labs (y = TeX ("$C_p$" ))
p_bic <- autoplot (net_data_malaria_rss.fit, res = "BIC" )
p_adj_rsq <- autoplot (net_data_malaria_rss.fit, res = "adj.r.squared" ) +
labs (y = TeX ("Adjusted $R^2$" ))
p_cp + p_bic + p_adj_rsq +
plot_annotation (title = "Best Subset Selection at Each Model Sizes" ,
subtitle = TeX ("Estimate test error by $C_p$, BIC, and Adjusted $R^2$" ))
plot (net_data_malaria_rss.fit, scale = "bic" )
Fit Best Multi LM
net_data_malaria_lm.fit <- lm (
malaria_risk ~ net + income + health + temperature + insecticide_resistance,
data = net_data_mod
)
Code
net_data_tbl.malaria.mv <- net_data_malaria_lm.fit |>
tbl_regression (
pvalue_fun = ~ style_pvalue (.x, digits = 3 )
) |>
bold_p () |>
bold_labels ()
net_data_tbl.malaria.mv
net
FALSE
—
—
TRUE
-12
-13, -12
<0.001
income
-0.08
-0.08, -0.07
<0.001
health
0.14
0.12, 0.15
<0.001
temperature
1.0
0.98, 1.1
<0.001
insecticide_resistance
0.22
0.20, 0.23
<0.001
Summary
Outcome = malaria_risk
Code
net_data_tbl.malaria.uv.mv <- tbl_merge (
list (net_data_tbl.malaria.uv, net_data_tbl.malaria.mv),
tab_spanner = c ("**Univar**" , "**Multivar**" )
)
net_data_tbl.malaria.uv.mv
N
Beta
95% CI
p-value
Beta
95% CI
p-value
net
1,752
FALSE
—
—
—
—
TRUE
-16
-18, -15
<0.001
-12
-13, -12
<0.001
income
1,752
-0.06
-0.07, -0.06
<0.001
-0.08
-0.08, -0.07
<0.001
health
1,752
-0.45
-0.48, -0.41
<0.001
0.14
0.12, 0.15
<0.001
household
1,752
-0.09
-0.60, 0.43
0.740
eligible
1,752
FALSE
—
—
TRUE
20
15, 25
<0.001
temperature
1,752
0.62
0.45, 0.79
<0.001
1.0
0.98, 1.1
<0.001
insecticide_resistance
1,752
0.21
0.16, 0.25
<0.001
0.22
0.20, 0.23
<0.001
Outcome: net
(LR)
Univar LogReg
Code
net_data |>
select (all_of (net_data_dep_vars), malaria_risk) |>
tbl_uvregression (
method = glm,
y = net,
method.args = list (family = binomial),
exponentiate = TRUE ,
pvalue_fun = ~ style_pvalue (.x, digits = 3 )
) |>
bold_p () |>
bold_labels ()
income
1,752
1.00
1.00, 1.00
<0.001
health
1,752
1.01
1.01, 1.02
<0.001
household
1,752
1.09
1.01, 1.17
0.029
eligible
1,752
FALSE
—
—
TRUE
2.94
1.51, 5.73
0.001
temperature
1,752
0.98
0.96, 1.01
0.163
insecticide_resistance
1,752
1.00
1.00, 1.01
0.360
malaria_risk
1,752
0.88
0.87, 0.90
<0.001
Multivar Logreg
net_data_Xy_net <- net_data_mod |>
relocate (net, .after = last_col ()) |>
as.data.frame ()
net_data_net.bestglm <- bestglm:: bestglm (net_data_Xy_net,
family = binomial (),
IC = "BIC" )
Morgan-Tatar search since family is non-gaussian.
net_data_net.bestglm$ BestModels
income health household eligible temperature insecticide_resistance
1 TRUE TRUE FALSE TRUE TRUE TRUE
2 TRUE TRUE TRUE TRUE TRUE TRUE
3 TRUE FALSE FALSE TRUE TRUE TRUE
4 TRUE FALSE TRUE TRUE TRUE TRUE
5 TRUE TRUE FALSE FALSE TRUE TRUE
malaria_risk Criterion
1 TRUE 642.8398
2 TRUE 650.3082
3 TRUE 675.0623
4 TRUE 682.5247
5 TRUE 698.3880
Summary
Code
net_data_tbl.malaria.uv.mv
N
Beta
95% CI
p-value
Beta
95% CI
p-value
net
1,752
FALSE
—
—
—
—
TRUE
-16
-18, -15
<0.001
-12
-13, -12
<0.001
income
1,752
-0.06
-0.07, -0.06
<0.001
-0.08
-0.08, -0.07
<0.001
health
1,752
-0.45
-0.48, -0.41
<0.001
0.14
0.12, 0.15
<0.001
household
1,752
-0.09
-0.60, 0.43
0.740
eligible
1,752
FALSE
—
—
TRUE
20
15, 25
<0.001
temperature
1,752
0.62
0.45, 0.79
<0.001
1.0
0.98, 1.1
<0.001
insecticide_resistance
1,752
0.21
0.16, 0.25
<0.001
0.22
0.20, 0.23
<0.001