library(gtsummary)
library(patchwork)
library(correlation)Causal Inference (DAG)
Code adapted from r-causal (CH.5)

Fork (Confounder)
Sim Data
set.seed(123)
n <- 1000
q <- rbinom(n, size = 1, prob = .35)
x <- 2 * q + rnorm(n)
y <- -3 * q + rnorm(n)
confounder_data <- tibble(x, y, q = as.factor(q))Plot
Code
p1 <- confounder_data |>
ggplot(aes(x, y)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~"not adjusting for `q`\n(biased)")
p2 <- confounder_data |>
ggplot(aes(x, y, color = q)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~"adjusting for `q`\n(unbiased)")
p1 + p2
Corr
lares::corr_cross(confounder_data)
correlation(confounder_data)# Correlation Matrix (pearson-method)
Parameter1 | Parameter2 | r | 95% CI | t(998) | p
---------------------------------------------------------------------
x | y | -0.54 | [-0.58, -0.49] | -20.26 | < .001***
p-value adjustment method: Holm (1979)
Observations: 1000
Model
# Not Adjust
## y ~ x
confounder_data.fit.y_x <- lm(y ~ x, data = confounder_data)
## y ~ q
confounder_data.fit.y_q <- lm(y ~ q, data = confounder_data)
## x ~ q
confounder_data.fit.x_q <- lm(x ~ q, data = confounder_data)
# Adjusted: y ~ x + q
confounder_data.fit.y_xq <- lm(y ~ x + q, data = confounder_data)Code
confounder_data.fit.tbls <-
list(
y_x = confounder_data.fit.y_x,
y_q = confounder_data.fit.y_q,
x_q = confounder_data.fit.x_q,
y_xq = confounder_data.fit.y_xq
) |>
map(tbl_regression_custom)
confounder_data.fit.tbls |>
tbl_merge(
tab_spanner = c("**y ~ x**", "**y ~ q**", "**x ~ q**", "**y ~ x + q**")
)| Characteristic | y ~ x | y ~ q | x ~ q | y ~ x + q | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | |
| x | -0.70 | -0.77, -0.63 | <0.001 | 0.06 | 0.00, 0.12 | 0.059 | ||||||
| q | ||||||||||||
| 0 | — | — | — | — | — | — | ||||||
| 1 | -3.1 | -3.2, -3.0 | <0.001 | 1.9 | 1.8, 2.1 | <0.001 | -3.2 | -3.4, -3.0 | <0.001 | |||
| 1 CI = Confidence Interval | ||||||||||||
Chain (Mediator)
Sim Data
set.seed(123)
x <- rnorm(n)
linear_pred <- 2 * x + rnorm(n)
prob <- 1 / (1 + exp(-linear_pred))
q <- rbinom(n, size = 1, prob = prob)
y <- 2 * q + rnorm(n)
mediator_data <- tibble(x, y, q = as.factor(q))Plot
Code
p1 <- mediator_data |>
ggplot(aes(x, y)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~"not adjusting for `q`\n(total effect)")
p2 <- mediator_data |>
ggplot(aes(x, y, color = q)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~"adjusting for `q`\n(direct effect)")
p1 + p2
Corr
lares::corr_cross(mediator_data)
correlation(mediator_data)# Correlation Matrix (pearson-method)
Parameter1 | Parameter2 | r | 95% CI | t(998) | p
------------------------------------------------------------------
x | y | 0.41 | [0.35, 0.46] | 14.04 | < .001***
p-value adjustment method: Holm (1979)
Observations: 1000
Model
# Not Adjust
## y ~ x
mediator_data.fit.y_x <- lm(y ~ x, data = mediator_data)
## y ~ q
mediator_data.fit.y_q <- lm(y ~ q, data = mediator_data)
## x ~ q
mediator_data.fit.x_q <- lm(x ~ q, data = mediator_data)
# Adjusted: y ~ x + q
mediator_data.fit.y_xq <- lm(y ~ x + q, data = mediator_data)Code
mediator_data.fit.tbls <-
list(
y_x = mediator_data.fit.y_x,
y_q = mediator_data.fit.y_q,
x_q = mediator_data.fit.x_q,
y_xq = mediator_data.fit.y_xq
) |>
map(tbl_regression_custom)
mediator_data.fit.tbls |>
tbl_merge(
tab_spanner = c("**y ~ x**", "**y ~ q**", "**x ~ q**", "**y ~ x + q**")
)| Characteristic | y ~ x | y ~ q | x ~ q | y ~ x + q | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | |
| x | 0.57 | 0.49, 0.65 | <0.001 | -0.02 | -0.10, 0.06 | 0.622 | ||||||
| q | ||||||||||||
| 0 | — | — | — | — | — | — | ||||||
| 1 | 2.0 | 1.8, 2.1 | <0.001 | 1.2 | 1.1, 1.3 | <0.001 | 2.0 | 1.8, 2.1 | <0.001 | |||
| 1 CI = Confidence Interval | ||||||||||||
Collider (Bias)
Sim Data
set.seed(1)
x <- rnorm(n)
y <- rnorm(n)
linear_pred <- 2 * x + 3 * y + rnorm(n)
prob <- 1 / (1 + exp(-linear_pred))
q <- rbinom(n, size = 1, prob = prob)
collider_data <- tibble(x, y, q = as.factor(q))Plot
Code
p1 <- collider_data |>
ggplot(aes(x, y)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~"not adjusting for `q`\n(unbiased)")
p2 <- collider_data |>
ggplot(aes(x, y, color = q)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~"adjusting for `q`\n(biased)")
p1 + p2
Corr
lares::corr_cross(collider_data)
correlation(collider_data)# Correlation Matrix (pearson-method)
Parameter1 | Parameter2 | r | 95% CI | t(998) | p
-------------------------------------------------------------------
x | y | 6.40e-03 | [-0.06, 0.07] | 0.20 | 0.840
p-value adjustment method: Holm (1979)
Observations: 1000
Model
# Not Adjust
## y ~ x
collider_data.fit.y_x <- lm(y ~ x, data = collider_data)
## y ~ q
collider_data.fit.y_q <- lm(y ~ q, data = collider_data)
## x ~ q
collider_data.fit.x_q <- lm(x ~ q, data = collider_data)
# Bias: y ~ x + q
collider_data.fit.y_xq <- lm(y ~ x + q, data = collider_data)Code
collider_data.fit.tbls <-
list(
y_x = collider_data.fit.y_x,
y_q = collider_data.fit.y_q,
x_q = collider_data.fit.x_q,
y_xq = collider_data.fit.y_xq
) |>
map(tbl_regression_custom)
collider_data.fit.tbls |>
tbl_merge(
tab_spanner = c("**y ~ x**", "**y ~ q**", "**x ~ q**", "**Bias: y ~ x + q**")
)| Characteristic | y ~ x | y ~ q | x ~ q | Bias: y ~ x + q | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | |
| x | 0.01 | -0.06, 0.07 | 0.840 | -0.25 | -0.30, -0.20 | <0.001 | ||||||
| q | ||||||||||||
| 0 | — | — | — | — | — | — | ||||||
| 1 | 1.2 | 1.1, 1.3 | <0.001 | 0.79 | 0.67, 0.91 | <0.001 | 1.4 | 1.3, 1.5 | <0.001 | |||
| 1 CI = Confidence Interval | ||||||||||||
Summary
