Area Under the ROC Curve (AUC)
Simulate Data
library(data.table)
set.seed(123456)
n <- 1000
dt <- data.table(x = runif(n))
p0 <- 0.2
or <- 2
dt <- dt[, odds0 := p0 / (1 - p0)
][, log_odds := log(odds0) + x * log(or)
][, p := exp(log_odds) / (1 + exp(log_odds))]
vsample <- function(p){
sample(c(1, 0), size = 1, replace = TRUE, prob = c(p, 1 - p))
}
vsample <- Vectorize(vsample)
dt <- dt[, outcome := vsample(p)]
m <- glm(outcome ~ x, data = dt, family = binomial)
library(sjPlot)
tab_model(m)
 | outcome | ||
---|---|---|---|
Predictors | Odds Ratios | CI | p |
(Intercept) | 0.23 | 0.17 – 0.31 | <0.001 |
x | 1.99 | 1.20 – 3.30 | 0.008 |
Observations | 1000 | ||
R2 Tjur | 0.007 |
ggplot
pred <- predict(m, newdata = dt, type = "response")
library(pROC)
r <- roc(dt$outcome, pred, ci = TRUE, direction = "<")
dr <- data.table(
tpr = r$sensitivities
, fpr = 1 - r$specificities
)
dr <- dr[order(fpr, tpr)]
ggplot(dr, aes(x = fpr, y = tpr)) +
geom_segment(x = 0, y = 0, xend = 1, yend = 1, size = 0.5, color = "#999999", linetype = "longdash") +
geom_step(size = 1.5, color = "#333333", direction = "hv") +
xlab("FPR") + ylab("TPR") +
theme_bw() +
theme(axis.ticks.x = element_blank()
, axis.ticks.y = element_blank()
, axis.text.x = element_blank()
, axis.text.y = element_blank()
, panel.grid.major = element_blank()
, panel.grid.minor = element_blank()
)
Two AUC Curves
set.seed(654321)
n <- 800
dt2 <- data.table(x = runif(n))
p0 <- 0.2
or <- 3
dt2 <- dt2[, odds0 := p0 / (1 - p0)
][, log_odds := log(odds0) + x * log(or)
][, p := exp(log_odds) / (1 + exp(log_odds))]
vsample <- function(p){
sample(c(1, 0), size = 1, replace = TRUE, prob = c(p, 1 - p))
}
vsample <- Vectorize(vsample)
dt2 <- dt2[, outcome := vsample(p)]
m2 <- glm(outcome ~ x, data = dt2, family = binomial)
pred <- predict(m, newdata = dt, type = "response")
library(pROC)
r <- roc(dt$outcome, pred, ci = TRUE, direction = "<")
dr <- data.table(
tpr = r$sensitivities
, fpr = 1 - r$specificities
)
dr <- dr[order(fpr, tpr)][, group := 1]
pred2 <- predict(m2, newdata = dt2, type = "response")
library(pROC)
r2 <- roc(dt2$outcome, pred2, ci = TRUE, direction = "<")
dr2 <- data.table(
tpr = r2$sensitivities
, fpr = 1 - r2$specificities
)
dr2 <- dr2[order(fpr, tpr)][, group := 2]
ggplot(dr, aes(x = fpr, y = tpr)) +
geom_segment(x = 0, y = 0, xend = 1, yend = 1, size = 0.5, color = "#999999", linetype = "longdash", alpha = 0.8) +
geom_step(size = 1.5, color = "#666666", direction = "hv", alpha = 0.6) +
geom_step(data = dr2, aes(x = fpr, y = tpr), size = 1.5, color = "#000000", direction = "hv", inherit = FALSE) +
xlab("FPR") + ylab("TPR") +
theme_bw() +
theme(axis.ticks.x = element_blank()
, axis.ticks.y = element_blank()
, axis.text.x = element_blank()
, axis.text.y = element_blank()
, panel.grid.major = element_blank()
, panel.grid.minor = element_blank()
)
Compare Two AUCs
Put Multiple AUCs Together
p1 <- plt_roc(r) %>% ann("I")
p2 <- plt_roc(r) %>% ann("II")
p3 <- plt_roc(r2) %>% ann("III")
p21 <- plt_roc2(r, r2) %>% ann("I vs II")
p22 <- p21
p23 <- p21
p33 <- p21
subplot(p1 %>% hide_axis()
, p2 %>% hide_axis()
, p3 %>% hide_axis()
## , plotly_empty()
, p21 %>% hide_axis()
, p22 %>% hide_axis()
, p23 %>% hide_axis()
, p33 %>% hide_axis()
, nrows = 3
, shareX = FALSE
, shareY = FALSE
, titleX = TRUE
, titleY = TRUE
, margin = 0.02
, widths = c(1, 1, 1)/3
, heights = c(1, 1, 1)/3
) %>% layout(autosize = TRUE)
Hosmer and Lemeshow Goodness-of-fit Test
library(ResourceSelection)
hl <- hoslem.test(m$y, m$fitted.values, g = 10)
t <- data.table(
statistic = hl$statistic
, df = hl$parameter
, p.value = hl$p.value
, method = hl$method
)
t %>% prt(caption = "HL Test with 10 bins")
statistic | df | p.value | method |
---|---|---|---|
13.50531 | 8 | 0.0956059 | Hosmer and Lemeshow goodness of fit (GOF) test |
Calibration Plot
R sessionInfo
R version 4.2.0 (2022-04-22) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 20.04.3 LTS
Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale: [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] stats graphics grDevices utils datasets methods base
other attached packages: [1] ResourceSelection_0.3-5 pROC_1.18.0 sjPlot_2.8.10
[4] Wu_0.0.0.9000 flexdashboard_0.5.2 lme4_1.1-29
[7] Matrix_1.4-0 mgcv_1.8-38 nlme_3.1-152
[10] png_0.1-7 scales_1.2.0 nnet_7.3-16
[13] labelled_2.9.1 kableExtra_1.3.4 plotly_4.10.0
[16] gridExtra_2.3 ggplot2_3.3.6 DT_0.23
[19] tableone_0.13.2 magrittr_2.0.3 lubridate_1.8.0
[22] dplyr_1.0.9 plyr_1.8.7 data.table_1.14.2
[25] rmdformats_1.0.4 knitr_1.39
loaded via a namespace (and not attached): [1] TH.data_1.1-1 minqa_1.2.4 colorspace_2.0-3 ellipsis_0.3.2
[5] sjlabelled_1.2.0 estimability_1.4 parameters_0.18.1 rstudioapi_0.13
[9] farver_2.1.1 fansi_1.0.3 mvtnorm_1.1-3 xml2_1.3.3
[13] codetools_0.2-18 splines_4.2.0 sjmisc_2.8.9 jsonlite_1.8.0
[17] nloptr_2.0.3 ggeffects_1.1.2 broom_0.8.0 effectsize_0.7.0 [21] compiler_4.2.0 httr_1.4.3 sjstats_0.18.1 emmeans_1.7.5
[25] backports_1.4.1 assertthat_0.2.1 fastmap_1.1.0 lazyeval_0.2.2
[29] survey_4.1-1 cli_3.3.0 htmltools_0.5.3 tools_4.2.0
[33] gtable_0.3.0 glue_1.6.2 Rcpp_1.0.9 jquerylib_0.1.4
[37] vctrs_0.4.1 svglite_2.1.0 crosstalk_1.2.0 insight_0.18.0
[41] xfun_0.31 stringr_1.4.0 rvest_1.0.2 lifecycle_1.0.1
[45] klippy_0.0.0.9500 MASS_7.3-54 zoo_1.8-10 hms_1.1.1
[49] sandwich_3.0-2 yaml_2.3.5 sass_0.4.1 stringi_1.7.8
[53] highr_0.9 bayestestR_0.12.1 boot_1.3-28 rlang_1.0.4
[57] pkgconfig_2.0.3 systemfonts_1.0.4 evaluate_0.15 lattice_0.20-45
[61] purrr_0.3.4 htmlwidgets_1.5.4 labeling_0.4.2 tidyselect_1.1.2 [65] bookdown_0.27 R6_2.5.1 generics_0.1.3 multcomp_1.4-19
[69] DBI_1.1.2 pillar_1.8.0 haven_2.5.0 withr_2.5.0
[73] survival_3.2-13 datawizard_0.4.1 tibble_3.1.8 performance_0.9.1 [77] modelr_0.1.8 utf8_1.2.2 rmarkdown_2.14 grid_4.2.0
[81] forcats_0.5.1 digest_0.6.29 webshot_0.5.3 xtable_1.8-4
[85] tidyr_1.2.0 munsell_0.5.0 viridisLite_0.4.0 bslib_0.3.1
[89] mitools_2.4