Regression in R

Package mgcv

Logistic Regression

Report Coefficients

fit_s <- summary(m0)


prt_coef.gam <- function(obj, exp = FALSE, digits = 2){
    obj_summary <- summary(obj)
    rtn <- data.table(
        coef_name = names(obj_summary$p.coeff)
      , coef_value = obj_summary$p.coeff
      , df = sum(obj$edf)
    )
    rtn <- merge(
        x = rtn
      , y = data.table(coef_name = names(obj_summary$se), se = obj_summary$se)
      , on = "coef_name"
      , all.x = TRUE
      , all.y = FALSE
    )
    rtn[, coef_value_lower := coef_value + qt(0.025, df) * se
        ][, coef_value_upper := coef_value + qt(0.975, df) * se
          ]
    if (exp == TRUE){
        rtn[, coef_value := exp(coef_value)
            ][, coef_value_lower := exp(coef_value_lower)
              ][, coef_value_upper := exp(coef_value_upper)]
    }
    rtn[, ci := paste0(
              format(round(coef_value, digits), nsmall = digits)
            , "("
            , format(round(coef_value_lower, digits), nsmall = digits)
            , ","
            , format(round(coef_value_upper, digits), nsmall = digits)
            , ")")
        ]
    ## print(rtn)
    if (obj$family$family == "multinom"){
        ## print(obj$family$family)
    }
    return(rtn)
}



fit_coefs <- prt_coef.gam(m0, exp = TRUE, digits = 2)
fit_coefs <- fit_coefs[order(coef_name)][-1,]


coef_names <- get_coef_name(dt2[, c(
     "PTOT_group"
  , "age_group"
  , "gender"
  , "ethnicity"
  , "govtpay"
  , "wintermonth"
  , "ecmo"
  , "ventcharge"
  , "nmb72h"
  , "steroid72h"
  , "resp_medical"
  , "card_surg"
  , "infxn"
  , "ccc_composite"
)])

prt_m1_coef2 <- merge(
    x = coef_names
  , y = fit_coefs
  , by.x = "coef.name"
  , by.y = "coef_name"
  , all.x = TRUE
  , all.y = FALSE
)

knitr::kable(
           prt_m1_coef2[order(order.level), list(label, level, ci)]
         , caption = "Logistic Regression"
         , col.names = c("Variable", "Level", "OR")
         , align = c("l","r","r","r","r")
       ) %>%
    styling()

Multinomial Logistic Regression

Report Coefficient

saveRDS(fit, "fit_aim1.RDS")
fit <- readRDS("fit_aim1.RDS")
fit_s <- summary(fit)

prt_coef.gam <- function(obj, exp = FALSE, digits = 2){
    obj_summary <- summary(obj)
    rtn <- data.table(
        coef_name = names(obj_summary$p.coeff)
      , coef_value = obj_summary$p.coeff
      , df = sum(obj$edf)
    )
    rtn <- merge(
        x = rtn
      , y = data.table(coef_name = names(obj_summary$se), se = obj_summary$se)
      , on = "coef_name"
      , all.x = TRUE
      , all.y = FALSE
    )
    rtn[, coef_value_lower := coef_value + qt(0.025, df) * se
        ][, coef_value_upper := coef_value + qt(0.975, df) * se
          ]
    if (exp == TRUE){
        rtn[, coef_value := exp(coef_value)
            ][, coef_value_lower := exp(coef_value_lower)
              ][, coef_value_upper := exp(coef_value_upper)]
    }
    rtn[, ci := paste0(
              format(round(coef_value, digits), nsmall = digits)
            , "("
            , format(round(coef_value_lower, digits), nsmall = digits)
            , ","
            , format(round(coef_value_upper, digits), nsmall = digits)
            , ")")
        ]
    ## print(rtn)

    if (obj$family$family == "multinom"){
        ## print(obj$family$family)
    }
    return(rtn)
}

fit_coefs <- prt_coef.gam(fit, exp = TRUE, digits = 2)
fit_coefs <- fit_coefs[order(coef_name)]
cf_l <- fit_coefs[seq(3, 42, by = 2),][, ci_l := ci][, list(coef_name, ci_l)]
cf_r <- fit_coefs[seq(4, 32, by = 2),][, ci_r := ci][, list(ci_r)]

prt_m1_coef <- cbind(cf_l, cf_r)

coef_names <- get_coef_name(df7[, c(
    "topquartile"
  , "gender"
  , "age_group"
  , "ethnicity"
  , "payor_class"
  , "wintermonth"
  , "ccc_composite"
  , "neuro"
  , "icu_los_in_weeks"
  , "card_surg"
  , "resp_medical"
  , "infxn"
  , "ecmo"
  , "vent"
  , "nmb72h"
  , "steroid72h"
)])

prt_m1_coef2 <- merge(
    x = coef_names
  , y = prt_m1_coef
  , by.x = "coef.name"
  , by.y = "coef_name"
  , all.x = TRUE
  , all.y = FALSE
)

prt_m1_coef2 <- prt_m1_coef2[order(order.level), list(label, level, ci_l, ci_r)]
knitr::kable(
          prt_m1_coef2
         , caption = "Multinomial Logistic Regression"
         , col.names = c("Variable", "Level", "OR(IRF:Other)", "OR(Death:Other)")
         , align = c("l","r","r","r","r")
       ) %>%
    styling()

Package BART

Binary Outcome

BART on Binary outcome

[1] 0.3000000 0.3396226

[1] 0.32

[1] 0.4

[1] 200 2

*****Into main of pbart *****Data: data:n,p,np: 100, 2, 200 y1,yn: 1, 0 x1,x[n*p]: 0.000000, 0.138691 xp1,xp[np*p]: 0.000000, 0.138691 *****Number of Trees: 50 *****Number of Cut Points: 1 … 100 *****burn and ndpost: 1000, 2500 *****Prior:mybeta,alpha,tau: 2.000000,0.950000,0.212132 *****binaryOffset: -0.358459 *****Dirichlet:sparse,theta,omega,a,b,rho,augment: 0,0,1,0.5,1,2,0 *****nkeeptrain,nkeeptest,nkeeptreedraws: 2500,2500,2500 *****printevery: 100 *****skiptr,skipte,skiptreedraws: 1,1,1

MCMC done 0 (out of 3500) done 100 (out of 3500) done 200 (out of 3500) done 300 (out of 3500) done 400 (out of 3500) done 500 (out of 3500) done 600 (out of 3500) done 700 (out of 3500) done 800 (out of 3500) done 900 (out of 3500) done 1000 (out of 3500) done 1100 (out of 3500) done 1200 (out of 3500) done 1300 (out of 3500) done 1400 (out of 3500) done 1500 (out of 3500) done 1600 (out of 3500) done 1700 (out of 3500) done 1800 (out of 3500) done 1900 (out of 3500) done 2000 (out of 3500) done 2100 (out of 3500) done 2200 (out of 3500) done 2300 (out of 3500) done 2400 (out of 3500) done 2500 (out of 3500) done 2600 (out of 3500) done 2700 (out of 3500) done 2800 (out of 3500) done 2900 (out of 3500) done 3000 (out of 3500) done 3100 (out of 3500) done 3200 (out of 3500) done 3300 (out of 3500) done 3400 (out of 3500) time: 1s check counts trcnt,tecnt: 2500,2500

List of 14 $ yhat.train : num [1:2500, 1:100] -1.083 -1.104 -0.854 -0.314 -0.123 … $ yhat.test : num [1:2500, 1:200] -1.083 -1.104 -0.854 -0.314 -0.123 … $ varcount : int [1:2500, 1:2] 28 25 23 25 23 25 26 24 25 29 … ..- attr(, “dimnames”)=List of 2 .. ..$ : NULL .. ..$ : chr [1:2] “treatment” “noise” $ varprob : num [1:2500, 1:2] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 … ..- attr(, “dimnames”)=List of 2 .. ..$ : NULL .. ..$ : chr [1:2] “treatment” “noise” $ treedraws :List of 2 ..$ cutpoints:List of 2 .. ..$ treatment: num 0.5 .. ..$ noise : num [1:100] -2.3 -2.25 -2.2 -2.15 -2.1 … ..$ trees : chr “2500 50 231 0 0 -0.42579033832 0 0 -0.16950011113 0 0 -0.165008988311 0 0 0.255976901511 0 0 -0”| truncated $ proc.time : ‘proc_time’ Named num [1:5] 0.631 0.039 0.672 0 0 ..- attr(, “names”)= chr [1:5] “user.self” “sys.self” “elapsed” “user.child” … $ prob.train : num [1:2500, 1:100] 0.139 0.135 0.197 0.377 0.451 … $ prob.train.mean: num [1:100] 0.292 0.383 0.431 0.316 0.282 … $ prob.test : num [1:2500, 1:200] 0.139 0.135 0.197 0.377 0.451 … $ prob.test.mean : num [1:200] 0.292 0.383 0.431 0.316 0.282 … $ varcount.mean : Named num [1:2] 27.1 25.1 ..- attr(, “names”)= chr [1:2] “treatment” “noise” $ varprob.mean : Named num [1:2] 0.5 0.5 ..- attr(, “names”)= chr [1:2] “treatment” “noise” $ rm.const : int [1:2] 1 2 $ binaryOffset : num -0.358 - attr(, “class”)= chr “pbart”

[1] 2500 200

Min. 1st Qu. Median Mean 3rd Qu. Max. -1.3418 -1.1672 -0.7957 -0.8165 -0.5100 -0.1368

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.01152 0.22718 0.32082 0.33477 0.42733 0.90851

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.01896 0.27770 0.37644 0.38489 0.48340 0.95675

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.01152 0.21517 0.30501 0.32011 0.40898 0.90851

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.01964 0.29422 0.39389 0.39990 0.49842 0.95675

Leave One Out Cross Validation

caret

Generalized Linear Model

150 samples 4 predictor 2 classes: ‘FALSE’, ‘TRUE’

No pre-processing Resampling: Leave-One-Out Cross-Validation Summary of sample sizes: 149, 149, 149, 149, 149, 149, … Resampling results:

Accuracy Kappa 1 1

[1] “train” “train.formula”

[1] confusionMatrix densityplot fitted ggplot
[5] histogram levels plot predict
[9] predictors print residuals stripplot
[13] summary update varImp xyplot
see ‘?methods’ for accessing help and source code

Computing Environment

R version 4.2.0 (2022-04-22) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 20.04.3 LTS

Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale: [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C

attached base packages: [1] stats graphics grDevices utils datasets methods base

other attached packages: [1] caret_6.0-93 lattice_0.20-45 BART_2.9
[4] survival_3.2-13 Wu_0.0.0.9000 flexdashboard_0.6.0 [7] lme4_1.1-30 Matrix_1.4-0 mgcv_1.8-38
[10] nlme_3.1-152 png_0.1-7 scales_1.2.0
[13] nnet_7.3-16 labelled_2.9.1 kableExtra_1.3.4
[16] plotly_4.10.0 gridExtra_2.3 ggplot2_3.3.6
[19] DT_0.24 tableone_0.13.2 magrittr_2.0.3
[22] lubridate_1.8.0 dplyr_1.0.9 plyr_1.8.7
[25] data.table_1.14.2 rmdformats_1.0.4 knitr_1.39

loaded via a namespace (and not attached): [1] webshot_0.5.3 httr_1.4.4 tools_4.2.0
[4] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
[7] rpart_4.1-15 DBI_1.1.3 lazyeval_0.2.2
[10] colorspace_2.0-3 withr_2.5.0 tidyselect_1.1.2
[13] compiler_4.2.0 cli_3.3.0 rvest_1.0.2
[16] xml2_1.3.3 bookdown_0.28 sass_0.4.2
[19] proxy_0.4-27 systemfonts_1.0.4 stringr_1.4.0
[22] digest_0.6.29 minqa_1.2.4 rmarkdown_2.14
[25] svglite_2.1.0 pkgconfig_2.0.3 htmltools_0.5.3
[28] parallelly_1.32.1 fastmap_1.1.0 htmlwidgets_1.5.4
[31] rlang_1.0.4 rstudioapi_0.13 jquerylib_0.1.4
[34] generics_0.1.3 jsonlite_1.8.0 ModelMetrics_1.2.2.2 [37] Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3
[40] lifecycle_1.0.1 pROC_1.18.0 stringi_1.7.8
[43] yaml_2.3.5 MASS_7.3-54 recipes_1.0.1
[46] grid_4.2.0 listenv_0.8.0 parallel_4.2.0
[49] forcats_0.5.1 haven_2.5.0 splines_4.2.0
[52] hms_1.1.1 pillar_1.8.1 boot_1.3-28
[55] stats4_4.2.0 reshape2_1.4.4 future.apply_1.9.0
[58] codetools_0.2-18 glue_1.6.2 evaluate_0.16
[61] mitools_2.4 vctrs_0.4.1 nloptr_2.0.3
[64] foreach_1.5.2 gtable_0.3.0 purrr_0.3.4
[67] tidyr_1.2.0 future_1.27.0 cachem_1.0.6
[70] xfun_0.32 gower_1.0.0 prodlim_2019.11.13
[73] survey_4.1-1 e1071_1.7-11 class_7.3-19
[76] viridisLite_0.4.0 timeDate_4021.104 tibble_3.1.8
[79] iterators_1.0.14 hardhat_1.2.0 lava_1.6.10
[82] globals_0.16.0 ellipsis_0.3.2 ipred_0.9-13