MAIC

Back to Index

Simulate Data

Individual Patient Data
- Age with mean 40, sd 10
- Gender with 60% female
Aggregate Data
- Age with mean 30
- Gender with 50% female

set.seed(20221130)
n <- 450
age <- rnorm(n = n, mean = 40, sd = 10)
gender <- sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.4, 0.6))

dt <- data.table(age, gender)

vars <- c("age", "gender")
factorVars <- c("gender")

tbl <- Wu::tbl1n(data = dt
    , vars = vars
    , factorVars = factorVars)

tbl %>% prt()

Variable	level	Overall	Missing
n	n	450
age	mean (SD)	40.58 (10.24)	0.0
	median [IQR]	40.22 [33.11, 47.43]	0.0
	median [range]	40.22 [8.85, 72.15]	0.0
gender (%)	0	183 (40.7)	0.0
	1	267 (59.3)

## sweep(dt, MARGIN = 1, STATS = 1000, FUN = "+")[1:5, ]


ipd <- data.table(
    age = age
  , gender = gender
)

agd <- data.table(
    age_mean = 30
  , age_sd = 8
  , gender_mean = 0.5
)

The Objective Function

The objective function gives one single numeric value
It sums up all covariates (X) with weights (a1) where covariates are matching variables
After centralizing covariates (X) (subtracting the mean), the algorithm tries to find weights to minimise the objective function’s value (toward zero)
Weights vector a1 has the number of coefficients same as the number of covariates
In MAIC setting, the covariates are those variables need to be matched

## x1 <- data.frame(age = c(1, 1.1, 1.2), weight = c(2, 2.4, 2.8))
## x1 <- as.matrix(x1)
## a <- c(1, 2)
## x1 %*% a
## exp(x1)

objfn <- function(a1, X){
    sum(exp(X %*% a1))
}

Gradient Function

It is the partial derivatives of the objective function against parameters (a1)

gradfn <- function(a1, X){
    colSums(sweep(X, 1, exp(X %*% a1), "*"))
}

Centralized Individual Data

It applies the method of moments, for the first-order moment, subtracting the mean; for the second-order moment, subtracting the mean of squared.

ipd <- ipd[, age_squared := age ^ 2]
agd <- agd[, age_squared := age_mean ^ 2 + age_sd ^ 2
                 ][, age := age_mean
                   ][, gender := gender_mean]

Vars <- c("age", "age_squared", "gender")

ipd_centralized <- ipd[, ..Vars] - agd[rep(1, nrow(ipd)), ..Vars]

## X.EM.0 <- sweep(with(AB.IPD, cbind(age
##                                  , age^2
##                                  , gender))
##               , 2
##               , with(AC.AgD, c(age.mean
##                              , age.mean^2 + age.sd^2
##                              , gender.mean))
##               , '-')

Optimize the Weights

opt1 <- optim(par = c(0, 0, 0)
            , fn = objfn
            , gr = gradfn
            , X = as.matrix(ipd_centralized)
            , method = "BFGS")

a1 <- opt1$par

wt <- exp(as.matrix(ipd_centralized) %*% a1)
wt_std <- (wt / sum(wt)) * n

ipd$wts <- wt_std

plt_hist(wt_std, xmin = 0, xmax = 10, xint = 0.2)

Effective Sample Size

Kish’s method

\[Effective \, Sample \, Size = [\sum \omega_i]^2/[\sum \omega_i^2]\]

sample_size <- data.table(
    n_original = nrow(ipd)
    , n_effective_sample_size = (sum(wt_std)) ^ 2 / sum(wt_std ^ 2)
)

sample_size %>% prt()

n_original	n_effective_sample_size
450	184.5694

Balance Table

prs_agd <- agd[, c("age_mean", "age_sd", "gender")]

wmean <- function(x, wt){sum(x * wt) / sum(wt)}
wsd <- function(x, wt){
    wm <- wmean(x, wt)
    n <- length(x)
    sqrt( sum(wt * (x - wm) ^ 2)/ (sum(wt) * (n - 1) / n))
}


parameters_ipd <- c(
    age_mean = mean(ipd$age)
  , age_sd = sd(ipd$age)
  , gender_mean = mean(ipd$gender)
)


parameters_ipd_weighted <- c(
    age_mean = wmean(ipd$age, ipd$wts)
  , age_sd = wsd(ipd$age, ipd$wts)
  , gender_mean = wmean(ipd$gender, ipd$wts)
)


bt <- data.table(
    moment = names(prs_agd)
  , aggregate = as.numeric(prs_agd[1, ])
  , ipd = parameters_ipd
  , ipd_weighted = parameters_ipd_weighted
)

bt <- bt[, diff := ipd - aggregate
         ][, diff_weighted := ipd_weighted - aggregate]


bt[, .(moment, aggregate, ipd, diff, ipd_weighted, diff_weighted)] %>% prt()

moment	aggregate	ipd	diff	ipd_weighted	diff_weighted
age_mean	30.0	40.5799492	10.5799492	30.000000	-0.0000001
age_sd	8.0	10.2402944	2.2402944	8.008904	0.0089037
gender	0.5	0.5933333	0.0933333	0.500000	0.0000000

References

NICE MAIC: https://www.sheffield.ac.uk/nice-dsu/tsds/population-adjusted
R “maic” Package: https://cran.r-project.org/web/packages/maic/maic.pdf

R sessionInfo

sessionInfo()

R version 4.2.0 (2022-04-22) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 20.04.3 LTS

Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale: [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C

attached base packages: [1] stats graphics grDevices utils datasets methods base

other attached packages: [1] Wu_0.0.0.9000 flexdashboard_0.6.0 lme4_1.1-30
[4] Matrix_1.4-0 mgcv_1.8-38 nlme_3.1-152
[7] png_0.1-7 scales_1.2.0 nnet_7.3-16
[10] labelled_2.9.1 kableExtra_1.3.4 plotly_4.10.0
[13] gridExtra_2.3 ggplot2_3.3.6 DT_0.24
[16] tableone_0.13.2 magrittr_2.0.3 lubridate_1.8.0
[19] dplyr_1.0.9 plyr_1.8.7 data.table_1.14.2
[22] rmdformats_1.0.4 knitr_1.39

loaded via a namespace (and not attached): [1] httr_1.4.4 sass_0.4.2 tidyr_1.2.0 jsonlite_1.8.0
[5] viridisLite_0.4.0 splines_4.2.0 bslib_0.4.0 assertthat_0.2.1
[9] highr_0.9 yaml_2.3.5 pillar_1.8.1 lattice_0.20-45
[13] glue_1.6.2 digest_0.6.29 RColorBrewer_1.1-3 rvest_1.0.2
[17] minqa_1.2.4 colorspace_2.0-3 htmltools_0.5.3 survey_4.1-1
[21] pkgconfig_2.0.3 haven_2.5.0 bookdown_0.28 purrr_0.3.4
[25] webshot_0.5.3 svglite_2.1.0 proxy_0.4-27 tibble_3.1.8
[29] farver_2.1.1 generics_0.1.3 ellipsis_0.3.2 cachem_1.0.6
[33] withr_2.5.0 klippy_0.0.0.9500 lazyeval_0.2.2 cli_3.3.0
[37] survival_3.2-13 evaluate_0.16 fansi_1.0.3 MASS_7.3-54
[41] class_7.3-19 forcats_0.5.1 xml2_1.3.3 tools_4.2.0
[45] hms_1.1.1 mitools_2.4 lifecycle_1.0.1 stringr_1.4.0
[49] munsell_0.5.0 e1071_1.7-11 compiler_4.2.0 jquerylib_0.1.4
[53] systemfonts_1.0.4 rlang_1.0.4 grid_4.2.0 nloptr_2.0.3
[57] rstudioapi_0.13 htmlwidgets_1.5.4 crosstalk_1.2.0 rmarkdown_2.14
[61] boot_1.3-28 gtable_0.3.0 DBI_1.1.3 R6_2.5.1
[65] zoo_1.8-10 fastmap_1.1.0 utf8_1.2.2 stringi_1.7.8
[69] Rcpp_1.0.9 vctrs_0.4.1 tidyselect_1.1.2 xfun_0.32